diff --git a/base/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h b/base/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h
index 147cfaeefca2..4e2bf3ff5309 100644
--- a/base/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h
+++ b/base/poco/Crypto/include/Poco/Crypto/OpenSSLInitializer.h
@@ -19,6 +19,7 @@
 #include
+#include <openssl/provider.h>
 #include "Poco/AtomicCounter.h"
 #include "Poco/Crypto/Crypto.h"
 #include "Poco/Mutex.h"
@@ -83,6 +84,8 @@ namespace Crypto
 private:
     static Poco::FastMutex * _mutexes;
     static Poco::AtomicCounter _rc;
+
+    static OSSL_PROVIDER * legacy_provider;
 };
diff --git a/base/poco/Crypto/src/OpenSSLInitializer.cpp b/base/poco/Crypto/src/OpenSSLInitializer.cpp
index 31798e8dd7e6..0b83b3e21ff9 100644
--- a/base/poco/Crypto/src/OpenSSLInitializer.cpp
+++ b/base/poco/Crypto/src/OpenSSLInitializer.cpp
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <openssl/provider.h>
 #if OPENSSL_VERSION_NUMBER >= 0x0907000L
 #include
 #endif
@@ -36,6 +37,7 @@ namespace Crypto {
 Poco::FastMutex* OpenSSLInitializer::_mutexes(0);
 Poco::AtomicCounter OpenSSLInitializer::_rc;
+OSSL_PROVIDER * OpenSSLInitializer::legacy_provider;


 OpenSSLInitializer::OpenSSLInitializer()
@@ -67,12 +69,16 @@ void OpenSSLInitializer::initialize()
     SSL_library_init();
     SSL_load_error_strings();
     OpenSSL_add_all_algorithms();
-
+
     char seed[SEEDSIZE];
     RandomInputStream rnd;
     rnd.read(seed, sizeof(seed));
     RAND_seed(seed, SEEDSIZE);
-
+
+    legacy_provider = OSSL_PROVIDER_load(NULL, "legacy");
+    if (!legacy_provider)
+        throw std::runtime_error("Failed to load OpenSSL legacy provider");
+
     int nMutexes = CRYPTO_num_locks();
     _mutexes = new Poco::FastMutex[nMutexes];
     CRYPTO_set_locking_callback(&OpenSSLInitializer::lock);
@@ -80,8 +86,8 @@ void OpenSSLInitializer::initialize()
 // https://sourceforge.net/p/poco/bugs/110/
 //
 // From http://www.openssl.org/docs/crypto/threads.html :
-// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
-// then a default implementation is used - on Windows and BeOS this uses the system's
+// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
+// then a default implementation is used - on Windows and BeOS this uses the system's
 // default thread identifying APIs"
     CRYPTO_set_id_callback(&OpenSSLInitializer::id);
     CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate);
@@ -100,7 +106,8 @@ void OpenSSLInitializer::uninitialize()
         CRYPTO_set_locking_callback(0);
         CRYPTO_set_id_callback(0);
         delete [] _mutexes;
-
+
+        OSSL_PROVIDER_unload(legacy_provider);
         CONF_modules_free();
     }
 }
diff --git a/base/poco/NetSSL_OpenSSL/src/Context.cpp b/base/poco/NetSSL_OpenSSL/src/Context.cpp
index 69c88eef63ac..9acb17e49cdb 100644
--- a/base/poco/NetSSL_OpenSSL/src/Context.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp
@@ -466,7 +466,7 @@ void Context::flushSessionCache()
     poco_assert (isForServerUse());

     Poco::Timestamp now;
-    SSL_CTX_flush_sessions(_pSSLContext, static_cast(now.epochTime()));
+    SSL_CTX_flush_sessions_ex(_pSSLContext, static_cast(now.epochTime()));
 }
diff --git a/contrib/openssl b/contrib/openssl
index 2aa34c68d677..d0f95dba4cb0 160000
--- a/contrib/openssl
+++ b/contrib/openssl
@@ -1 +1 @@
-Subproject commit 2aa34c68d677b447fb85c55167d8d1ab98ba4def
+Subproject commit d0f95dba4cb06e912c131d64ec77acb20d270fd1
diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt
index 32d1c3a1080a..6a093495e1d6 100644
--- a/contrib/openssl-cmake/CMakeLists.txt
+++ b/contrib/openssl-cmake/CMakeLists.txt
@@ -28,7
+28,6 @@ endif() # (*) https://github.com/openssl/openssl/blob/master/INSTALL.md set(OPENSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/openssl) -set(OPENSSL_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/openssl) set(OPENSSLDIR "/etc/ssl" CACHE PATH "Set the default openssl directory") set(OPENSSL_ENGINESDIR "/usr/local/lib/engines-3" CACHE PATH "Set the default openssl directory for engines") @@ -75,7 +74,7 @@ elseif(ARCH_LOONGARCH64) add_definitions(-DOPENSSL_CPUID_OBJ -DL_ENDIAN) endif() -file(STRINGS "${PLATFORM_DIRECTORY}/include/openssl/opensslv.h" OPENSSL_VERSION_STR +file(STRINGS "common/include/openssl/opensslv.h" OPENSSL_VERSION_STR REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"([0-9])+\\.([0-9])+\\.([0-9])+\".*") string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$" "\\1" OPENSSL_VERSION_STR "${OPENSSL_VERSION_STR}") @@ -109,133 +108,18 @@ if(NOT ARCH_S390X) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") # only relevant for -DENABLE_OPENSSL_DYNAMIC=1 endif() -if(ARCH_AMD64) - if(OS_DARWIN) - set(OPENSSL_SYSTEM "macosx") - else() - macro(perl_generate_asm FILE_IN FILE_OUT) - execute_process(COMMAND sh -c "mkdir -p $(dirname ${FILE_OUT})") - # Manually set $CC because the called Perl scripts require it. - add_custom_command(OUTPUT ${FILE_OUT} - COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} ${FILE_OUT}) - endmacro() - - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aesni-mb-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-mb-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aesni-sha1-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-sha1-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aesni-sha256-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-sha256-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aesni-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/bsaes-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/bsaes-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/vpaes-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/aes/vpaes-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/rsaz-2k-avx512.pl ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-2k-avx512.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/rsaz-3k-avx512.pl ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-3k-avx512.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/rsaz-4k-avx512.pl ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-4k-avx512.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/rsaz-avx2.pl ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-avx2.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/rsaz-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/x86_64-gf2m.pl ${OPENSSL_BINARY_DIR}/crypto/bn/x86_64-gf2m.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/x86_64-mont.pl ${OPENSSL_BINARY_DIR}/crypto/bn/x86_64-mont.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/x86_64-mont5.pl ${OPENSSL_BINARY_DIR}/crypto/bn/x86_64-mont5.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/camellia/asm/cmll-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/camellia/cmll-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/chacha/asm/chacha-x86_64.pl 
${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/ec/asm/ecp_nistz256-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/ec/ecp_nistz256-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/ec/asm/x25519-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/ec/x25519-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/md5/asm/md5-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/md5/md5-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/aesni-gcm-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/aesni-gcm-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/aes-gcm-avx512.pl ${OPENSSL_BINARY_DIR}/crypto/modes/aes-gcm-avx512.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/x86_64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/x86_64cpuid.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/ghash-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/ghash-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/poly1305/asm/poly1305-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/poly1305/poly1305-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/rc4/asm/rc4-md5-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/rc4/rc4-md5-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/rc4/asm/rc4-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/rc4/rc4-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/keccak1600-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/sha/keccak1600-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha1-mb-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha1-mb-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha1-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha1-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha256-mb-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha256-mb-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha512-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha256-x86_64.s) # Looks like a terrible mistake but is needed, otherwise the build falls apart - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha512-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha512-x86_64.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/whrlpool/asm/wp-x86_64.pl ${OPENSSL_BINARY_DIR}/crypto/whrlpool/wp-x86_64.s) - endif() -elseif(ARCH_AARCH64) - if(OS_DARWIN) - set(OPENSSL_SYSTEM "macosx") - else() - macro(perl_generate_asm FILE_IN FILE_OUT) - add_custom_command(OUTPUT ${FILE_OUT} - COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) - endmacro() - - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aesv8-armx.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aesv8-armx.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/bsaes-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/aes/bsaes-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/vpaes-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/aes/vpaes-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/bn/asm/armv8-mont.pl ${OPENSSL_BINARY_DIR}/crypto/bn/armv8-mont.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/chacha/asm/chacha-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/chacha/asm/chacha-armv8-sve.pl ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-armv8-sve.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/ec/asm/ecp_nistz256-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/ec/ecp_nistz256-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/ec/asm/ecp_sm2p256-armv8.pl 
${OPENSSL_BINARY_DIR}/crypto/ec/ecp_sm2p256-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/arm64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/arm64cpuid.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/ghashv8-armx.pl ${OPENSSL_BINARY_DIR}/crypto/modes/ghashv8-armx.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/poly1305/asm/poly1305-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/poly1305/poly1305-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/keccak1600-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sha/keccak1600-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha1-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha1-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha512-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha256-armv8.S) # This is not a mistake - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/sha512-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sha/sha512-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/aes-gcm-armv8_64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/asm/aes-gcm-armv8_64.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/aes-gcm-armv8-unroll8_64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/asm/aes-gcm-armv8-unroll8_64.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sm3/asm/sm3-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sm3/asm/sm3-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sm4/asm/sm4-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sm4/asm/sm4-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sm4/asm/vpsm4-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sm4/asm/vpsm4-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sm4/asm/vpsm4_ex-armv8.pl ${OPENSSL_BINARY_DIR}/crypto/sm4/asm/vpsm4_ex-armv8.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/md5/asm/md5-aarch64.pl ${OPENSSL_BINARY_DIR}/crypto/md5/asm/md5-aarch64.S) - endif() -elseif(ARCH_PPC64LE) - macro(perl_generate_asm FILE_IN FILE_OUT) - add_custom_command(OUTPUT ${FILE_OUT} - COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} "linux64v2" ${FILE_OUT}) - endmacro() - - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aesp8-ppc.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aesp8-ppc.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/ghashp8-ppc.pl ${OPENSSL_BINARY_DIR}/crypto/modes/ghashp8-ppc.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/ppccpuid.pl ${OPENSSL_BINARY_DIR}/crypto/ppccpuid.s) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/aes-gcm-ppc.pl ${OPENSSL_BINARY_DIR}/crypto/modes/aes-gcm-ppc.s) -elseif(ARCH_S390X) - macro(perl_generate_asm FILE_IN FILE_OUT) - add_custom_command(OUTPUT ${FILE_OUT} - COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) - endmacro() - - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-s390x.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-s390x.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/s390xcpuid.pl ${OPENSSL_BINARY_DIR}/crypto/s390xcpuid.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/chacha/asm/chacha-s390x.pl ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-s390x.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/rc4/asm/rc4-s390x.pl ${OPENSSL_BINARY_DIR}/crypto/rc4/rc4-s390x.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/sha/asm/keccak1600-s390x.pl ${OPENSSL_BINARY_DIR}/crypto/sha/keccak1600-s390x.S) -elseif(ARCH_RISCV64) - macro(perl_generate_asm FILE_IN FILE_OUT) - add_custom_command(OUTPUT ${FILE_OUT} - COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" 
/usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) - endmacro() - - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/riscv64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/riscv64cpuid.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-riscv64-zkn.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-riscv64-zkn.S) - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/modes/asm/ghash-riscv64.pl ${OPENSSL_BINARY_DIR}/crypto/modes/ghash-riscv64.S) -elseif(ARCH_LOONGARCH64) - macro(perl_generate_asm FILE_IN FILE_OUT) - add_custom_command(OUTPUT ${FILE_OUT} - COMMAND ${CMAKE_COMMAND} -E env "CC=${CMAKE_CXX_COMPILER}" /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) - endmacro() - - perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/loongarch64cpuid.pl ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S) -endif() - set(CRYPTO_SRC - der_digests_gen.c - der_dsa_gen.c - der_ec_gen.c - der_ecx_gen.c - der_rsa_gen.c - der_wrap_gen.c - der_sm2_gen.c + common/providers/der_digests_gen.c + common/providers/der_dsa_gen.c + common/providers/der_ec_gen.c + common/providers/der_ecx_gen.c + common/providers/der_ml_dsa_gen.c + common/providers/der_rsa_gen.c + common/providers/der_slh_dsa_gen.c + common/providers/der_sm2_gen.c + common/providers/der_wrap_gen.c - ${PLATFORM_DIRECTORY}/params_idx.c + common/params_idx.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cfb.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_ecb.c @@ -429,6 +313,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/comp/c_zstd.c ${OPENSSL_SOURCE_DIR}/crypto/comp/comp_err.c ${OPENSSL_SOURCE_DIR}/crypto/comp/comp_lib.c + ${OPENSSL_SOURCE_DIR}/crypto/comp_methods.c ${OPENSSL_SOURCE_DIR}/crypto/conf/conf_api.c ${OPENSSL_SOURCE_DIR}/crypto/conf/conf_def.c ${OPENSSL_SOURCE_DIR}/crypto/conf/conf_err.c @@ -460,6 +345,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/ct/ct_x509v3.c ${OPENSSL_SOURCE_DIR}/crypto/ctype.c ${OPENSSL_SOURCE_DIR}/crypto/cversion.c + ${OPENSSL_SOURCE_DIR}/crypto/defaults.c ${OPENSSL_SOURCE_DIR}/crypto/der_writer.c ${OPENSSL_SOURCE_DIR}/crypto/des/cbc_cksm.c ${OPENSSL_SOURCE_DIR}/crypto/des/cbc_enc.c @@ -676,7 +562,9 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/evp/pmeth_check.c ${OPENSSL_SOURCE_DIR}/crypto/evp/pmeth_gn.c ${OPENSSL_SOURCE_DIR}/crypto/evp/pmeth_lib.c + ${OPENSSL_SOURCE_DIR}/crypto/evp/s_lib.c ${OPENSSL_SOURCE_DIR}/crypto/evp/signature.c + ${OPENSSL_SOURCE_DIR}/crypto/evp/skeymgmt_meth.c ${OPENSSL_SOURCE_DIR}/crypto/ex_data.c ${OPENSSL_SOURCE_DIR}/crypto/ffc/ffc_backend.c ${OPENSSL_SOURCE_DIR}/crypto/ffc/ffc_dh.c @@ -686,6 +574,8 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/ffc/ffc_params_generate.c ${OPENSSL_SOURCE_DIR}/crypto/ffc/ffc_params_validate.c ${OPENSSL_SOURCE_DIR}/crypto/getenv.c + ${OPENSSL_SOURCE_DIR}/crypto/hashtable/hashfunc.c + ${OPENSSL_SOURCE_DIR}/crypto/hashtable/hashtable.c ${OPENSSL_SOURCE_DIR}/crypto/hmac/hmac.c ${OPENSSL_SOURCE_DIR}/crypto/hpke/hpke.c ${OPENSSL_SOURCE_DIR}/crypto/hpke/hpke_util.c @@ -697,6 +587,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/idea/i_ecb.c ${OPENSSL_SOURCE_DIR}/crypto/idea/i_ofb64.c ${OPENSSL_SOURCE_DIR}/crypto/idea/i_skey.c + ${OPENSSL_SOURCE_DIR}/crypto/indicator_core.c ${OPENSSL_SOURCE_DIR}/crypto/info.c ${OPENSSL_SOURCE_DIR}/crypto/init.c ${OPENSSL_SOURCE_DIR}/crypto/initthread.c @@ -712,6 +603,15 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/mdc2/mdc2dgst.c ${OPENSSL_SOURCE_DIR}/crypto/mem.c ${OPENSSL_SOURCE_DIR}/crypto/mem_sec.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_encoders.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_key.c + 
${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_key_compress.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_matrix.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_ntt.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_params.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_sample.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_dsa/ml_dsa_sign.c + ${OPENSSL_SOURCE_DIR}/crypto/ml_kem/ml_kem.c ${OPENSSL_SOURCE_DIR}/crypto/modes/cbc128.c ${OPENSSL_SOURCE_DIR}/crypto/modes/ccm128.c ${OPENSSL_SOURCE_DIR}/crypto/modes/cfb128.c @@ -744,6 +644,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/ocsp/ocsp_srv.c ${OPENSSL_SOURCE_DIR}/crypto/ocsp/ocsp_vfy.c ${OPENSSL_SOURCE_DIR}/crypto/ocsp/v3_ocsp.c + ${OPENSSL_SOURCE_DIR}/crypto/ocsp/v3_ocsp.c ${OPENSSL_SOURCE_DIR}/crypto/packet.c ${OPENSSL_SOURCE_DIR}/crypto/param_build.c ${OPENSSL_SOURCE_DIR}/crypto/param_build_set.c @@ -854,6 +755,16 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/sha/sha512.c ${OPENSSL_SOURCE_DIR}/crypto/siphash/siphash.c ${OPENSSL_SOURCE_DIR}/crypto/sleep.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_adrs.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_dsa.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_dsa_hash_ctx.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_dsa_key.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_fors.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_hash.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_hypertree.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_params.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_wots.c + ${OPENSSL_SOURCE_DIR}/crypto/slh_dsa/slh_xmss.c ${OPENSSL_SOURCE_DIR}/crypto/sm2/sm2_crypt.c ${OPENSSL_SOURCE_DIR}/crypto/sm2/sm2_err.c ${OPENSSL_SOURCE_DIR}/crypto/sm2/sm2_key.c @@ -864,6 +775,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/sparse_array.c ${OPENSSL_SOURCE_DIR}/crypto/srp/srp_lib.c ${OPENSSL_SOURCE_DIR}/crypto/srp/srp_vfy.c + ${OPENSSL_SOURCE_DIR}/crypto/ssl_err.c ${OPENSSL_SOURCE_DIR}/crypto/stack/stack.c ${OPENSSL_SOURCE_DIR}/crypto/store/store_err.c ${OPENSSL_SOURCE_DIR}/crypto/store/store_init.c @@ -912,14 +824,24 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/x509/pcy_map.c ${OPENSSL_SOURCE_DIR}/crypto/x509/pcy_node.c ${OPENSSL_SOURCE_DIR}/crypto/x509/pcy_tree.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/t_acert.c ${OPENSSL_SOURCE_DIR}/crypto/x509/t_crl.c ${OPENSSL_SOURCE_DIR}/crypto/x509/t_req.c ${OPENSSL_SOURCE_DIR}/crypto/x509/t_x509.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_aaa.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_ac_tgt.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_addr.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_admis.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_akeya.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_akeya.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_akid.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_asid.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_attrdesc.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_attrmap.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_audit_id.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_authattid.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_battcons.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_battcons.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_bcons.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_bitst.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_conf.c @@ -933,6 +855,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_ind_iss.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_info.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_int.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_iobo.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_ist.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_lib.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_ncons.c @@ -945,15 +868,20 @@ set(CRYPTO_SRC 
${OPENSSL_SOURCE_DIR}/crypto/x509/v3_pmaps.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_prn.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_purp.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_rolespec.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_san.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_sda.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_single_use.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_skid.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_soa_id.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_sxnet.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_timespec.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_tlsf.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_usernotice.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_utf8.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3_utl.c ${OPENSSL_SOURCE_DIR}/crypto/x509/v3err.c + ${OPENSSL_SOURCE_DIR}/crypto/x509/x509_acert.c ${OPENSSL_SOURCE_DIR}/crypto/x509/x509_att.c ${OPENSSL_SOURCE_DIR}/crypto/x509/x509_cmp.c ${OPENSSL_SOURCE_DIR}/crypto/x509/x509_d2.c @@ -993,8 +921,11 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/providers/common/der/der_ec_key.c ${OPENSSL_SOURCE_DIR}/providers/common/der/der_ec_sig.c ${OPENSSL_SOURCE_DIR}/providers/common/der/der_ecx_key.c + ${OPENSSL_SOURCE_DIR}/providers/common/der/der_ml_dsa_key.c ${OPENSSL_SOURCE_DIR}/providers/common/der/der_rsa_key.c ${OPENSSL_SOURCE_DIR}/providers/common/der/der_rsa_sig.c + ${OPENSSL_SOURCE_DIR}/providers/common/der/der_slh_dsa_key.c + ${OPENSSL_SOURCE_DIR}/providers/common/der/der_slh_dsa_key.c ${OPENSSL_SOURCE_DIR}/providers/common/der/der_sm2_key.c ${OPENSSL_SOURCE_DIR}/providers/common/der/der_sm2_sig.c ${OPENSSL_SOURCE_DIR}/providers/common/digest_to_nid.c @@ -1027,6 +958,7 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aes_xts.c ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aes_xts_fips.c ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aes_xts_hw.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aes_xts_hw.c ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aria.c ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aria_ccm.c ${OPENSSL_SOURCE_DIR}/providers/implementations/ciphers/cipher_aria_ccm_hw.c @@ -1106,6 +1038,9 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/providers/implementations/encode_decode/encode_key2ms.c ${OPENSSL_SOURCE_DIR}/providers/implementations/encode_decode/encode_key2text.c ${OPENSSL_SOURCE_DIR}/providers/implementations/encode_decode/endecoder_common.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/encode_decode/ml_common_codecs.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/encode_decode/ml_dsa_codecs.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/encode_decode/ml_kem_codecs.c ${OPENSSL_SOURCE_DIR}/providers/implementations/exchange/dh_exch.c ${OPENSSL_SOURCE_DIR}/providers/implementations/exchange/ecdh_exch.c ${OPENSSL_SOURCE_DIR}/providers/implementations/exchange/ecx_exch.c @@ -1126,16 +1061,27 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/providers/implementations/kdfs/tls1_prf.c ${OPENSSL_SOURCE_DIR}/providers/implementations/kdfs/x942kdf.c ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/ec_kem.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/ec_kem.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/ecx_kem.c ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/ecx_kem.c ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/kem_util.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/kem_util.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/ml_kem_kem.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/mlx_kem.c + 
${OPENSSL_SOURCE_DIR}/providers/implementations/kem/rsa_kem.c ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/rsa_kem.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/kem/template_kem.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/dh_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/dsa_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/ec_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/ecx_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/kdf_legacy_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/mac_legacy_kmgmt.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/ml_dsa_kmgmt.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/ml_kem_kmgmt.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/mlx_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/rsa_kmgmt.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/keymgmt/slh_dsa_kmgmt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/macs/blake2b_mac.c ${OPENSSL_SOURCE_DIR}/providers/implementations/macs/blake2s_mac.c ${OPENSSL_SOURCE_DIR}/providers/implementations/macs/cmac_prov.c @@ -1144,7 +1090,6 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/providers/implementations/macs/kmac_prov.c ${OPENSSL_SOURCE_DIR}/providers/implementations/macs/poly1305_prov.c ${OPENSSL_SOURCE_DIR}/providers/implementations/macs/siphash_prov.c - ${OPENSSL_SOURCE_DIR}/providers/implementations/rands/crngt.c ${OPENSSL_SOURCE_DIR}/providers/implementations/rands/drbg.c ${OPENSSL_SOURCE_DIR}/providers/implementations/rands/drbg_ctr.c ${OPENSSL_SOURCE_DIR}/providers/implementations/rands/drbg_hash.c @@ -1159,22 +1104,26 @@ set(CRYPTO_SRC ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/ecdsa_sig.c ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/eddsa_sig.c ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/mac_legacy_sig.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/ml_dsa_sig.c ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/rsa_sig.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/slh_dsa_sig.c ${OPENSSL_SOURCE_DIR}/providers/implementations/signature/sm2_sig.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/skeymgmt/aes_skmgmt.c + ${OPENSSL_SOURCE_DIR}/providers/implementations/skeymgmt/generic.c ${OPENSSL_SOURCE_DIR}/providers/implementations/storemgmt/file_store.c ${OPENSSL_SOURCE_DIR}/providers/implementations/storemgmt/file_store_any2obj.c ${OPENSSL_SOURCE_DIR}/providers/nullprov.c ${OPENSSL_SOURCE_DIR}/providers/prov_running.c - ${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls_pad.c ${OPENSSL_SOURCE_DIR}/ssl/record/methods/ssl3_cbc.c + ${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls_pad.c +) + + +set(CRYPTO_SRC ${CRYPTO_SRC} + ${OPENSSL_SOURCE_DIR}/providers/legacyprov.c + ${OPENSSL_SOURCE_DIR}/providers/defltprov.c ) -if(NOT ENABLE_OPENSSL_DYNAMIC) - set(CRYPTO_SRC ${CRYPTO_SRC} - ${OPENSSL_SOURCE_DIR}/providers/fips/fips_entry.c - ${OPENSSL_SOURCE_DIR}/providers/fips/fipsprov.c - ) -endif() if(ARCH_AMD64) if (OS_DARWIN) @@ -1182,55 +1131,59 @@ if(ARCH_AMD64) ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c - ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c - ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c 
${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c + ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ) else() set(CRYPTO_SRC ${CRYPTO_SRC} + asm/crypto/aes/aes-x86_64.s + asm/crypto/aes/aesni-mb-x86_64.s + asm/crypto/aes/aesni-sha1-x86_64.s + asm/crypto/aes/aesni-sha256-x86_64.s + asm/crypto/aes/aesni-x86_64.s + asm/crypto/aes/aesni-xts-avx512.s + asm/crypto/aes/bsaes-x86_64.s + asm/crypto/aes/vpaes-x86_64.s + asm/crypto/bn/rsaz-2k-avx512.s + asm/crypto/bn/rsaz-2k-avxifma.s + asm/crypto/bn/rsaz-3k-avx512.s + asm/crypto/bn/rsaz-3k-avxifma.s + asm/crypto/bn/rsaz-4k-avx512.s + asm/crypto/bn/rsaz-4k-avxifma.s + asm/crypto/bn/rsaz-avx2.s + asm/crypto/bn/rsaz-x86_64.s + asm/crypto/bn/x86_64-gf2m.s + asm/crypto/bn/x86_64-mont.s + asm/crypto/bn/x86_64-mont5.s + asm/crypto/camellia/cmll-x86_64.s + asm/crypto/chacha/chacha-x86_64.s + asm/crypto/ec/ecp_nistz256-x86_64.s + asm/crypto/ec/x25519-x86_64.s + asm/crypto/md5/md5-x86_64.s + asm/crypto/modes/aes-gcm-avx512.s + asm/crypto/modes/aesni-gcm-x86_64.s + asm/crypto/modes/ghash-x86_64.s + asm/crypto/poly1305/poly1305-x86_64.s + asm/crypto/rc4/rc4-md5-x86_64.s + asm/crypto/rc4/rc4-x86_64.s + asm/crypto/sha/keccak1600-x86_64.s + asm/crypto/sha/sha1-mb-x86_64.s + asm/crypto/sha/sha1-x86_64.s + asm/crypto/sha/sha256-mb-x86_64.s + asm/crypto/sha/sha256-x86_64.s + asm/crypto/sha/sha512-x86_64.s + asm/crypto/whrlpool/wp-x86_64.s + asm/crypto/x86_64cpuid.s ${OPENSSL_SOURCE_DIR}/crypto/bn/asm/x86_64-gcc.c ${OPENSSL_SOURCE_DIR}/crypto/bn/rsaz_exp.c - ${OPENSSL_BINARY_DIR}/crypto/aes/aes-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-mb-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-sha1-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-sha256-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/aes/aesni-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/aes/bsaes-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/aes/vpaes-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-2k-avx512.s - ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-3k-avx512.s - ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-4k-avx512.s - ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-avx2.s - ${OPENSSL_BINARY_DIR}/crypto/bn/rsaz-x86_64.s ${OPENSSL_SOURCE_DIR}/crypto/bn/rsaz_exp_x2.c - ${OPENSSL_BINARY_DIR}/crypto/bn/x86_64-gf2m.s - ${OPENSSL_BINARY_DIR}/crypto/bn/x86_64-mont.s - ${OPENSSL_BINARY_DIR}/crypto/bn/x86_64-mont5.s - ${OPENSSL_BINARY_DIR}/crypto/camellia/cmll-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/ec/ecp_nistz256-x86_64.s ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c - ${OPENSSL_BINARY_DIR}/crypto/ec/x25519-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/x86_64cpuid.s - ${OPENSSL_BINARY_DIR}/crypto/md5/md5-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/modes/aesni-gcm-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/modes/aes-gcm-avx512.s - ${OPENSSL_BINARY_DIR}/crypto/modes/ghash-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/poly1305/poly1305-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/rc4/rc4-md5-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/rc4/rc4-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/sha/keccak1600-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/sha/sha1-mb-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/sha/sha1-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/sha/sha256-mb-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/sha/sha256-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/sha/sha512-x86_64.s - ${OPENSSL_BINARY_DIR}/crypto/whrlpool/wp-x86_64.s ) endif() elseif(ARCH_AARCH64) @@ -1239,127 +1192,134 @@ elseif(ARCH_AARCH64) 
${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c - ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c - ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c ${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c + ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ) else() set(CRYPTO_SRC ${CRYPTO_SRC} + common/params_idx.c + + asm/crypto/aes/aesv8-armx.S + asm/crypto/aes/bsaes-armv8.S + asm/crypto/aes/vpaes-armv8.S + asm/crypto/arm64cpuid.S + asm/crypto/bn/armv8-mont.S + asm/crypto/chacha/chacha-armv8-sve.S + asm/crypto/chacha/chacha-armv8.S + asm/crypto/ec/ecp_nistz256-armv8.S + asm/crypto/ec/ecp_sm2p256-armv8.S + asm/crypto/md5/asm/md5-aarch64.S + asm/crypto/modes/asm/aes-gcm-armv8-unroll8_64.S + asm/crypto/modes/asm/aes-gcm-armv8_64.S + asm/crypto/modes/ghashv8-armx.S + asm/crypto/poly1305/poly1305-armv8.S + asm/crypto/sha/keccak1600-armv8.S + asm/crypto/sha/sha1-armv8.S + asm/crypto/sha/sha256-armv8.S + asm/crypto/sha/sha512-armv8.S + asm/crypto/sm3/asm/sm3-armv8.S + asm/crypto/sm4/asm/sm4-armv8.S + asm/crypto/sm4/asm/vpsm4-armv8.S + asm/crypto/sm4/asm/vpsm4_ex-armv8.S ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c + ${OPENSSL_SOURCE_DIR}/crypto/armcap.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c ${OPENSSL_SOURCE_DIR}/crypto/bn/rsaz_exp.c ${OPENSSL_SOURCE_DIR}/crypto/bn/rsaz_exp_x2.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c - ${OPENSSL_SOURCE_DIR}/crypto/armcap.c - ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c - ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c - ${OPENSSL_BINARY_DIR}/crypto/aes/aesv8-armx.S - ${OPENSSL_BINARY_DIR}/crypto/aes/vpaes-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/bn/armv8-mont.S - ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/ec/ecp_nistz256-armv8.S ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_sm2p256.c ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_sm2p256_table.c - ${OPENSSL_BINARY_DIR}/crypto/arm64cpuid.S - ${OPENSSL_BINARY_DIR}/crypto/modes/ghashv8-armx.S - ${OPENSSL_BINARY_DIR}/crypto/poly1305/poly1305-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/sha/keccak1600-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/sha/sha1-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/sha/sha256-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/sha/sha512-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/modes/asm/aes-gcm-armv8_64.S - ${OPENSSL_BINARY_DIR}/crypto/sm4/asm/sm4-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/sm4/asm/vpsm4-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/md5/asm/md5-aarch64.S - ${OPENSSL_BINARY_DIR}/crypto/aes/bsaes-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-armv8-sve.S - ${OPENSSL_BINARY_DIR}/crypto/ec/ecp_sm2p256-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/modes/asm/aes-gcm-armv8-unroll8_64.S - ${OPENSSL_BINARY_DIR}/crypto/sm3/asm/sm3-armv8.S - ${OPENSSL_BINARY_DIR}/crypto/sm4/asm/vpsm4_ex-armv8.S - - ${PLATFORM_DIRECTORY}/params_idx.c + ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ) endif() elseif(ARCH_PPC64LE) set(CRYPTO_SRC 
${CRYPTO_SRC} - ${OPENSSL_BINARY_DIR}/crypto/modes/ghashp8-ppc.s - ${OPENSSL_BINARY_DIR}/crypto/aes/aesp8-ppc.s - ${OPENSSL_BINARY_DIR}/crypto/ppccpuid.s - ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c + asm/crypto/aes/aesp8-ppc.s + asm/crypto/modes/aes-gcm-ppc.s + asm/crypto/modes/ghashp8-ppc.s + asm/crypto/ppccpuid.s ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/ppccap.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ${OPENSSL_SOURCE_DIR}/engines/e_afalg.c - ${OPENSSL_SOURCE_DIR}/crypto/ppccap.c - ${OPENSSL_BINARY_DIR}/crypto/modes/aes-gcm-ppc.s ) elseif(ARCH_S390X) set(CRYPTO_SRC ${CRYPTO_SRC} - ${OPENSSL_BINARY_DIR}/crypto/aes/aes-s390x.S - ${OPENSSL_BINARY_DIR}/crypto/s390xcpuid.S + asm/crypto/aes/aes-s390x.S + asm/crypto/chacha/chacha-s390x.S + asm/crypto/rc4/rc4-s390x.S + asm/crypto/s390xcpuid.S + asm/crypto/sha/keccak1600-s390x.S ${OPENSSL_SOURCE_DIR}/crypto/bn/asm/s390x.S - ${OPENSSL_SOURCE_DIR}/crypto/s390xcap.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_s390x.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c - ${OPENSSL_BINARY_DIR}/crypto/chacha/chacha-s390x.S - ${OPENSSL_BINARY_DIR}/crypto/rc4/rc4-s390x.S - ${OPENSSL_BINARY_DIR}/crypto/sha/keccak1600-s390x.S + ${OPENSSL_SOURCE_DIR}/crypto/s390xcap.c ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c ) elseif(ARCH_RISCV64) set(CRYPTO_SRC ${CRYPTO_SRC} - ${OPENSSL_BINARY_DIR}/crypto/aes/aes-riscv64-zkn.S - ${OPENSSL_BINARY_DIR}/crypto/modes/ghash-riscv64.S - ${OPENSSL_BINARY_DIR}/crypto/riscv64cpuid.S + asm/crypto/aes/aes-riscv64-zkn.S + asm/crypto/aes/aes-riscv64-zvbb-zvkg-zvkned.S + asm/crypto/aes/aes-riscv64-zvkb-zvkned.S + asm/crypto/aes/aes-riscv64-zvkned.S + asm/crypto/modes/aes-gcm-riscv64-zvkb-zvkg-zvkned.S + asm/crypto/modes/ghash-riscv64-zvkb-zvbc.S + asm/crypto/modes/ghash-riscv64-zvkg.S + asm/crypto/modes/ghash-riscv64.S + asm/crypto/riscv64cpuid.S + asm/crypto/sm4/sm4-riscv64-zvksed.S ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c - ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c - ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_dgst.c - ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c ${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c ${OPENSSL_SOURCE_DIR}/crypto/riscvcap.c + ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_dgst.c ) elseif(ARCH_LOONGARCH64) set(CRYPTO_SRC ${CRYPTO_SRC} - ${OPENSSL_BINARY_DIR}/crypto/loongarch64cpuid.S + asm/crypto/loongarch64cpuid.S ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_cbc.c ${OPENSSL_SOURCE_DIR}/crypto/aes/aes_core.c ${OPENSSL_SOURCE_DIR}/crypto/bn/bn_asm.c - ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c - 
${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c - ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_dgst.c - ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/camellia.c ${OPENSSL_SOURCE_DIR}/crypto/camellia/cmll_cbc.c + ${OPENSSL_SOURCE_DIR}/crypto/chacha/chacha_enc.c + ${OPENSSL_SOURCE_DIR}/crypto/ec/ecp_nistz256.c + ${OPENSSL_SOURCE_DIR}/crypto/loongarchcap.c ${OPENSSL_SOURCE_DIR}/crypto/mem_clr.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_enc.c ${OPENSSL_SOURCE_DIR}/crypto/rc4/rc4_skey.c - ${OPENSSL_SOURCE_DIR}/crypto/loongarchcap.c + ${OPENSSL_SOURCE_DIR}/crypto/sha/keccak1600.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_block.c + ${OPENSSL_SOURCE_DIR}/crypto/whrlpool/wp_dgst.c ) endif() @@ -1368,45 +1328,28 @@ set(SSL_SRC ${OPENSSL_SOURCE_DIR}/ssl/d1_lib.c ${OPENSSL_SOURCE_DIR}/ssl/d1_msg.c ${OPENSSL_SOURCE_DIR}/ssl/d1_srtp.c - ${OPENSSL_SOURCE_DIR}/ssl/event_queue.c ${OPENSSL_SOURCE_DIR}/ssl/methods.c ${OPENSSL_SOURCE_DIR}/ssl/pqueue.c ${OPENSSL_SOURCE_DIR}/ssl/priority_queue.c - ${OPENSSL_SOURCE_DIR}/ssl/s3_enc.c - ${OPENSSL_SOURCE_DIR}/ssl/s3_lib.c - ${OPENSSL_SOURCE_DIR}/ssl/s3_msg.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_asn1.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_cert.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_cert_comp.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_ciph.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_conf.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_err.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_err_legacy.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_init.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_lib.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_mcnf.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_rsa.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_rsa_legacy.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_sess.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_stat.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_txt.c - ${OPENSSL_SOURCE_DIR}/ssl/ssl_utst.c - ${OPENSSL_SOURCE_DIR}/ssl/t1_enc.c - ${OPENSSL_SOURCE_DIR}/ssl/t1_lib.c - ${OPENSSL_SOURCE_DIR}/ssl/t1_trce.c - ${OPENSSL_SOURCE_DIR}/ssl/tls13_enc.c - ${OPENSSL_SOURCE_DIR}/ssl/tls_depr.c - ${OPENSSL_SOURCE_DIR}/ssl/tls_srp.c ${OPENSSL_SOURCE_DIR}/ssl/quic/cc_newreno.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/json_enc.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/qlog.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/qlog_event_helpers.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_ackm.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_cfq.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_channel.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_demux.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_engine.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_fc.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_fifd.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_impl.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_lcidm.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_method.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_obj.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_port.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_rcidm.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_reactor.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_reactor_wait_ctx.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_record_rx.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_record_shared.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_record_tx.c @@ -1414,20 +1357,24 @@ set(SSL_SRC ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_rstream.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_rx_depack.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_sf_list.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_srt_gen.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_srtm.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_sstream.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_sstream.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_statm.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_statm.c 
${OPENSSL_SOURCE_DIR}/ssl/quic/quic_stream_map.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_thread_assist.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_tls.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_tls_api.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_trace.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_tserver.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_txp.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_txpim.c + ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_types.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_wire.c ${OPENSSL_SOURCE_DIR}/ssl/quic/quic_wire_pkt.c ${OPENSSL_SOURCE_DIR}/ssl/quic/uint_set.c - ${OPENSSL_SOURCE_DIR}/ssl/record/rec_layer_d1.c - ${OPENSSL_SOURCE_DIR}/ssl/record/rec_layer_s3.c ${OPENSSL_SOURCE_DIR}/ssl/record/methods/dtls_meth.c ${OPENSSL_SOURCE_DIR}/ssl/record/methods/ssl3_meth.c ${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls13_meth.c @@ -1435,6 +1382,29 @@ set(SSL_SRC ${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls_common.c ${OPENSSL_SOURCE_DIR}/ssl/record/methods/tls_multib.c ${OPENSSL_SOURCE_DIR}/ssl/record/methods/tlsany_meth.c + ${OPENSSL_SOURCE_DIR}/ssl/record/rec_layer_d1.c + ${OPENSSL_SOURCE_DIR}/ssl/record/rec_layer_s3.c + ${OPENSSL_SOURCE_DIR}/ssl/rio/poll_builder.c + ${OPENSSL_SOURCE_DIR}/ssl/rio/poll_immediate.c + ${OPENSSL_SOURCE_DIR}/ssl/rio/rio_notifier.c + ${OPENSSL_SOURCE_DIR}/ssl/s3_enc.c + ${OPENSSL_SOURCE_DIR}/ssl/s3_lib.c + ${OPENSSL_SOURCE_DIR}/ssl/s3_msg.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_asn1.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_cert.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_cert_comp.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_ciph.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_conf.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_err_legacy.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_init.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_lib.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_mcnf.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_rsa.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_rsa_legacy.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_sess.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_stat.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_txt.c + ${OPENSSL_SOURCE_DIR}/ssl/ssl_utst.c ${OPENSSL_SOURCE_DIR}/ssl/statem/extensions.c ${OPENSSL_SOURCE_DIR}/ssl/statem/extensions_clnt.c ${OPENSSL_SOURCE_DIR}/ssl/statem/extensions_cust.c @@ -1444,6 +1414,12 @@ set(SSL_SRC ${OPENSSL_SOURCE_DIR}/ssl/statem/statem_dtls.c ${OPENSSL_SOURCE_DIR}/ssl/statem/statem_lib.c ${OPENSSL_SOURCE_DIR}/ssl/statem/statem_srvr.c + ${OPENSSL_SOURCE_DIR}/ssl/t1_enc.c + ${OPENSSL_SOURCE_DIR}/ssl/t1_lib.c + ${OPENSSL_SOURCE_DIR}/ssl/t1_trce.c + ${OPENSSL_SOURCE_DIR}/ssl/tls13_enc.c + ${OPENSSL_SOURCE_DIR}/ssl/tls_depr.c + ${OPENSSL_SOURCE_DIR}/ssl/tls_srp.c ) if(ENABLE_OPENSSL_DYNAMIC) @@ -1458,14 +1434,18 @@ else() add_library(crypto ${CRYPTO_SRC}) add_library(ssl ${SSL_SRC}) endif() +# Enable legacy crypto support for OpenSSL 3.+ +# to avoid runtime `dlopen(legacy.so)` in both static and dynamic builds. 
+add_definitions(-DSTATIC_LEGACY) target_include_directories(crypto - SYSTEM PUBLIC "${PLATFORM_DIRECTORY}/include" + SYSTEM PUBLIC "common/include" PRIVATE "${PLATFORM_DIRECTORY}/include_private") target_include_directories(crypto SYSTEM PUBLIC ${OPENSSL_SOURCE_DIR}/include PRIVATE ${OPENSSL_SOURCE_DIR}/providers/common/include + PRIVATE ${OPENSSL_SOURCE_DIR}/providers/fips/include PRIVATE ${OPENSSL_SOURCE_DIR}/providers/implementations/include PRIVATE ${OPENSSL_SOURCE_DIR}/crypto PRIVATE ${OPENSSL_SOURCE_DIR}/crypto/include @@ -1483,7 +1463,7 @@ add_library(OpenSSL::Crypto ALIAS crypto) add_library(OpenSSL::SSL ALIAS ssl) if(OPENSSL_AUX_BUILD_FOR_CROSS_COMPILATION) - install(DIRECTORY "${PLATFORM_DIRECTORY}/include" DESTINATION "${CMAKE_BINARY_DIR}") + install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/common/include" DESTINATION "${CMAKE_BINARY_DIR}") install(DIRECTORY "${OPENSSL_SOURCE_DIR}/include" DESTINATION "${CMAKE_BINARY_DIR}") else() install(FILES openssl.conf fipsmodule.conf DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) diff --git a/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zkn.S b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zkn.S new file mode 100644 index 000000000000..80ae8736e90f --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zkn.S @@ -0,0 +1,704 @@ +.text +.balign 16 +.globl rv64i_zkne_encrypt +.type rv64i_zkne_encrypt,@function +rv64i_zkne_encrypt: + addi sp,sp,-16 + sd x8,8(sp) + sd x9,0(sp) + + # Load input to block cipher + ld x6,0(x10) + ld x7,8(x10) + + # Load key + ld x13,0(x12) + ld x14,8(x12) + + # Load number of rounds + lwu x30,240(x12) + + # initial transformation + xor x6,x6,x13 + xor x7,x7,x14 + + # The main loop only executes the first N-1 rounds. + add x30,x30,-1 + + # Do Nr - 1 rounds (final round is special) +1: + .word 913507379 + .word 912491699 + + # Update key ptr to point to next key in schedule + add x12,x12,16 + + # Grab next key in schedule + ld x13,0(x12) + ld x14,8(x12) + xor x6,x8,x13 + xor x7,x9,x14 + + add x30,x30,-1 + bgtz x30,1b + + # final round + .word 846398515 + .word 845382835 + + # since not added 16 before + ld x13,16(x12) + ld x14,24(x12) + xor x6,x8,x13 + xor x7,x9,x14 + + sd x6,0(x11) + sd x7,8(x11) + + # Pop registers and return + ld x8,8(sp) + ld x9,0(sp) + addi sp,sp,16 + ret +.text +.balign 16 +.globl rv64i_zknd_decrypt +.type rv64i_zknd_decrypt,@function +rv64i_zknd_decrypt: + addi sp,sp,-16 + sd x8,8(sp) + sd x9,0(sp) + + # Load input to block cipher + ld x6,0(x10) + ld x7,8(x10) + + # Load number of rounds + lwu x30,240(x12) + + # Load the last key + slli x13,x30,4 + add x12,x12,x13 + ld x13,0(x12) + ld x14,8(x12) + + xor x6,x6,x13 + xor x7,x7,x14 + + # The main loop only executes the first N-1 rounds. 
+ add x30,x30,-1 + + # Do Nr - 1 rounds (final round is special) +1: + .word 1047725107 + .word 1046709427 + + # Update key ptr to point to next key in schedule + add x12,x12,-16 + + # Grab next key in schedule + ld x13,0(x12) + ld x14,8(x12) + xor x6,x8,x13 + xor x7,x9,x14 + + add x30,x30,-1 + bgtz x30,1b + + # final round + .word 980616243 + .word 979600563 + + add x12,x12,-16 + ld x13,0(x12) + ld x14,8(x12) + xor x6,x8,x13 + xor x7,x9,x14 + + sd x6,0(x11) + sd x7,8(x11) + # Pop registers and return + ld x8,8(sp) + ld x9,0(sp) + addi sp,sp,16 + ret +.text +.balign 16 +.globl rv64i_zkne_set_encrypt_key +.type rv64i_zkne_set_encrypt_key,@function +rv64i_zkne_set_encrypt_key: + addi sp,sp,-16 + sd x8,0(sp) + bnez x10,1f # if (!userKey || !key) return -1; + bnez x12,1f + li a0,-1 + ret +1: + # Determine number of rounds from key size in bits + li x6,128 + bne x11,x6,1f + li x7,10 # key->rounds = 10 if bits == 128 + sw x7,240(x12) # store key->rounds + ld x6,0(x10) + ld x7,8(x10) + sd x6,0(x12) + sd x7,8(x12) + .word 822318099 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 823366675 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 824415251 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 825463827 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 826512403 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 827560979 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 828609555 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 829658131 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 830706707 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + .word 831755283 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + + j 4f +1: + li x6,192 + bne x11,x6,2f + li x7,12 # key->rounds = 12 if bits == 192 + sw x7,240(x12) # store key->rounds + ld x6,0(x10) + ld x7,8(x10) + ld x8,16(x10) + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 822351507 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 823400083 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 824448659 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 825497235 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 826545811 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 827594387 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 828642963 + .word 2120647475 + .word 2121466803 + .word 2122548275 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + .word 829691539 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + + j 4f +2: + li x7,14 # key->rounds = 14 if bits == 256 + li x6,256 + beq x11,x6,3f + li a0,-2 # If bits != 128, 192, or 256, return -2 + j 5f +3: + sw x7,240(x12) # store key->rounds + ld x6,0(x10) + ld x7,8(x10) + 
ld x8,16(x10) + ld x13,24(x10) + sd x6,0(x12) + sd x7,8(x12) + sd x8,16(x12) + sd x13,24(x12) + .word 822515475 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + .word 832804627 + .word 2122777651 + .word 2127824563 + sd x8,16(x12) + sd x13,24(x12) + .word 823564051 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + .word 832804627 + .word 2122777651 + .word 2127824563 + sd x8,16(x12) + sd x13,24(x12) + .word 824612627 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + .word 832804627 + .word 2122777651 + .word 2127824563 + sd x8,16(x12) + sd x13,24(x12) + .word 825661203 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + .word 832804627 + .word 2122777651 + .word 2127824563 + sd x8,16(x12) + sd x13,24(x12) + .word 826709779 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + .word 832804627 + .word 2122777651 + .word 2127824563 + sd x8,16(x12) + sd x13,24(x12) + .word 827758355 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + .word 832804627 + .word 2122777651 + .word 2127824563 + sd x8,16(x12) + sd x13,24(x12) + .word 828806931 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + +4: # return 0 + li a0,0 +5: # return a0 + ld x8,0(sp) + addi sp,sp,16 + ret +.text +.balign 16 +.globl rv64i_zknd_set_decrypt_key +.type rv64i_zknd_set_decrypt_key,@function +rv64i_zknd_set_decrypt_key: + addi sp,sp,-16 + sd x8,0(sp) + bnez x10,1f # if (!userKey || !key) return -1; + bnez x12,1f + li a0,-1 + ret +1: + # Determine number of rounds from key size in bits + li x6,128 + bne x11,x6,1f + li x7,10 # key->rounds = 10 if bits == 128 + sw x7,240(x12) # store key->rounds + ld x6,0(x10) + ld x7,8(x10) + sd x6,0(x12) + sd x7,8(x12) + .word 822318099 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 823366675 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 824415251 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 825463827 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 826512403 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 827560979 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 828609555 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 829658131 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 830706707 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + .word 805508115 + sd x8,0(x12) + .word 805540883 + sd x8,8(x12) + .word 831755283 + .word 2120483635 + .word 2121466803 + add x12,x12,16 + sd x6,0(x12) + sd x7,8(x12) + + j 4f +1: + li x6,192 + bne x11,x6,2f + li x7,12 # key->rounds = 12 if bits == 192 + sw x7,240(x12) # store key->rounds + ld x6,0(x10) + ld x7,8(x10) + ld x8,16(x10) + sd x6,0(x12) + sd x7,8(x12) + .word 805574291 + sd x13,16(x12) + .word 822351507 + .word 2120647475 + .word 2121466803 + add x12,x12,24 
+ .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 823400083 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 824448659 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 825497235 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 826545811 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 827594387 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 828642963 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + .word 805508755 + sd x13,0(x12) + .word 805541523 + sd x13,8(x12) + # the reason is in ke192enc + .word 2122548275 + .word 805574291 + sd x13,16(x12) + .word 829691539 + .word 2120647475 + .word 2121466803 + add x12,x12,24 + sd x6,0(x12) + sd x7,8(x12) + + j 4f +2: + li x7,14 # key->rounds = 14 if bits == 256 + li x6,256 + beq x11,x6,3f + li a0,-2 # If bits != 128, 192, or 256, return -2 + j 5f +3: + sw x7,240(x12) # store key->rounds + ld x6,0(x10) + ld x7,8(x10) + ld x8,16(x10) + ld x13,24(x10) + sd x6,0(x12) + sd x7,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd x14,24(x12) + .word 822515475 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + .word 832804627 + .word 2122777651 + .word 2127824563 + .word 805508883 + sd x14,0(x12) + .word 805541651 + sd x14,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd x14,24(x12) + .word 823564051 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + .word 832804627 + .word 2122777651 + .word 2127824563 + .word 805508883 + sd x14,0(x12) + .word 805541651 + sd x14,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd x14,24(x12) + .word 824612627 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + .word 832804627 + .word 2122777651 + .word 2127824563 + .word 805508883 + sd x14,0(x12) + .word 805541651 + sd x14,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd x14,24(x12) + .word 825661203 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + .word 832804627 + .word 2122777651 + .word 2127824563 + .word 805508883 + sd x14,0(x12) + .word 805541651 + sd x14,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd x14,24(x12) + .word 826709779 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + .word 832804627 + .word 2122777651 + .word 2127824563 + .word 805508883 + sd x14,0(x12) + .word 805541651 + sd x14,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd x14,24(x12) + .word 827758355 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + .word 832804627 + .word 2122777651 + .word 2127824563 + .word 805508883 + sd x14,0(x12) + .word 805541651 + sd x14,8(x12) + .word 805574419 + sd x14,16(x12) + .word 805738259 + sd 
x14,24(x12) + .word 828806931 + .word 2120680243 + .word 2121466803 + add x12,x12,32 + sd x6,0(x12) + sd x7,8(x12) + # last two one dropped + +4: # return 0 + li a0,0 +5: # return a0 + ld x8,0(sp) + addi sp,sp,16 + ret diff --git a/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvbb-zvkg-zvkned.S b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvbb-zvkg-zvkned.S new file mode 100644 index 000000000000..f8a68e7c42dd --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvbb-zvkg-zvkned.S @@ -0,0 +1,943 @@ +.text +.p2align 3 +.globl rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt +.type rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt,@function +rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt: + # Load number of rounds + lwu t0, 240(a4) + .word 3439489111 + .word 34074119 + .word 34041479 + .word 2815667831 + addi t0, t0, -1 + addi a4, a4, 16 +1: + .word 34041479 + .word 2815503991 + addi t0, t0, -1 + addi a4, a4, 16 + bnez t0, 1b + .word 34041479 + .word 2815536759 + + + # aes block size is 16 + andi a6, a2, 15 + mv t3, a2 + beqz a6, 1f + sub a2, a2, a6 + addi t3, a2, -16 +1: + # We make the `LENGTH` become e32 length here. + srli t4, a2, 2 + srli t3, t3, 2 + + # Load number of rounds + lwu t0, 240(a3) + li t1, 14 + li t2, 10 + beq t0, t1, aes_xts_enc_256 + beq t0, t2, aes_xts_enc_128 +.size rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt,.-rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt +.p2align 3 +aes_xts_enc_128: + # load input + .word 221182167 + .word 33909767 + + li t0, 5 + # We could simplify the initialization steps if we have `block<=1`. + blt t4, t0, 1f + + # Note: We use `vgmul` for GF(2^128) multiplication. The `vgmul` uses + # different order of coefficients. We should use`vbrev8` to reverse the + # data when we use `vgmul`. + .word 3439489111 + .word 1271144535 + .word 221179991 + .word 1577072727 + # v16: [r-IV0, r-IV0, ...] + .word 2785257591 + + # Prepare GF(2^128) multiplier [1, x, x^2, x^3, ...] in v8. + slli t0, t4, 2 + .word 218296407 + # v2: [`1`, `1`, `1`, `1`, ...] + .word 1577103703 + # v3: [`0`, `1`, `2`, `3`, ...] + .word 1376297431 + .word 227733591 + # v4: [`1`, 0, `1`, 0, `1`, 0, `1`, 0, ...] + .word 1243816535 + # v6: [`0`, 0, `1`, 0, `2`, 0, `3`, 0, ...] + .word 1244865367 + slli t0, t4, 1 + .word 219344983 + # v8: [1<<0=1, 0, 0, 0, 1<<1=x, 0, 0, 0, 1<<2=x^2, 0, 0, 0, ...] + .word 3594716247 + + # Compute [r-IV0*1, r-IV0*x, r-IV0*x^2, r-IV0*x^3, ...] in v16 + .word 221179991 + .word 1250174039 + .word 2726865015 + + # Compute [IV0*1, IV0*x, IV0*x^2, IV0*x^3, ...] in v28. + # Reverse the bits order back. + .word 1258565207 + + # Prepare the x^n multiplier in v20. The `n` is the aes-xts block number + # in a LMUL=4 register group. + # n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4)) + # = (VLEN/32) + # We could use vsetvli with `e32, m1` to compute the `n` number. 
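The comment above fixes the per-group block count; a minimal C restatement of that arithmetic, assuming LMUL=4 and SEW=32 as stated (blocks_per_group is an illustrative name, not part of the generated code):

#include <assert.h>

/* With LMUL=4 and SEW=32, one register group holds (VLEN*4)/32 e32 elements,
   and each AES block occupies four of them, so n = (VLEN*4)/(32*4) = VLEN/32. */
static unsigned blocks_per_group(unsigned vlen_bits)
{
    return (vlen_bits * 4u) / (32u * 4u);   /* equals vlen_bits / 32 */
}

int main(void)
{
    assert(blocks_per_group(128) == 4);   /* VLEN=128: 4 AES blocks per group */
    assert(blocks_per_group(256) == 8);   /* VLEN=256: 8 AES blocks per group */
    return 0;
}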
+ .word 218133207 + li t1, 1 + sll t0, t1, t0 + .word 3447812183 + .word 1577070679 + .word 3380670551 + .word 1577238615 + .word 3447812183 + .word 1241784407 + .word 221179991 + .word 1577073239 + .word 2785258103 + + j 2f +1: + .word 3439489111 + .word 1271146583 +2: + + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + + .word 221182167 + j 1f + +.Lenc_blocks_128: + .word 221182167 + # load plaintext into v24 + .word 33909767 + # update iv + .word 2739447927 + # reverse the iv's bits order back + .word 1258565207 +1: + .word 797838423 + slli t0, a7, 2 + sub t4, t4, a7 + add a0, a0, t0 + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796661879 + + .word 797838423 + + # store ciphertext + .word 221147223 + .word 33942567 + add a1, a1, t0 + sub t3, t3, a7 + + bnez t4, .Lenc_blocks_128 + + bnez a6, 1f + ret +1: + # slidedown second to last block + addi a7, a7, -4 + .word 3441586263 + # ciphertext + .word 1065929815 + # multiplier + .word 1057540183 + + .word 3439489111 + .word 1577848023 + + # load last block into v24 + # note: We should load the last block before store the second to last block + # for in-place operation. + .word 134770775 + .word 33885191 + + # setup `x` multiplier with byte-reversed order + # 0b00000010 => 0b01000000 (0x40) + li t0, 0x40 + .word 3439489111 + .word 1577074263 + .word 3355504727 + .word 1577242199 + + # compute IV for last block + .word 3439489111 + .word 2747836535 + .word 1258565207 + + # store second to last block + .word 201879639 + .word 33918119 + + + # xts last block + .word 3439489111 + .word 797838423 + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796661879 + + .word 797838423 + + # store last block ciphertext + addi a1, a1, -16 + .word 33942567 + + ret +.size aes_xts_enc_128,.-aes_xts_enc_128 +.p2align 3 +aes_xts_enc_256: + # load input + .word 221182167 + .word 33909767 + + li t0, 5 + # We could simplify the initialization steps if we have `block<=1`. + blt t4, t0, 1f + + # Note: We use `vgmul` for GF(2^128) multiplication. The `vgmul` uses + # different order of coefficients. We should use`vbrev8` to reverse the + # data when we use `vgmul`. + .word 3439489111 + .word 1271144535 + .word 221179991 + .word 1577072727 + # v16: [r-IV0, r-IV0, ...] + .word 2785257591 + + # Prepare GF(2^128) multiplier [1, x, x^2, x^3, ...] in v8. + slli t0, t4, 2 + .word 218296407 + # v2: [`1`, `1`, `1`, `1`, ...] + .word 1577103703 + # v3: [`0`, `1`, `2`, `3`, ...] + .word 1376297431 + .word 227733591 + # v4: [`1`, 0, `1`, 0, `1`, 0, `1`, 0, ...] + .word 1243816535 + # v6: [`0`, 0, `1`, 0, `2`, 0, `3`, 0, ...] + .word 1244865367 + slli t0, t4, 1 + .word 219344983 + # v8: [1<<0=1, 0, 0, 0, 1<<1=x, 0, 0, 0, 1<<2=x^2, 0, 0, 0, ...] + .word 3594716247 + + # Compute [r-IV0*1, r-IV0*x, r-IV0*x^2, r-IV0*x^3, ...] in v16 + .word 221179991 + .word 1250174039 + .word 2726865015 + + # Compute [IV0*1, IV0*x, IV0*x^2, IV0*x^3, ...] in v28. 
+ # Reverse the bits order back. + .word 1258565207 + + # Prepare the x^n multiplier in v20. The `n` is the aes-xts block number + # in a LMUL=4 register group. + # n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4)) + # = (VLEN/32) + # We could use vsetvli with `e32, m1` to compute the `n` number. + .word 218133207 + li t1, 1 + sll t0, t1, t0 + .word 3447812183 + .word 1577070679 + .word 3380670551 + .word 1577238615 + .word 3447812183 + .word 1241784407 + .word 221179991 + .word 1577073239 + .word 2785258103 + + j 2f +1: + .word 3439489111 + .word 1271146583 +2: + + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + + .word 221182167 + j 1f + +.Lenc_blocks_256: + .word 221182167 + # load plaintext into v24 + .word 33909767 + # update iv + .word 2739447927 + # reverse the iv's bits order back + .word 1258565207 +1: + .word 797838423 + slli t0, a7, 2 + sub t4, t4, a7 + add a0, a0, t0 + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796629111 + .word 2797677687 + .word 2798726263 + .word 2799774839 + .word 2800856183 + + .word 797838423 + + # store ciphertext + .word 221147223 + .word 33942567 + add a1, a1, t0 + sub t3, t3, a7 + + bnez t4, .Lenc_blocks_256 + + bnez a6, 1f + ret +1: + # slidedown second to last block + addi a7, a7, -4 + .word 3441586263 + # ciphertext + .word 1065929815 + # multiplier + .word 1057540183 + + .word 3439489111 + .word 1577848023 + + # load last block into v24 + # note: We should load the last block before store the second to last block + # for in-place operation. 
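The tail-block paths (here and in the 128-bit routine above) load the `x` multiplier as 0x40 rather than 0x02 because vgmul consumes bit-reversed operands; a small C check of that per-byte reversal, mirroring what `vbrev8` does (brev8 is an illustrative helper):

#include <stdint.h>
#include <assert.h>

/* Reverse the bits of one byte: 0b0000_0010 (x == 0x02) -> 0b0100_0000 (0x40). */
static uint8_t brev8(uint8_t b)
{
    b = (uint8_t)(((b & 0xF0u) >> 4) | ((b & 0x0Fu) << 4));
    b = (uint8_t)(((b & 0xCCu) >> 2) | ((b & 0x33u) << 2));
    b = (uint8_t)(((b & 0xAAu) >> 1) | ((b & 0x55u) << 1));
    return b;
}

int main(void)
{
    assert(brev8(0x02) == 0x40);
    return 0;
}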
+ .word 134770775 + .word 33885191 + + # setup `x` multiplier with byte-reversed order + # 0b00000010 => 0b01000000 (0x40) + li t0, 0x40 + .word 3439489111 + .word 1577074263 + .word 3355504727 + .word 1577242199 + + # compute IV for last block + .word 3439489111 + .word 2747836535 + .word 1258565207 + + # store second to last block + .word 201879639 + .word 33918119 + + + # xts last block + .word 3439489111 + .word 797838423 + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796629111 + .word 2797677687 + .word 2798726263 + .word 2799774839 + .word 2800856183 + + .word 797838423 + + # store last block ciphertext + addi a1, a1, -16 + .word 33942567 + + ret +.size aes_xts_enc_256,.-aes_xts_enc_256 +.p2align 3 +.globl rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt +.type rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt,@function +rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt: + # Load number of rounds + lwu t0, 240(a4) + .word 3439489111 + .word 34074119 + .word 34041479 + .word 2815667831 + addi t0, t0, -1 + addi a4, a4, 16 +1: + .word 34041479 + .word 2815503991 + addi t0, t0, -1 + addi a4, a4, 16 + bnez t0, 1b + .word 34041479 + .word 2815536759 + + + # aes block size is 16 + andi a6, a2, 15 + beqz a6, 1f + sub a2, a2, a6 + addi a2, a2, -16 +1: + # We make the `LENGTH` become e32 length here. + srli t4, a2, 2 + + # Load number of rounds + lwu t0, 240(a3) + li t1, 14 + li t2, 10 + beq t0, t1, aes_xts_dec_256 + beq t0, t2, aes_xts_dec_128 +.size rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt,.-rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt +.p2align 3 +aes_xts_dec_128: + # load input + .word 221182167 + .word 33909767 + + li t0, 5 + # We could simplify the initialization steps if we have `block<=1`. + blt t4, t0, 1f + + # Note: We use `vgmul` for GF(2^128) multiplication. The `vgmul` uses + # different order of coefficients. We should use`vbrev8` to reverse the + # data when we use `vgmul`. + .word 3439489111 + .word 1271144535 + .word 221179991 + .word 1577072727 + # v16: [r-IV0, r-IV0, ...] + .word 2785257591 + + # Prepare GF(2^128) multiplier [1, x, x^2, x^3, ...] in v8. + slli t0, t4, 2 + .word 218296407 + # v2: [`1`, `1`, `1`, `1`, ...] + .word 1577103703 + # v3: [`0`, `1`, `2`, `3`, ...] + .word 1376297431 + .word 227733591 + # v4: [`1`, 0, `1`, 0, `1`, 0, `1`, 0, ...] + .word 1243816535 + # v6: [`0`, 0, `1`, 0, `2`, 0, `3`, 0, ...] + .word 1244865367 + slli t0, t4, 1 + .word 219344983 + # v8: [1<<0=1, 0, 0, 0, 1<<1=x, 0, 0, 0, 1<<2=x^2, 0, 0, 0, ...] + .word 3594716247 + + # Compute [r-IV0*1, r-IV0*x, r-IV0*x^2, r-IV0*x^3, ...] in v16 + .word 221179991 + .word 1250174039 + .word 2726865015 + + # Compute [IV0*1, IV0*x, IV0*x^2, IV0*x^3, ...] in v28. + # Reverse the bits order back. + .word 1258565207 + + # Prepare the x^n multiplier in v20. The `n` is the aes-xts block number + # in a LMUL=4 register group. + # n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4)) + # = (VLEN/32) + # We could use vsetvli with `e32, m1` to compute the `n` number. 
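As in the encrypt path, the code above prepares the tweak sequence [IV0*1, IV0*x, IV0*x^2, ...] with vgmul; a scalar reference for one such step, assuming the standard XTS convention of a little-endian 128-bit tweak and the reduction polynomial x^128 + x^7 + x^2 + x + 1 (xts_mul_x is an illustrative helper, not part of the generated code):

#include <stdint.h>

/* Multiply a 16-byte XTS tweak by x in GF(2^128). */
static void xts_mul_x(uint8_t t[16])
{
    uint8_t carry = 0;
    for (int i = 0; i < 16; i++) {
        uint8_t next = (uint8_t)(t[i] >> 7);
        t[i] = (uint8_t)((t[i] << 1) | carry);
        carry = next;
    }
    if (carry)
        t[0] ^= 0x87;   /* fold x^128 back in as x^7 + x^2 + x + 1 */
}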
+ .word 218133207 + li t1, 1 + sll t0, t1, t0 + .word 3447812183 + .word 1577070679 + .word 3380670551 + .word 1577238615 + .word 3447812183 + .word 1241784407 + .word 221179991 + .word 1577073239 + .word 2785258103 + + j 2f +1: + .word 3439489111 + .word 1271146583 +2: + + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + + beqz t4, 2f + + .word 221182167 + j 1f + +.Ldec_blocks_128: + .word 221182167 + # load ciphertext into v24 + .word 33909767 + # update iv + .word 2739447927 + # reverse the iv's bits order back + .word 1258565207 +1: + .word 797838423 + slli t0, a7, 2 + sub t4, t4, a7 + add a0, a0, t0 + .word 2796792951 + .word 2795514999 + .word 2794466423 + .word 2793417847 + .word 2792369271 + .word 2791320695 + .word 2790272119 + .word 2789223543 + .word 2788174967 + .word 2787126391 + .word 2786110583 + + .word 797838423 + + # store plaintext + .word 33942567 + add a1, a1, t0 + + bnez t4, .Ldec_blocks_128 + +2: + bnez a6, 1f + ret +1: + # load second to last block's ciphertext + .word 3439489111 + .word 33909767 + addi a0, a0, 16 + + # setup `x` multiplier with byte-reversed order + # 0b00000010 => 0b01000000 (0x40) + li t0, 0x40 + .word 3439489111 + .word 1577073239 + .word 3355504727 + .word 1577241175 + + beqz a2, 1f + # slidedown third to last block + addi a7, a7, -4 + .word 3441586263 + # multiplier + .word 1057540183 + + # compute IV for last block + .word 3439489111 + .word 2739447927 + .word 1258565207 + + # compute IV for second to last block + .word 2739447927 + .word 1258565335 + j 2f +1: + # compute IV for second to last block + .word 3439489111 + .word 2739447927 + .word 1258565335 +2: + + + ## xts second to last block + .word 3439489111 + .word 797871191 + .word 2796792951 + .word 2795514999 + .word 2794466423 + .word 2793417847 + .word 2792369271 + .word 2791320695 + .word 2790272119 + .word 2789223543 + .word 2788174967 + .word 2787126391 + .word 2786110583 + + .word 797871191 + .word 1577848023 + + # load last block ciphertext + .word 134770775 + .word 33885191 + + # store second to last block plaintext + addi t0, a1, 16 + .word 33721511 + + ## xts last block + .word 3439489111 + .word 797838423 + .word 2796792951 + .word 2795514999 + .word 2794466423 + .word 2793417847 + .word 2792369271 + .word 2791320695 + .word 2790272119 + .word 2789223543 + .word 2788174967 + .word 2787126391 + .word 2786110583 + + .word 797838423 + + # store second to last block plaintext + .word 33942567 + + ret +.size aes_xts_dec_128,.-aes_xts_dec_128 +.p2align 3 +aes_xts_dec_256: + # load input + .word 221182167 + .word 33909767 + + li t0, 5 + # We could simplify the initialization steps if we have `block<=1`. + blt t4, t0, 1f + + # Note: We use `vgmul` for GF(2^128) multiplication. The `vgmul` uses + # different order of coefficients. We should use`vbrev8` to reverse the + # data when we use `vgmul`. + .word 3439489111 + .word 1271144535 + .word 221179991 + .word 1577072727 + # v16: [r-IV0, r-IV0, ...] + .word 2785257591 + + # Prepare GF(2^128) multiplier [1, x, x^2, x^3, ...] in v8. + slli t0, t4, 2 + .word 218296407 + # v2: [`1`, `1`, `1`, `1`, ...] + .word 1577103703 + # v3: [`0`, `1`, `2`, `3`, ...] 
+ .word 1376297431 + .word 227733591 + # v4: [`1`, 0, `1`, 0, `1`, 0, `1`, 0, ...] + .word 1243816535 + # v6: [`0`, 0, `1`, 0, `2`, 0, `3`, 0, ...] + .word 1244865367 + slli t0, t4, 1 + .word 219344983 + # v8: [1<<0=1, 0, 0, 0, 1<<1=x, 0, 0, 0, 1<<2=x^2, 0, 0, 0, ...] + .word 3594716247 + + # Compute [r-IV0*1, r-IV0*x, r-IV0*x^2, r-IV0*x^3, ...] in v16 + .word 221179991 + .word 1250174039 + .word 2726865015 + + # Compute [IV0*1, IV0*x, IV0*x^2, IV0*x^3, ...] in v28. + # Reverse the bits order back. + .word 1258565207 + + # Prepare the x^n multiplier in v20. The `n` is the aes-xts block number + # in a LMUL=4 register group. + # n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4)) + # = (VLEN/32) + # We could use vsetvli with `e32, m1` to compute the `n` number. + .word 218133207 + li t1, 1 + sll t0, t1, t0 + .word 3447812183 + .word 1577070679 + .word 3380670551 + .word 1577238615 + .word 3447812183 + .word 1241784407 + .word 221179991 + .word 1577073239 + .word 2785258103 + + j 2f +1: + .word 3439489111 + .word 1271146583 +2: + + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + + beqz t4, 2f + + .word 221182167 + j 1f + +.Ldec_blocks_256: + .word 221182167 + # load ciphertext into v24 + .word 33909767 + # update iv + .word 2739447927 + # reverse the iv's bits order back + .word 1258565207 +1: + .word 797838423 + slli t0, a7, 2 + sub t4, t4, a7 + add a0, a0, t0 + .word 2800987255 + .word 2799709303 + .word 2798660727 + .word 2797612151 + .word 2796563575 + .word 2795514999 + .word 2794466423 + .word 2793417847 + .word 2792369271 + .word 2791320695 + .word 2790272119 + .word 2789223543 + .word 2788174967 + .word 2787126391 + .word 2786110583 + + .word 797838423 + + # store plaintext + .word 33942567 + add a1, a1, t0 + + bnez t4, .Ldec_blocks_256 + +2: + bnez a6, 1f + ret +1: + # load second to last block's ciphertext + .word 3439489111 + .word 33909767 + addi a0, a0, 16 + + # setup `x` multiplier with byte-reversed order + # 0b00000010 => 0b01000000 (0x40) + li t0, 0x40 + .word 3439489111 + .word 1577073239 + .word 3355504727 + .word 1577241175 + + beqz a2, 1f + # slidedown third to last block + addi a7, a7, -4 + .word 3441586263 + # multiplier + .word 1057540183 + + # compute IV for last block + .word 3439489111 + .word 2739447927 + .word 1258565207 + + # compute IV for second to last block + .word 2739447927 + .word 1258565335 + j 2f +1: + # compute IV for second to last block + .word 3439489111 + .word 2739447927 + .word 1258565335 +2: + + + ## xts second to last block + .word 3439489111 + .word 797871191 + .word 2800987255 + .word 2799709303 + .word 2798660727 + .word 2797612151 + .word 2796563575 + .word 2795514999 + .word 2794466423 + .word 2793417847 + .word 2792369271 + .word 2791320695 + .word 2790272119 + .word 2789223543 + .word 2788174967 + .word 2787126391 + .word 2786110583 + + .word 797871191 + .word 1577848023 + + # load last block ciphertext + .word 134770775 + .word 33885191 + + # store second to last block plaintext + addi t0, a1, 16 + .word 33721511 + + ## xts last block + .word 3439489111 + .word 797838423 + 
.word 2800987255 + .word 2799709303 + .word 2798660727 + .word 2797612151 + .word 2796563575 + .word 2795514999 + .word 2794466423 + .word 2793417847 + .word 2792369271 + .word 2791320695 + .word 2790272119 + .word 2789223543 + .word 2788174967 + .word 2787126391 + .word 2786110583 + + .word 797838423 + + # store second to last block plaintext + .word 33942567 + + ret +.size aes_xts_dec_256,.-aes_xts_dec_256 diff --git a/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvkb-zvkned.S b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvkb-zvkned.S new file mode 100644 index 000000000000..2d6a71e355eb --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvkb-zvkned.S @@ -0,0 +1,326 @@ +.text +.p2align 3 +.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks +.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,@function +rv64i_zvkb_zvkned_ctr32_encrypt_blocks: + beqz a2, 1f + + # Load number of rounds + lwu t0, 240(a3) + li t1, 14 + li t2, 12 + li t3, 10 + + slli t5, a2, 2 + + beq t0, t1, ctr32_encrypt_blocks_256 + beq t0, t2, ctr32_encrypt_blocks_192 + beq t0, t3, ctr32_encrypt_blocks_128 + +1: + ret + +.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks +.p2align 3 +ctr32_encrypt_blocks_128: + # Load all 11 round keys to v1-v11 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + # Setup mask into v0 + # The mask pattern for 4*N-th elements + # mask v0: [000100010001....] + # Note: + # We could setup the mask just for the maximum element length instead of + # the VLMAX. + li t0, 0b10001000 + .word 201356247 + .word 1577238615 + # Load IV. + # v31:[IV0, IV1, IV2, big-endian count] + .word 3439489111 + .word 34041735 + # Convert the big-endian counter into little-endian. + .word 3305271383 + .word 1240772567 + # Splat the IV to v16 + .word 221212759 + .word 1577072727 + .word 2817763447 + # Prepare the ctr pattern into v20 + # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] + .word 1342712407 + # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...] + .word 86998743 + .word 17434711 + + + ##### AES body + j 2f +1: + .word 86998743 + # Increase ctr in v16. + .word 17811543 +2: + # Load plaintext into v20 + .word 33909255 + slli t0, t4, 2 + srli t6, t4, 2 + sub t5, t5, t4 + add a0, a0, t0 + # Prepare the AES ctr input into v24. + # The ctr data uses big-endian form. + .word 1577585751 + .word 1233431639 + + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796661879 + + # ciphertext + .word 797576279 + + # Store the ciphertext. + .word 33942567 + add a1, a1, t0 + + bnez t5, 1b + + ret +.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128 +.p2align 3 +ctr32_encrypt_blocks_192: + # Load all 13 round keys to v1-v13 registers. 
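The `lwu t0, 240(a3)` load and the 14/12/10 dispatch above rely on the round count sitting 240 bytes into the key schedule, i.e. after space for 15 round keys of 16 bytes each; a sketch of that layout, inferred from the offsets used here rather than taken from any header (names are illustrative):

#include <stdint.h>

typedef struct {
    uint32_t rd_key[4 * (14 + 1)];   /* up to 15 round keys = 240 bytes          */
    int32_t  rounds;                 /* 10, 12 or 14 -> 11, 13 or 15 round keys  */
} aes_key_layout;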
+ .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + + # Setup mask into v0 + # The mask pattern for 4*N-th elements + # mask v0: [000100010001....] + # Note: + # We could setup the mask just for the maximum element length instead of + # the VLMAX. + li t0, 0b10001000 + .word 201356247 + .word 1577238615 + # Load IV. + # v31:[IV0, IV1, IV2, big-endian count] + .word 3439489111 + .word 34041735 + # Convert the big-endian counter into little-endian. + .word 3305271383 + .word 1240772567 + # Splat the IV to v16 + .word 221212759 + .word 1577072727 + .word 2817763447 + # Prepare the ctr pattern into v20 + # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] + .word 1342712407 + # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...] + .word 86998743 + .word 17434711 + + + ##### AES body + j 2f +1: + .word 86998743 + # Increase ctr in v16. + .word 17811543 +2: + # Load plaintext into v20 + .word 33909255 + slli t0, t4, 2 + srli t6, t4, 2 + sub t5, t5, t4 + add a0, a0, t0 + # Prepare the AES ctr input into v24. + # The ctr data uses big-endian form. + .word 1577585751 + .word 1233431639 + + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796629111 + .word 2797677687 + .word 2798759031 + + # ciphertext + .word 797576279 + + # Store the ciphertext. + .word 33942567 + add a1, a1, t0 + + bnez t5, 1b + + ret +.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192 +.p2align 3 +ctr32_encrypt_blocks_256: + # Load all 15 round keys to v1-v15 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + # Setup mask into v0 + # The mask pattern for 4*N-th elements + # mask v0: [000100010001....] + # Note: + # We could setup the mask just for the maximum element length instead of + # the VLMAX. + li t0, 0b10001000 + .word 201356247 + .word 1577238615 + # Load IV. + # v31:[IV0, IV1, IV2, big-endian count] + .word 3439489111 + .word 34041735 + # Convert the big-endian counter into little-endian. + .word 3305271383 + .word 1240772567 + # Splat the IV to v16 + .word 221212759 + .word 1577072727 + .word 2817763447 + # Prepare the ctr pattern into v20 + # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] + .word 1342712407 + # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...] + .word 86998743 + .word 17434711 + + + ##### AES body + j 2f +1: + .word 86998743 + # Increase ctr in v16. + .word 17811543 +2: + # Load plaintext into v20 + .word 33909255 + slli t0, t4, 2 + srli t6, t4, 2 + sub t5, t5, t4 + add a0, a0, t0 + # Prepare the AES ctr input into v24. + # The ctr data uses big-endian form. 
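The counter block handled here keeps IV0..IV2 fixed and treats the last 32-bit word as a big-endian counter that advances once per block (the vector code adds the lane index and byte-swaps before the AES rounds); a scalar sketch of the same CTR32 increment (ctr32_inc is an illustrative helper):

#include <stdint.h>

static void ctr32_inc(uint8_t ivec[16])
{
    uint32_t c = ((uint32_t)ivec[12] << 24) | ((uint32_t)ivec[13] << 16) |
                 ((uint32_t)ivec[14] << 8)  |  (uint32_t)ivec[15];
    c += 1;   /* wraps modulo 2^32; the upper 96 bits of the IV are untouched */
    ivec[12] = (uint8_t)(c >> 24);
    ivec[13] = (uint8_t)(c >> 16);
    ivec[14] = (uint8_t)(c >> 8);
    ivec[15] = (uint8_t)c;
}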
+ .word 1577585751 + .word 1233431639 + + .word 2786307191 + .word 2787191927 + .word 2788240503 + .word 2789289079 + .word 2790337655 + .word 2791386231 + .word 2792434807 + .word 2793483383 + .word 2794531959 + .word 2795580535 + .word 2796629111 + .word 2797677687 + .word 2798726263 + .word 2799774839 + .word 2800856183 + + # ciphertext + .word 797576279 + + # Store the ciphertext. + .word 33942567 + add a1, a1, t0 + + bnez t5, 1b + + ret +.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256 diff --git a/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvkned.S b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvkned.S new file mode 100644 index 000000000000..91d1f13940ff --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aes-riscv64-zvkned.S @@ -0,0 +1,1401 @@ +.text +.p2align 3 +.globl rv64i_zvkned_cbc_encrypt +.type rv64i_zvkned_cbc_encrypt,@function +rv64i_zvkned_cbc_encrypt: + # check whether the length is a multiple of 16 and >= 16 + li t1, 16 + blt a2, t1, L_end + andi t1, a2, 15 + bnez t1, L_end + + # Load number of rounds + lwu t2, 240(a3) + + # Get proper routine for key size + li t0, 10 + beq t2, t0, L_cbc_enc_128 + + li t0, 12 + beq t2, t0, L_cbc_enc_192 + + li t0, 14 + beq t2, t0, L_cbc_enc_256 + + ret +.size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt +.p2align 3 +L_cbc_enc_128: + # Load all 11 round keys to v1-v11 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + + # Load IV. + .word 34039815 + + .word 33909767 + .word 797445207 + j 2f + +1: + .word 33908871 + .word 797477975 + +2: + # AES body + .word 2786307191 # with round key w[ 0, 3] + .word 2787191927 # with round key w[ 4, 7] + .word 2788240503 # with round key w[ 8,11] + .word 2789289079 # with round key w[12,15] + .word 2790337655 # with round key w[16,19] + .word 2791386231 # with round key w[20,23] + .word 2792434807 # with round key w[24,27] + .word 2793483383 # with round key w[28,31] + .word 2794531959 # with round key w[32,35] + .word 2795580535 # with round key w[36,39] + .word 2796661879 # with round key w[40,43] + + + .word 33942567 + + addi a0, a0, 16 + addi a1, a1, 16 + addi a2, a2, -16 + + bnez a2, 1b + + .word 34040871 + + ret +.size L_cbc_enc_128,.-L_cbc_enc_128 +.p2align 3 +L_cbc_enc_192: + # Load all 13 round keys to v1-v13 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + + + # Load IV. 
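These CBC encryption loops XOR the running IV into the plaintext, run the AES rounds, and keep the ciphertext as the IV for the next block, which is why CBC encryption cannot be parallelised across blocks; a scalar sketch of one step (cbc_encrypt_block and aes_encrypt are illustrative names):

#include <stdint.h>
#include <string.h>

static void cbc_encrypt_block(const uint8_t in[16], uint8_t out[16], uint8_t iv[16],
                              void (*aes_encrypt)(const uint8_t in[16], uint8_t out[16]))
{
    uint8_t buf[16];
    for (int i = 0; i < 16; i++)
        buf[i] = (uint8_t)(in[i] ^ iv[i]);   /* whiten with the previous ciphertext/IV */
    aes_encrypt(buf, out);
    memcpy(iv, out, 16);                     /* ciphertext chains into the next block */
}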
+ .word 34039815 + + .word 33909767 + .word 797445207 + j 2f + +1: + .word 33908871 + .word 797477975 + +2: + # AES body + .word 2786307191 # with round key w[ 0, 3] + .word 2787191927 # with round key w[ 4, 7] + .word 2788240503 # with round key w[ 8,11] + .word 2789289079 # with round key w[12,15] + .word 2790337655 # with round key w[16,19] + .word 2791386231 # with round key w[20,23] + .word 2792434807 # with round key w[24,27] + .word 2793483383 # with round key w[28,31] + .word 2794531959 # with round key w[32,35] + .word 2795580535 # with round key w[36,39] + .word 2796629111 # with round key w[40,43] + .word 2797677687 # with round key w[44,47] + .word 2798759031 # with round key w[48,51] + + + .word 33942567 + + addi a0, a0, 16 + addi a1, a1, 16 + addi a2, a2, -16 + + bnez a2, 1b + + .word 34040871 + + ret +.size L_cbc_enc_192,.-L_cbc_enc_192 +.p2align 3 +L_cbc_enc_256: + # Load all 15 round keys to v1-v15 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + + # Load IV. + .word 34039815 + + .word 33909767 + .word 797445207 + j 2f + +1: + .word 33908871 + .word 797477975 + +2: + # AES body + .word 2786307191 # with round key w[ 0, 3] + .word 2787191927 # with round key w[ 4, 7] + .word 2788240503 # with round key w[ 8,11] + .word 2789289079 # with round key w[12,15] + .word 2790337655 # with round key w[16,19] + .word 2791386231 # with round key w[20,23] + .word 2792434807 # with round key w[24,27] + .word 2793483383 # with round key w[28,31] + .word 2794531959 # with round key w[32,35] + .word 2795580535 # with round key w[36,39] + .word 2796629111 # with round key w[40,43] + .word 2797677687 # with round key w[44,47] + .word 2798726263 # with round key w[48,51] + .word 2799774839 # with round key w[52,55] + .word 2800856183 # with round key w[56,59] + + + .word 33942567 + + addi a0, a0, 16 + addi a1, a1, 16 + addi a2, a2, -16 + + bnez a2, 1b + + .word 34040871 + + ret +.size L_cbc_enc_256,.-L_cbc_enc_256 +.p2align 3 +.globl rv64i_zvkned_cbc_decrypt +.type rv64i_zvkned_cbc_decrypt,@function +rv64i_zvkned_cbc_decrypt: + # check whether the length is a multiple of 16 and >= 16 + li t1, 16 + blt a2, t1, L_end + andi t1, a2, 15 + bnez t1, L_end + + # Load number of rounds + lwu t2, 240(a3) + + # Get proper routine for key size + li t0, 10 + beq t2, t0, L_cbc_dec_128 + + li t0, 12 + beq t2, t0, L_cbc_dec_192 + + li t0, 14 + beq t2, t0, L_cbc_dec_256 + + ret +.size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt +.p2align 3 +L_cbc_dec_128: + # Load all 11 round keys to v1-v11 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + + # Load IV. 
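CBC decryption saves the incoming ciphertext, decrypts, XORs with the previous ciphertext (initially the IV), and then promotes the saved ciphertext to be the next IV; a scalar sketch of that step under the same convention (illustrative names):

#include <stdint.h>
#include <string.h>

static void cbc_decrypt_block(const uint8_t in[16], uint8_t out[16], uint8_t iv[16],
                              void (*aes_decrypt)(const uint8_t in[16], uint8_t out[16]))
{
    uint8_t saved[16], buf[16];
    memcpy(saved, in, 16);                   /* keep ciphertext before any in-place write */
    aes_decrypt(in, buf);
    for (int i = 0; i < 16; i++)
        out[i] = (uint8_t)(buf[i] ^ iv[i]);
    memcpy(iv, saved, 16);                   /* ciphertext chains into the next block */
}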
+ .word 34039815 + + .word 33909767 + .word 1577846999 + j 2f + +1: + .word 33909767 + .word 1577846999 + addi a1, a1, 16 + +2: + # AES body + .word 2796792951 # with round key w[40,43] + .word 2795514999 # with round key w[36,39] + .word 2794466423 # with round key w[32,35] + .word 2793417847 # with round key w[28,31] + .word 2792369271 # with round key w[24,27] + .word 2791320695 # with round key w[20,23] + .word 2790272119 # with round key w[16,19] + .word 2789223543 # with round key w[12,15] + .word 2788174967 # with round key w[ 8,11] + .word 2787126391 # with round key w[ 4, 7] + .word 2786110583 # with round key w[ 0, 3] + + + .word 797445207 + .word 33942567 + .word 1577617495 + + addi a2, a2, -16 + addi a0, a0, 16 + + bnez a2, 1b + + .word 34039847 + + ret +.size L_cbc_dec_128,.-L_cbc_dec_128 +.p2align 3 +L_cbc_dec_192: + # Load all 13 round keys to v1-v13 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + + + # Load IV. + .word 34039815 + + .word 33909767 + .word 1577846999 + j 2f + +1: + .word 33909767 + .word 1577846999 + addi a1, a1, 16 + +2: + # AES body + .word 2798890103 # with round key w[48,51] + .word 2797612151 # with round key w[44,47] + .word 2796563575 # with round key w[40,43] + .word 2795514999 # with round key w[36,39] + .word 2794466423 # with round key w[32,35] + .word 2793417847 # with round key w[28,31] + .word 2792369271 # with round key w[24,27] + .word 2791320695 # with round key w[20,23] + .word 2790272119 # with round key w[16,19] + .word 2789223543 # with round key w[12,15] + .word 2788174967 # with round key w[ 8,11] + .word 2787126391 # with round key w[ 4, 7] + .word 2786110583 # with round key w[ 0, 3] + + + .word 797445207 + .word 33942567 + .word 1577617495 + + addi a2, a2, -16 + addi a0, a0, 16 + + bnez a2, 1b + + .word 34039847 + + ret +.size L_cbc_dec_192,.-L_cbc_dec_192 +.p2align 3 +L_cbc_dec_256: + # Load all 15 round keys to v1-v15 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + + # Load IV. 
+ .word 34039815 + + .word 33909767 + .word 1577846999 + j 2f + +1: + .word 33909767 + .word 1577846999 + addi a1, a1, 16 + +2: + # AES body + .word 2800987255 # with round key w[56,59] + .word 2799709303 # with round key w[52,55] + .word 2798660727 # with round key w[48,51] + .word 2797612151 # with round key w[44,47] + .word 2796563575 # with round key w[40,43] + .word 2795514999 # with round key w[36,39] + .word 2794466423 # with round key w[32,35] + .word 2793417847 # with round key w[28,31] + .word 2792369271 # with round key w[24,27] + .word 2791320695 # with round key w[20,23] + .word 2790272119 # with round key w[16,19] + .word 2789223543 # with round key w[12,15] + .word 2788174967 # with round key w[ 8,11] + .word 2787126391 # with round key w[ 4, 7] + .word 2786110583 # with round key w[ 0, 3] + + + .word 797445207 + .word 33942567 + .word 1577617495 + + addi a2, a2, -16 + addi a0, a0, 16 + + bnez a2, 1b + + .word 34039847 + + ret +.size L_cbc_dec_256,.-L_cbc_dec_256 +.p2align 3 +.globl rv64i_zvkned_ecb_encrypt +.type rv64i_zvkned_ecb_encrypt,@function +rv64i_zvkned_ecb_encrypt: + # Make the LEN become e32 length. + srli t3, a2, 2 + + # Load number of rounds + lwu t2, 240(a3) + + # Get proper routine for key size + li t0, 10 + beq t2, t0, L_ecb_enc_128 + + li t0, 12 + beq t2, t0, L_ecb_enc_192 + + li t0, 14 + beq t2, t0, L_ecb_enc_256 + + ret +.size rv64i_zvkned_ecb_encrypt,.-rv64i_zvkned_ecb_encrypt +.p2align 3 +L_ecb_enc_128: + # Load all 11 round keys to v1-v11 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + +1: + .word 221149271 + slli t0, a6, 2 + sub t3, t3, a6 + + .word 33909767 + + # AES body + .word 2786307191 # with round key w[ 0, 3] + .word 2787191927 # with round key w[ 4, 7] + .word 2788240503 # with round key w[ 8,11] + .word 2789289079 # with round key w[12,15] + .word 2790337655 # with round key w[16,19] + .word 2791386231 # with round key w[20,23] + .word 2792434807 # with round key w[24,27] + .word 2793483383 # with round key w[28,31] + .word 2794531959 # with round key w[32,35] + .word 2795580535 # with round key w[36,39] + .word 2796661879 # with round key w[40,43] + + + .word 33942567 + + add a0, a0, t0 + add a1, a1, t0 + + bnez t3, 1b + + ret +.size L_ecb_enc_128,.-L_ecb_enc_128 +.p2align 3 +L_ecb_enc_192: + # Load all 13 round keys to v1-v13 registers. 
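ECB has no chaining at all: every 16-byte block is transformed independently, which is what lets these routines fill a whole register group per iteration; a scalar sketch of the equivalent loop (illustrative names):

#include <stdint.h>
#include <stddef.h>

static void ecb_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                        void (*aes_encrypt)(const uint8_t in[16], uint8_t out[16]))
{
    /* len is assumed to be a multiple of 16, as ECB requires */
    for (size_t off = 0; off + 16 <= len; off += 16)
        aes_encrypt(in + off, out + off);
}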
+ .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + + +1: + .word 221149271 + slli t0, a6, 2 + sub t3, t3, a6 + + .word 33909767 + + # AES body + .word 2786307191 # with round key w[ 0, 3] + .word 2787191927 # with round key w[ 4, 7] + .word 2788240503 # with round key w[ 8,11] + .word 2789289079 # with round key w[12,15] + .word 2790337655 # with round key w[16,19] + .word 2791386231 # with round key w[20,23] + .word 2792434807 # with round key w[24,27] + .word 2793483383 # with round key w[28,31] + .word 2794531959 # with round key w[32,35] + .word 2795580535 # with round key w[36,39] + .word 2796629111 # with round key w[40,43] + .word 2797677687 # with round key w[44,47] + .word 2798759031 # with round key w[48,51] + + + .word 33942567 + + add a0, a0, t0 + add a1, a1, t0 + + bnez t3, 1b + + ret +.size L_ecb_enc_192,.-L_ecb_enc_192 +.p2align 3 +L_ecb_enc_256: + # Load all 15 round keys to v1-v15 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + +1: + .word 221149271 + slli t0, a6, 2 + sub t3, t3, a6 + + .word 33909767 + + # AES body + .word 2786307191 # with round key w[ 0, 3] + .word 2787191927 # with round key w[ 4, 7] + .word 2788240503 # with round key w[ 8,11] + .word 2789289079 # with round key w[12,15] + .word 2790337655 # with round key w[16,19] + .word 2791386231 # with round key w[20,23] + .word 2792434807 # with round key w[24,27] + .word 2793483383 # with round key w[28,31] + .word 2794531959 # with round key w[32,35] + .word 2795580535 # with round key w[36,39] + .word 2796629111 # with round key w[40,43] + .word 2797677687 # with round key w[44,47] + .word 2798726263 # with round key w[48,51] + .word 2799774839 # with round key w[52,55] + .word 2800856183 # with round key w[56,59] + + + .word 33942567 + + add a0, a0, t0 + add a1, a1, t0 + + bnez t3, 1b + + ret +.size L_ecb_enc_256,.-L_ecb_enc_256 +.p2align 3 +.globl rv64i_zvkned_ecb_decrypt +.type rv64i_zvkned_ecb_decrypt,@function +rv64i_zvkned_ecb_decrypt: + # Make the LEN become e32 length. + srli t3, a2, 2 + + # Load number of rounds + lwu t2, 240(a3) + + # Get proper routine for key size + li t0, 10 + beq t2, t0, L_ecb_dec_128 + + li t0, 12 + beq t2, t0, L_ecb_dec_192 + + li t0, 14 + beq t2, t0, L_ecb_dec_256 + + ret +.size rv64i_zvkned_ecb_decrypt,.-rv64i_zvkned_ecb_decrypt +.p2align 3 +L_ecb_dec_128: + # Load all 11 round keys to v1-v11 registers. 
+ .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + +1: + .word 221149271 + slli t0, a6, 2 + sub t3, t3, a6 + + .word 33909767 + + # AES body + .word 2796792951 # with round key w[40,43] + .word 2795514999 # with round key w[36,39] + .word 2794466423 # with round key w[32,35] + .word 2793417847 # with round key w[28,31] + .word 2792369271 # with round key w[24,27] + .word 2791320695 # with round key w[20,23] + .word 2790272119 # with round key w[16,19] + .word 2789223543 # with round key w[12,15] + .word 2788174967 # with round key w[ 8,11] + .word 2787126391 # with round key w[ 4, 7] + .word 2786110583 # with round key w[ 0, 3] + + + .word 33942567 + + add a0, a0, t0 + add a1, a1, t0 + + bnez t3, 1b + + ret +.size L_ecb_dec_128,.-L_ecb_dec_128 +.p2align 3 +L_ecb_dec_192: + # Load all 13 round keys to v1-v13 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + + +1: + .word 221149271 + slli t0, a6, 2 + sub t3, t3, a6 + + .word 33909767 + + # AES body + .word 2798890103 # with round key w[48,51] + .word 2797612151 # with round key w[44,47] + .word 2796563575 # with round key w[40,43] + .word 2795514999 # with round key w[36,39] + .word 2794466423 # with round key w[32,35] + .word 2793417847 # with round key w[28,31] + .word 2792369271 # with round key w[24,27] + .word 2791320695 # with round key w[20,23] + .word 2790272119 # with round key w[16,19] + .word 2789223543 # with round key w[12,15] + .word 2788174967 # with round key w[ 8,11] + .word 2787126391 # with round key w[ 4, 7] + .word 2786110583 # with round key w[ 0, 3] + + + .word 33942567 + + add a0, a0, t0 + add a1, a1, t0 + + bnez t3, 1b + + ret +.size L_ecb_dec_192,.-L_ecb_dec_192 +.p2align 3 +L_ecb_dec_256: + # Load all 15 round keys to v1-v15 registers. 
+ .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + addi a3, a3, 16 + .word 34006535 + addi a3, a3, 16 + .word 34006663 + addi a3, a3, 16 + .word 34006791 + addi a3, a3, 16 + .word 34006919 + + +1: + .word 221149271 + slli t0, a6, 2 + sub t3, t3, a6 + + .word 33909767 + + # AES body + .word 2800987255 # with round key w[56,59] + .word 2799709303 # with round key w[52,55] + .word 2798660727 # with round key w[48,51] + .word 2797612151 # with round key w[44,47] + .word 2796563575 # with round key w[40,43] + .word 2795514999 # with round key w[36,39] + .word 2794466423 # with round key w[32,35] + .word 2793417847 # with round key w[28,31] + .word 2792369271 # with round key w[24,27] + .word 2791320695 # with round key w[20,23] + .word 2790272119 # with round key w[16,19] + .word 2789223543 # with round key w[12,15] + .word 2788174967 # with round key w[ 8,11] + .word 2787126391 # with round key w[ 4, 7] + .word 2786110583 # with round key w[ 0, 3] + + + .word 33942567 + + add a0, a0, t0 + add a1, a1, t0 + + bnez t3, 1b + + ret +.size L_ecb_dec_256,.-L_ecb_dec_256 +.p2align 3 +.globl rv64i_zvkned_set_encrypt_key +.type rv64i_zvkned_set_encrypt_key,@function +rv64i_zvkned_set_encrypt_key: + beqz a0, L_fail_m1 + beqz a2, L_fail_m1 + + # Get proper routine for key size + li t1, 256 + beq a1, t1, L_set_key_256 + li t1, 128 + beq a1, t1, L_set_key_128 + + j L_fail_m2 + +.size rv64i_zvkned_set_encrypt_key,.-rv64i_zvkned_set_encrypt_key +.p2align 3 +.globl rv64i_zvkned_set_decrypt_key +.type rv64i_zvkned_set_decrypt_key,@function +rv64i_zvkned_set_decrypt_key: + beqz a0, L_fail_m1 + beqz a2, L_fail_m1 + + # Get proper routine for key size + li t1, 256 + beq a1, t1, L_set_key_256 + li t1, 128 + beq a1, t1, L_set_key_128 + + j L_fail_m2 + +.size rv64i_zvkned_set_decrypt_key,.-rv64i_zvkned_set_decrypt_key +.p2align 3 +L_set_key_128: + # Store the number of rounds + li t2, 10 + sw t2, 240(a2) + + .word 0xc1027057 + + # Load the key + .word 33907975 + + # Generate keys for round 2-11 into registers v11-v20. 
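Each of the ten expansion steps that follow consumes one AES round constant; successive constants are obtained by doubling in GF(2^8), giving 01, 02, 04, 08, 10, 20, 40, 80, 1b, 36 (the same rcon values appear in the s390x AES_Te data further down). A one-line C sketch (next_rcon is an illustrative helper):

#include <stdint.h>

static uint8_t next_rcon(uint8_t rc)
{
    /* multiply by x in GF(2^8), reducing by x^8 + x^4 + x^3 + x + 1 (0x1b) */
    return (uint8_t)((rc << 1) ^ ((rc & 0x80u) ? 0x1bu : 0x00u));
}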
+ .word 2325784055 # v11 <- rk2 (w[ 4, 7]) + .word 2326865527 # v12 <- rk3 (w[ 8,11]) + .word 2327946999 # v13 <- rk4 (w[12,15]) + .word 2329028471 # v14 <- rk5 (w[16,19]) + .word 2330109943 # v15 <- rk6 (w[20,23]) + .word 2331191415 # v16 <- rk7 (w[24,27]) + .word 2332272887 # v17 <- rk8 (w[28,31]) + .word 2333354359 # v18 <- rk9 (w[32,35]) + .word 2334435831 # v19 <- rk10 (w[36,39]) + .word 2335517303 # v20 <- rk11 (w[40,43]) + + # Store the round keys + .word 33973543 + addi a2, a2, 16 + .word 33973671 + addi a2, a2, 16 + .word 33973799 + addi a2, a2, 16 + .word 33973927 + addi a2, a2, 16 + .word 33974055 + addi a2, a2, 16 + .word 33974183 + addi a2, a2, 16 + .word 33974311 + addi a2, a2, 16 + .word 33974439 + addi a2, a2, 16 + .word 33974567 + addi a2, a2, 16 + .word 33974695 + addi a2, a2, 16 + .word 33974823 + + li a0, 1 + ret +.size L_set_key_128,.-L_set_key_128 +.p2align 3 +L_set_key_256: + # Store the number of rounds + li t2, 14 + sw t2, 240(a2) + + .word 0xc1027057 + + # Load the key + .word 33907975 + addi a0, a0, 16 + .word 33908103 + + .word 1577387607 + .word 2863736439 + .word 1577420503 + .word 2864817911 + .word 1577453399 + .word 2865899383 + .word 1577486295 + .word 2866980855 + .word 1577519191 + .word 2868062327 + .word 1577552087 + .word 2869143799 + .word 1577584983 + .word 2870225271 + .word 1577617879 + .word 2871306743 + .word 1577650775 + .word 2872388215 + .word 1577683671 + .word 2873469687 + .word 1577716567 + .word 2874551159 + .word 1577749463 + .word 2875632631 + .word 1577782359 + .word 2876714103 + + .word 33973543 + addi a2, a2, 16 + .word 33973671 + addi a2, a2, 16 + .word 33973799 + addi a2, a2, 16 + .word 33973927 + addi a2, a2, 16 + .word 33974055 + addi a2, a2, 16 + .word 33974183 + addi a2, a2, 16 + .word 33974311 + addi a2, a2, 16 + .word 33974439 + addi a2, a2, 16 + .word 33974567 + addi a2, a2, 16 + .word 33974695 + addi a2, a2, 16 + .word 33974823 + addi a2, a2, 16 + .word 33974951 + addi a2, a2, 16 + .word 33975079 + addi a2, a2, 16 + .word 33975207 + addi a2, a2, 16 + .word 33975335 + + li a0, 1 + ret +.size L_set_key_256,.-L_set_key_256 +.p2align 3 +.globl rv64i_zvkned_encrypt +.type rv64i_zvkned_encrypt,@function +rv64i_zvkned_encrypt: + # Load number of rounds + lwu t5, 240(a2) + + # Get proper routine for key size + li t6, 14 + beq t5, t6, L_enc_256 + li t6, 10 + beq t5, t6, L_enc_128 + li t6, 12 + beq t5, t6, L_enc_192 + + j L_fail_m2 +.size rv64i_zvkned_encrypt,.-rv64i_zvkned_encrypt +.p2align 3 +L_enc_128: + .word 3439489111 + + .word 33906823 + + .word 33973511 + .word 2795741431 # with round key w[ 0, 3] + addi a2, a2, 16 + .word 33973639 + .word 2796626167 # with round key w[ 4, 7] + addi a2, a2, 16 + .word 33973767 + .word 2797674743 # with round key w[ 8,11] + addi a2, a2, 16 + .word 33973895 + .word 2798723319 # with round key w[12,15] + addi a2, a2, 16 + .word 33974023 + .word 2799771895 # with round key w[16,19] + addi a2, a2, 16 + .word 33974151 + .word 2800820471 # with round key w[20,23] + addi a2, a2, 16 + .word 33974279 + .word 2801869047 # with round key w[24,27] + addi a2, a2, 16 + .word 33974407 + .word 2802917623 # with round key w[28,31] + addi a2, a2, 16 + .word 33974535 + .word 2803966199 # with round key w[32,35] + addi a2, a2, 16 + .word 33974663 + .word 2805014775 # with round key w[36,39] + addi a2, a2, 16 + .word 33974791 + .word 2806096119 # with round key w[40,43] + + .word 33939623 + + ret +.size L_enc_128,.-L_enc_128 +.p2align 3 +L_enc_192: + .word 3439489111 + + .word 33906823 + + .word 33973511 + .word 
2795741431 # with round key w[ 0, 3] + addi a2, a2, 16 + .word 33973639 + .word 2796626167 + addi a2, a2, 16 + .word 33973767 + .word 2797674743 + addi a2, a2, 16 + .word 33973895 + .word 2798723319 + addi a2, a2, 16 + .word 33974023 + .word 2799771895 + addi a2, a2, 16 + .word 33974151 + .word 2800820471 + addi a2, a2, 16 + .word 33974279 + .word 2801869047 + addi a2, a2, 16 + .word 33974407 + .word 2802917623 + addi a2, a2, 16 + .word 33974535 + .word 2803966199 + addi a2, a2, 16 + .word 33974663 + .word 2805014775 + addi a2, a2, 16 + .word 33974791 + .word 2806063351 + addi a2, a2, 16 + .word 33974919 + .word 2807111927 + addi a2, a2, 16 + .word 33975047 + .word 2808193271 + + .word 33939623 + ret +.size L_enc_192,.-L_enc_192 +.p2align 3 +L_enc_256: + .word 3439489111 + + .word 33906823 + + .word 33973511 + .word 2795741431 # with round key w[ 0, 3] + addi a2, a2, 16 + .word 33973639 + .word 2796626167 + addi a2, a2, 16 + .word 33973767 + .word 2797674743 + addi a2, a2, 16 + .word 33973895 + .word 2798723319 + addi a2, a2, 16 + .word 33974023 + .word 2799771895 + addi a2, a2, 16 + .word 33974151 + .word 2800820471 + addi a2, a2, 16 + .word 33974279 + .word 2801869047 + addi a2, a2, 16 + .word 33974407 + .word 2802917623 + addi a2, a2, 16 + .word 33974535 + .word 2803966199 + addi a2, a2, 16 + .word 33974663 + .word 2805014775 + addi a2, a2, 16 + .word 33974791 + .word 2806063351 + addi a2, a2, 16 + .word 33974919 + .word 2807111927 + addi a2, a2, 16 + .word 33975047 + .word 2808160503 + addi a2, a2, 16 + .word 33975175 + .word 2809209079 + addi a2, a2, 16 + .word 33975303 + .word 2810290423 + + .word 33939623 + ret +.size L_enc_256,.-L_enc_256 +.p2align 3 +.globl rv64i_zvkned_decrypt +.type rv64i_zvkned_decrypt,@function +rv64i_zvkned_decrypt: + # Load number of rounds + lwu t5, 240(a2) + + # Get proper routine for key size + li t6, 14 + beq t5, t6, L_dec_256 + li t6, 10 + beq t5, t6, L_dec_128 + li t6, 12 + beq t5, t6, L_dec_192 + + j L_fail_m2 +.size rv64i_zvkned_decrypt,.-rv64i_zvkned_decrypt +.p2align 3 +L_dec_128: + .word 3439489111 + + .word 33906823 + + addi a2, a2, 160 + .word 33974791 + .word 2806227191 # with round key w[40,43] + addi a2, a2, -16 + .word 33974663 + .word 2804949239 # with round key w[36,39] + addi a2, a2, -16 + .word 33974535 + .word 2803900663 # with round key w[32,35] + addi a2, a2, -16 + .word 33974407 + .word 2802852087 # with round key w[28,31] + addi a2, a2, -16 + .word 33974279 + .word 2801803511 # with round key w[24,27] + addi a2, a2, -16 + .word 33974151 + .word 2800754935 # with round key w[20,23] + addi a2, a2, -16 + .word 33974023 + .word 2799706359 # with round key w[16,19] + addi a2, a2, -16 + .word 33973895 + .word 2798657783 # with round key w[12,15] + addi a2, a2, -16 + .word 33973767 + .word 2797609207 # with round key w[ 8,11] + addi a2, a2, -16 + .word 33973639 + .word 2796560631 # with round key w[ 4, 7] + addi a2, a2, -16 + .word 33973511 + .word 2795544823 # with round key w[ 0, 3] + + .word 33939623 + + ret +.size L_dec_128,.-L_dec_128 +.p2align 3 +L_dec_192: + .word 3439489111 + + .word 33906823 + + addi a2, a2, 192 + .word 33975047 + .word 2808324343 # with round key w[48,51] + addi a2, a2, -16 + .word 33974919 + .word 2807046391 # with round key w[44,47] + addi a2, a2, -16 + .word 33974791 + .word 2805997815 # with round key w[40,43] + addi a2, a2, -16 + .word 33974663 + .word 2804949239 # with round key w[36,39] + addi a2, a2, -16 + .word 33974535 + .word 2803900663 # with round key w[32,35] + addi a2, a2, -16 + .word 33974407 + 
.word 2802852087 # with round key w[28,31] + addi a2, a2, -16 + .word 33974279 + .word 2801803511 # with round key w[24,27] + addi a2, a2, -16 + .word 33974151 + .word 2800754935 # with round key w[20,23] + addi a2, a2, -16 + .word 33974023 + .word 2799706359 # with round key w[16,19] + addi a2, a2, -16 + .word 33973895 + .word 2798657783 # with round key w[12,15] + addi a2, a2, -16 + .word 33973767 + .word 2797609207 # with round key w[ 8,11] + addi a2, a2, -16 + .word 33973639 + .word 2796560631 # with round key w[ 4, 7] + addi a2, a2, -16 + .word 33973511 + .word 2795544823 # with round key w[ 0, 3] + + .word 33939623 + + ret +.size L_dec_192,.-L_dec_192 +.p2align 3 +L_dec_256: + .word 3439489111 + + .word 33906823 + + addi a2, a2, 224 + .word 33975303 + .word 2810421495 # with round key w[56,59] + addi a2, a2, -16 + .word 33975175 + .word 2809143543 # with round key w[52,55] + addi a2, a2, -16 + .word 33975047 + .word 2808094967 # with round key w[48,51] + addi a2, a2, -16 + .word 33974919 + .word 2807046391 # with round key w[44,47] + addi a2, a2, -16 + .word 33974791 + .word 2805997815 # with round key w[40,43] + addi a2, a2, -16 + .word 33974663 + .word 2804949239 # with round key w[36,39] + addi a2, a2, -16 + .word 33974535 + .word 2803900663 # with round key w[32,35] + addi a2, a2, -16 + .word 33974407 + .word 2802852087 # with round key w[28,31] + addi a2, a2, -16 + .word 33974279 + .word 2801803511 # with round key w[24,27] + addi a2, a2, -16 + .word 33974151 + .word 2800754935 # with round key w[20,23] + addi a2, a2, -16 + .word 33974023 + .word 2799706359 # with round key w[16,19] + addi a2, a2, -16 + .word 33973895 + .word 2798657783 # with round key w[12,15] + addi a2, a2, -16 + .word 33973767 + .word 2797609207 # with round key w[ 8,11] + addi a2, a2, -16 + .word 33973639 + .word 2796560631 # with round key w[ 4, 7] + addi a2, a2, -16 + .word 33973511 + .word 2795544823 # with round key w[ 0, 3] + + .word 33939623 + + ret +.size L_dec_256,.-L_dec_256 +L_fail_m1: + li a0, -1 + ret +.size L_fail_m1,.-L_fail_m1 + +L_fail_m2: + li a0, -2 + ret +.size L_fail_m2,.-L_fail_m2 + +L_end: + ret +.size L_end,.-L_end diff --git a/contrib/openssl-cmake/asm/crypto/aes/aes-s390x.S b/contrib/openssl-cmake/asm/crypto/aes/aes-s390x.S new file mode 100644 index 000000000000..db245d308c49 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aes-s390x.S @@ -0,0 +1,2368 @@ +#include "s390x_arch.h" + +.text + +.type AES_Te,@object +.align 256 +AES_Te: +.long 0xc66363a5,0xc66363a5 +.long 0xf87c7c84,0xf87c7c84 +.long 0xee777799,0xee777799 +.long 0xf67b7b8d,0xf67b7b8d +.long 0xfff2f20d,0xfff2f20d +.long 0xd66b6bbd,0xd66b6bbd +.long 0xde6f6fb1,0xde6f6fb1 +.long 0x91c5c554,0x91c5c554 +.long 0x60303050,0x60303050 +.long 0x02010103,0x02010103 +.long 0xce6767a9,0xce6767a9 +.long 0x562b2b7d,0x562b2b7d +.long 0xe7fefe19,0xe7fefe19 +.long 0xb5d7d762,0xb5d7d762 +.long 0x4dababe6,0x4dababe6 +.long 0xec76769a,0xec76769a +.long 0x8fcaca45,0x8fcaca45 +.long 0x1f82829d,0x1f82829d +.long 0x89c9c940,0x89c9c940 +.long 0xfa7d7d87,0xfa7d7d87 +.long 0xeffafa15,0xeffafa15 +.long 0xb25959eb,0xb25959eb +.long 0x8e4747c9,0x8e4747c9 +.long 0xfbf0f00b,0xfbf0f00b +.long 0x41adadec,0x41adadec +.long 0xb3d4d467,0xb3d4d467 +.long 0x5fa2a2fd,0x5fa2a2fd +.long 0x45afafea,0x45afafea +.long 0x239c9cbf,0x239c9cbf +.long 0x53a4a4f7,0x53a4a4f7 +.long 0xe4727296,0xe4727296 +.long 0x9bc0c05b,0x9bc0c05b +.long 0x75b7b7c2,0x75b7b7c2 +.long 0xe1fdfd1c,0xe1fdfd1c +.long 0x3d9393ae,0x3d9393ae +.long 0x4c26266a,0x4c26266a +.long 
0x6c36365a,0x6c36365a +.long 0x7e3f3f41,0x7e3f3f41 +.long 0xf5f7f702,0xf5f7f702 +.long 0x83cccc4f,0x83cccc4f +.long 0x6834345c,0x6834345c +.long 0x51a5a5f4,0x51a5a5f4 +.long 0xd1e5e534,0xd1e5e534 +.long 0xf9f1f108,0xf9f1f108 +.long 0xe2717193,0xe2717193 +.long 0xabd8d873,0xabd8d873 +.long 0x62313153,0x62313153 +.long 0x2a15153f,0x2a15153f +.long 0x0804040c,0x0804040c +.long 0x95c7c752,0x95c7c752 +.long 0x46232365,0x46232365 +.long 0x9dc3c35e,0x9dc3c35e +.long 0x30181828,0x30181828 +.long 0x379696a1,0x379696a1 +.long 0x0a05050f,0x0a05050f +.long 0x2f9a9ab5,0x2f9a9ab5 +.long 0x0e070709,0x0e070709 +.long 0x24121236,0x24121236 +.long 0x1b80809b,0x1b80809b +.long 0xdfe2e23d,0xdfe2e23d +.long 0xcdebeb26,0xcdebeb26 +.long 0x4e272769,0x4e272769 +.long 0x7fb2b2cd,0x7fb2b2cd +.long 0xea75759f,0xea75759f +.long 0x1209091b,0x1209091b +.long 0x1d83839e,0x1d83839e +.long 0x582c2c74,0x582c2c74 +.long 0x341a1a2e,0x341a1a2e +.long 0x361b1b2d,0x361b1b2d +.long 0xdc6e6eb2,0xdc6e6eb2 +.long 0xb45a5aee,0xb45a5aee +.long 0x5ba0a0fb,0x5ba0a0fb +.long 0xa45252f6,0xa45252f6 +.long 0x763b3b4d,0x763b3b4d +.long 0xb7d6d661,0xb7d6d661 +.long 0x7db3b3ce,0x7db3b3ce +.long 0x5229297b,0x5229297b +.long 0xdde3e33e,0xdde3e33e +.long 0x5e2f2f71,0x5e2f2f71 +.long 0x13848497,0x13848497 +.long 0xa65353f5,0xa65353f5 +.long 0xb9d1d168,0xb9d1d168 +.long 0x00000000,0x00000000 +.long 0xc1eded2c,0xc1eded2c +.long 0x40202060,0x40202060 +.long 0xe3fcfc1f,0xe3fcfc1f +.long 0x79b1b1c8,0x79b1b1c8 +.long 0xb65b5bed,0xb65b5bed +.long 0xd46a6abe,0xd46a6abe +.long 0x8dcbcb46,0x8dcbcb46 +.long 0x67bebed9,0x67bebed9 +.long 0x7239394b,0x7239394b +.long 0x944a4ade,0x944a4ade +.long 0x984c4cd4,0x984c4cd4 +.long 0xb05858e8,0xb05858e8 +.long 0x85cfcf4a,0x85cfcf4a +.long 0xbbd0d06b,0xbbd0d06b +.long 0xc5efef2a,0xc5efef2a +.long 0x4faaaae5,0x4faaaae5 +.long 0xedfbfb16,0xedfbfb16 +.long 0x864343c5,0x864343c5 +.long 0x9a4d4dd7,0x9a4d4dd7 +.long 0x66333355,0x66333355 +.long 0x11858594,0x11858594 +.long 0x8a4545cf,0x8a4545cf +.long 0xe9f9f910,0xe9f9f910 +.long 0x04020206,0x04020206 +.long 0xfe7f7f81,0xfe7f7f81 +.long 0xa05050f0,0xa05050f0 +.long 0x783c3c44,0x783c3c44 +.long 0x259f9fba,0x259f9fba +.long 0x4ba8a8e3,0x4ba8a8e3 +.long 0xa25151f3,0xa25151f3 +.long 0x5da3a3fe,0x5da3a3fe +.long 0x804040c0,0x804040c0 +.long 0x058f8f8a,0x058f8f8a +.long 0x3f9292ad,0x3f9292ad +.long 0x219d9dbc,0x219d9dbc +.long 0x70383848,0x70383848 +.long 0xf1f5f504,0xf1f5f504 +.long 0x63bcbcdf,0x63bcbcdf +.long 0x77b6b6c1,0x77b6b6c1 +.long 0xafdada75,0xafdada75 +.long 0x42212163,0x42212163 +.long 0x20101030,0x20101030 +.long 0xe5ffff1a,0xe5ffff1a +.long 0xfdf3f30e,0xfdf3f30e +.long 0xbfd2d26d,0xbfd2d26d +.long 0x81cdcd4c,0x81cdcd4c +.long 0x180c0c14,0x180c0c14 +.long 0x26131335,0x26131335 +.long 0xc3ecec2f,0xc3ecec2f +.long 0xbe5f5fe1,0xbe5f5fe1 +.long 0x359797a2,0x359797a2 +.long 0x884444cc,0x884444cc +.long 0x2e171739,0x2e171739 +.long 0x93c4c457,0x93c4c457 +.long 0x55a7a7f2,0x55a7a7f2 +.long 0xfc7e7e82,0xfc7e7e82 +.long 0x7a3d3d47,0x7a3d3d47 +.long 0xc86464ac,0xc86464ac +.long 0xba5d5de7,0xba5d5de7 +.long 0x3219192b,0x3219192b +.long 0xe6737395,0xe6737395 +.long 0xc06060a0,0xc06060a0 +.long 0x19818198,0x19818198 +.long 0x9e4f4fd1,0x9e4f4fd1 +.long 0xa3dcdc7f,0xa3dcdc7f +.long 0x44222266,0x44222266 +.long 0x542a2a7e,0x542a2a7e +.long 0x3b9090ab,0x3b9090ab +.long 0x0b888883,0x0b888883 +.long 0x8c4646ca,0x8c4646ca +.long 0xc7eeee29,0xc7eeee29 +.long 0x6bb8b8d3,0x6bb8b8d3 +.long 0x2814143c,0x2814143c +.long 0xa7dede79,0xa7dede79 +.long 0xbc5e5ee2,0xbc5e5ee2 +.long 
0x160b0b1d,0x160b0b1d +.long 0xaddbdb76,0xaddbdb76 +.long 0xdbe0e03b,0xdbe0e03b +.long 0x64323256,0x64323256 +.long 0x743a3a4e,0x743a3a4e +.long 0x140a0a1e,0x140a0a1e +.long 0x924949db,0x924949db +.long 0x0c06060a,0x0c06060a +.long 0x4824246c,0x4824246c +.long 0xb85c5ce4,0xb85c5ce4 +.long 0x9fc2c25d,0x9fc2c25d +.long 0xbdd3d36e,0xbdd3d36e +.long 0x43acacef,0x43acacef +.long 0xc46262a6,0xc46262a6 +.long 0x399191a8,0x399191a8 +.long 0x319595a4,0x319595a4 +.long 0xd3e4e437,0xd3e4e437 +.long 0xf279798b,0xf279798b +.long 0xd5e7e732,0xd5e7e732 +.long 0x8bc8c843,0x8bc8c843 +.long 0x6e373759,0x6e373759 +.long 0xda6d6db7,0xda6d6db7 +.long 0x018d8d8c,0x018d8d8c +.long 0xb1d5d564,0xb1d5d564 +.long 0x9c4e4ed2,0x9c4e4ed2 +.long 0x49a9a9e0,0x49a9a9e0 +.long 0xd86c6cb4,0xd86c6cb4 +.long 0xac5656fa,0xac5656fa +.long 0xf3f4f407,0xf3f4f407 +.long 0xcfeaea25,0xcfeaea25 +.long 0xca6565af,0xca6565af +.long 0xf47a7a8e,0xf47a7a8e +.long 0x47aeaee9,0x47aeaee9 +.long 0x10080818,0x10080818 +.long 0x6fbabad5,0x6fbabad5 +.long 0xf0787888,0xf0787888 +.long 0x4a25256f,0x4a25256f +.long 0x5c2e2e72,0x5c2e2e72 +.long 0x381c1c24,0x381c1c24 +.long 0x57a6a6f1,0x57a6a6f1 +.long 0x73b4b4c7,0x73b4b4c7 +.long 0x97c6c651,0x97c6c651 +.long 0xcbe8e823,0xcbe8e823 +.long 0xa1dddd7c,0xa1dddd7c +.long 0xe874749c,0xe874749c +.long 0x3e1f1f21,0x3e1f1f21 +.long 0x964b4bdd,0x964b4bdd +.long 0x61bdbddc,0x61bdbddc +.long 0x0d8b8b86,0x0d8b8b86 +.long 0x0f8a8a85,0x0f8a8a85 +.long 0xe0707090,0xe0707090 +.long 0x7c3e3e42,0x7c3e3e42 +.long 0x71b5b5c4,0x71b5b5c4 +.long 0xcc6666aa,0xcc6666aa +.long 0x904848d8,0x904848d8 +.long 0x06030305,0x06030305 +.long 0xf7f6f601,0xf7f6f601 +.long 0x1c0e0e12,0x1c0e0e12 +.long 0xc26161a3,0xc26161a3 +.long 0x6a35355f,0x6a35355f +.long 0xae5757f9,0xae5757f9 +.long 0x69b9b9d0,0x69b9b9d0 +.long 0x17868691,0x17868691 +.long 0x99c1c158,0x99c1c158 +.long 0x3a1d1d27,0x3a1d1d27 +.long 0x279e9eb9,0x279e9eb9 +.long 0xd9e1e138,0xd9e1e138 +.long 0xebf8f813,0xebf8f813 +.long 0x2b9898b3,0x2b9898b3 +.long 0x22111133,0x22111133 +.long 0xd26969bb,0xd26969bb +.long 0xa9d9d970,0xa9d9d970 +.long 0x078e8e89,0x078e8e89 +.long 0x339494a7,0x339494a7 +.long 0x2d9b9bb6,0x2d9b9bb6 +.long 0x3c1e1e22,0x3c1e1e22 +.long 0x15878792,0x15878792 +.long 0xc9e9e920,0xc9e9e920 +.long 0x87cece49,0x87cece49 +.long 0xaa5555ff,0xaa5555ff +.long 0x50282878,0x50282878 +.long 0xa5dfdf7a,0xa5dfdf7a +.long 0x038c8c8f,0x038c8c8f +.long 0x59a1a1f8,0x59a1a1f8 +.long 0x09898980,0x09898980 +.long 0x1a0d0d17,0x1a0d0d17 +.long 0x65bfbfda,0x65bfbfda +.long 0xd7e6e631,0xd7e6e631 +.long 0x844242c6,0x844242c6 +.long 0xd06868b8,0xd06868b8 +.long 0x824141c3,0x824141c3 +.long 0x299999b0,0x299999b0 +.long 0x5a2d2d77,0x5a2d2d77 +.long 0x1e0f0f11,0x1e0f0f11 +.long 0x7bb0b0cb,0x7bb0b0cb +.long 0xa85454fc,0xa85454fc +.long 0x6dbbbbd6,0x6dbbbbd6 +.long 0x2c16163a,0x2c16163a +# Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +# rcon[] +.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.align 256 +.size AES_Te,.-AES_Te + +# void AES_encrypt(const unsigned char *inp, unsigned char *out, +# const AES_KEY *key) { +.globl AES_encrypt +.type AES_encrypt,@function +AES_encrypt: + l %r0,240(%r4) + lhi %r1,16 + clr %r0,%r1 + jl .Lesoft + + la %r1,0(%r4) + #la %r2,0(%r2) + la %r4,0(%r3) + lghi %r3,16 # single block length + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # can this happen? + br %r14 +.align 64 +.Lesoft: + stmg %r3,%r14,3*8(%r15) + + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + + larl %r12,AES_Te + bras %r14,_s390x_AES_encrypt + + lg %r3,3*8(%r15) + st %r8,0(%r3) + st %r9,4(%r3) + st %r10,8(%r3) + st %r11,12(%r3) + + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_encrypt,.-AES_encrypt + +.type _s390x_AES_encrypt,@function +.align 16 +_s390x_AES_encrypt: + stg %r14,15*8(%r15) + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + l %r13,240(%r4) + llill %r0,2040 + aghi %r13,-1 + j .Lenc_loop +.align 16 +.Lenc_loop: + sllg %r1,%r8,3 + srlg %r2,%r8,5 + srlg %r3,%r8,13 + srl %r8,21 + nr %r8,%r0 + ngr %r1,%r0 + nr %r2,%r0 + nr %r3,%r0 + + srlg %r5,%r9,13 # i0 + sllg %r6,%r9,3 + srlg %r7,%r9,5 + srl %r9,21 + nr %r5,%r0 + nr %r9,%r0 + ngr %r6,%r0 + nr %r7,%r0 + + l %r8,0(%r8,%r12) # Te0[s0>>24] + l %r1,1(%r1,%r12) # Te3[s0>>0] + l %r2,2(%r2,%r12) # Te2[s0>>8] + l %r3,3(%r3,%r12) # Te1[s0>>16] + + x %r8,3(%r5,%r12) # Te1[s1>>16] + l %r9,0(%r9,%r12) # Te0[s1>>24] + x %r2,1(%r6,%r12) # Te3[s1>>0] + x %r3,2(%r7,%r12) # Te2[s1>>8] + + srlg %r5,%r10,5 # i0 + srlg %r6,%r10,13 # i1 + nr %r5,%r0 + nr %r6,%r0 + sllg %r7,%r10,3 + srl %r10,21 + nr %r10,%r0 + ngr %r7,%r0 + + xr %r9,%r1 + srlg %r14,%r11,5 # i1 + sllg %r1,%r11,3 # i0 + nr %r14,%r0 + la %r4,16(%r4) + ngr %r1,%r0 + + x %r8,2(%r5,%r12) # Te2[s2>>8] + x %r9,3(%r6,%r12) # Te1[s2>>16] + l %r10,0(%r10,%r12) # Te0[s2>>24] + x %r3,1(%r7,%r12) # Te3[s2>>0] + + srlg %r7,%r11,13 # i2 + xr %r10,%r2 + srl %r11,21 + nr %r7,%r0 + nr %r11,%r0 + + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r3,12(%r4) + + x %r8,1(%r1,%r12) # Te3[s3>>0] + x %r9,2(%r14,%r12) # Te2[s3>>8] + x %r10,3(%r7,%r12) # Te1[s3>>16] + l %r11,0(%r11,%r12) # Te0[s3>>24] + xr %r11,%r3 + + brct %r13,.Lenc_loop + .align 16 + + sllg %r1,%r8,3 + srlg %r2,%r8,5 + ngr %r1,%r0 + srlg %r3,%r8,13 + srl %r8,21 + nr %r8,%r0 + nr %r2,%r0 + nr %r3,%r0 + + srlg %r5,%r9,13 # i0 
+ sllg %r6,%r9,3 + ngr %r6,%r0 + srlg %r7,%r9,5 + srl %r9,21 + nr %r5,%r0 + nr %r9,%r0 + nr %r7,%r0 + + llgc %r8,2(%r8,%r12) # Te4[s0>>24] + llgc %r1,2(%r1,%r12) # Te4[s0>>0] + sll %r8,24 + llgc %r2,2(%r2,%r12) # Te4[s0>>8] + llgc %r3,2(%r3,%r12) # Te4[s0>>16] + sll %r2,8 + sll %r3,16 + + llgc %r5,2(%r5,%r12) # Te4[s1>>16] + llgc %r9,2(%r9,%r12) # Te4[s1>>24] + llgc %r6,2(%r6,%r12) # Te4[s1>>0] + llgc %r7,2(%r7,%r12) # Te4[s1>>8] + sll %r5,16 + sll %r9,24 + sll %r7,8 + or %r8,%r5 + or %r9,%r1 + or %r2,%r6 + or %r3,%r7 + + srlg %r5,%r10,5 # i0 + srlg %r6,%r10,13 # i1 + nr %r5,%r0 + nr %r6,%r0 + sllg %r7,%r10,3 + srl %r10,21 + ngr %r7,%r0 + nr %r10,%r0 + + sllg %r1,%r11,3 # i0 + srlg %r14,%r11,5 # i1 + ngr %r1,%r0 + + llgc %r5,2(%r5,%r12) # Te4[s2>>8] + llgc %r6,2(%r6,%r12) # Te4[s2>>16] + sll %r5,8 + llgc %r10,2(%r10,%r12) # Te4[s2>>24] + llgc %r7,2(%r7,%r12) # Te4[s2>>0] + sll %r6,16 + nr %r14,%r0 + sll %r10,24 + or %r8,%r5 + or %r9,%r6 + or %r10,%r2 + or %r3,%r7 + + srlg %r7,%r11,13 # i2 + srl %r11,21 + nr %r7,%r0 + nr %r11,%r0 + + l %r0,16(%r4) + l %r2,20(%r4) + + llgc %r5,2(%r1,%r12) # Te4[s3>>0] + llgc %r6,2(%r14,%r12) # Te4[s3>>8] + llgc %r7,2(%r7,%r12) # Te4[s3>>16] + llgc %r11,2(%r11,%r12) # Te4[s3>>24] + sll %r6,8 + sll %r7,16 + sll %r11,24 + or %r8,%r5 + or %r9,%r6 + or %r10,%r7 + or %r11,%r3 + + lg %r14,15*8(%r15) + xr %r8,%r0 + xr %r9,%r2 + x %r10,24(%r4) + x %r11,28(%r4) + + br %r14 +.size _s390x_AES_encrypt,.-_s390x_AES_encrypt +.type AES_Td,@object +.align 256 +AES_Td: +.long 0x51f4a750,0x51f4a750 +.long 0x7e416553,0x7e416553 +.long 0x1a17a4c3,0x1a17a4c3 +.long 0x3a275e96,0x3a275e96 +.long 0x3bab6bcb,0x3bab6bcb +.long 0x1f9d45f1,0x1f9d45f1 +.long 0xacfa58ab,0xacfa58ab +.long 0x4be30393,0x4be30393 +.long 0x2030fa55,0x2030fa55 +.long 0xad766df6,0xad766df6 +.long 0x88cc7691,0x88cc7691 +.long 0xf5024c25,0xf5024c25 +.long 0x4fe5d7fc,0x4fe5d7fc +.long 0xc52acbd7,0xc52acbd7 +.long 0x26354480,0x26354480 +.long 0xb562a38f,0xb562a38f +.long 0xdeb15a49,0xdeb15a49 +.long 0x25ba1b67,0x25ba1b67 +.long 0x45ea0e98,0x45ea0e98 +.long 0x5dfec0e1,0x5dfec0e1 +.long 0xc32f7502,0xc32f7502 +.long 0x814cf012,0x814cf012 +.long 0x8d4697a3,0x8d4697a3 +.long 0x6bd3f9c6,0x6bd3f9c6 +.long 0x038f5fe7,0x038f5fe7 +.long 0x15929c95,0x15929c95 +.long 0xbf6d7aeb,0xbf6d7aeb +.long 0x955259da,0x955259da +.long 0xd4be832d,0xd4be832d +.long 0x587421d3,0x587421d3 +.long 0x49e06929,0x49e06929 +.long 0x8ec9c844,0x8ec9c844 +.long 0x75c2896a,0x75c2896a +.long 0xf48e7978,0xf48e7978 +.long 0x99583e6b,0x99583e6b +.long 0x27b971dd,0x27b971dd +.long 0xbee14fb6,0xbee14fb6 +.long 0xf088ad17,0xf088ad17 +.long 0xc920ac66,0xc920ac66 +.long 0x7dce3ab4,0x7dce3ab4 +.long 0x63df4a18,0x63df4a18 +.long 0xe51a3182,0xe51a3182 +.long 0x97513360,0x97513360 +.long 0x62537f45,0x62537f45 +.long 0xb16477e0,0xb16477e0 +.long 0xbb6bae84,0xbb6bae84 +.long 0xfe81a01c,0xfe81a01c +.long 0xf9082b94,0xf9082b94 +.long 0x70486858,0x70486858 +.long 0x8f45fd19,0x8f45fd19 +.long 0x94de6c87,0x94de6c87 +.long 0x527bf8b7,0x527bf8b7 +.long 0xab73d323,0xab73d323 +.long 0x724b02e2,0x724b02e2 +.long 0xe31f8f57,0xe31f8f57 +.long 0x6655ab2a,0x6655ab2a +.long 0xb2eb2807,0xb2eb2807 +.long 0x2fb5c203,0x2fb5c203 +.long 0x86c57b9a,0x86c57b9a +.long 0xd33708a5,0xd33708a5 +.long 0x302887f2,0x302887f2 +.long 0x23bfa5b2,0x23bfa5b2 +.long 0x02036aba,0x02036aba +.long 0xed16825c,0xed16825c +.long 0x8acf1c2b,0x8acf1c2b +.long 0xa779b492,0xa779b492 +.long 0xf307f2f0,0xf307f2f0 +.long 0x4e69e2a1,0x4e69e2a1 +.long 0x65daf4cd,0x65daf4cd +.long 0x0605bed5,0x0605bed5 +.long 
0xd134621f,0xd134621f +.long 0xc4a6fe8a,0xc4a6fe8a +.long 0x342e539d,0x342e539d +.long 0xa2f355a0,0xa2f355a0 +.long 0x058ae132,0x058ae132 +.long 0xa4f6eb75,0xa4f6eb75 +.long 0x0b83ec39,0x0b83ec39 +.long 0x4060efaa,0x4060efaa +.long 0x5e719f06,0x5e719f06 +.long 0xbd6e1051,0xbd6e1051 +.long 0x3e218af9,0x3e218af9 +.long 0x96dd063d,0x96dd063d +.long 0xdd3e05ae,0xdd3e05ae +.long 0x4de6bd46,0x4de6bd46 +.long 0x91548db5,0x91548db5 +.long 0x71c45d05,0x71c45d05 +.long 0x0406d46f,0x0406d46f +.long 0x605015ff,0x605015ff +.long 0x1998fb24,0x1998fb24 +.long 0xd6bde997,0xd6bde997 +.long 0x894043cc,0x894043cc +.long 0x67d99e77,0x67d99e77 +.long 0xb0e842bd,0xb0e842bd +.long 0x07898b88,0x07898b88 +.long 0xe7195b38,0xe7195b38 +.long 0x79c8eedb,0x79c8eedb +.long 0xa17c0a47,0xa17c0a47 +.long 0x7c420fe9,0x7c420fe9 +.long 0xf8841ec9,0xf8841ec9 +.long 0x00000000,0x00000000 +.long 0x09808683,0x09808683 +.long 0x322bed48,0x322bed48 +.long 0x1e1170ac,0x1e1170ac +.long 0x6c5a724e,0x6c5a724e +.long 0xfd0efffb,0xfd0efffb +.long 0x0f853856,0x0f853856 +.long 0x3daed51e,0x3daed51e +.long 0x362d3927,0x362d3927 +.long 0x0a0fd964,0x0a0fd964 +.long 0x685ca621,0x685ca621 +.long 0x9b5b54d1,0x9b5b54d1 +.long 0x24362e3a,0x24362e3a +.long 0x0c0a67b1,0x0c0a67b1 +.long 0x9357e70f,0x9357e70f +.long 0xb4ee96d2,0xb4ee96d2 +.long 0x1b9b919e,0x1b9b919e +.long 0x80c0c54f,0x80c0c54f +.long 0x61dc20a2,0x61dc20a2 +.long 0x5a774b69,0x5a774b69 +.long 0x1c121a16,0x1c121a16 +.long 0xe293ba0a,0xe293ba0a +.long 0xc0a02ae5,0xc0a02ae5 +.long 0x3c22e043,0x3c22e043 +.long 0x121b171d,0x121b171d +.long 0x0e090d0b,0x0e090d0b +.long 0xf28bc7ad,0xf28bc7ad +.long 0x2db6a8b9,0x2db6a8b9 +.long 0x141ea9c8,0x141ea9c8 +.long 0x57f11985,0x57f11985 +.long 0xaf75074c,0xaf75074c +.long 0xee99ddbb,0xee99ddbb +.long 0xa37f60fd,0xa37f60fd +.long 0xf701269f,0xf701269f +.long 0x5c72f5bc,0x5c72f5bc +.long 0x44663bc5,0x44663bc5 +.long 0x5bfb7e34,0x5bfb7e34 +.long 0x8b432976,0x8b432976 +.long 0xcb23c6dc,0xcb23c6dc +.long 0xb6edfc68,0xb6edfc68 +.long 0xb8e4f163,0xb8e4f163 +.long 0xd731dcca,0xd731dcca +.long 0x42638510,0x42638510 +.long 0x13972240,0x13972240 +.long 0x84c61120,0x84c61120 +.long 0x854a247d,0x854a247d +.long 0xd2bb3df8,0xd2bb3df8 +.long 0xaef93211,0xaef93211 +.long 0xc729a16d,0xc729a16d +.long 0x1d9e2f4b,0x1d9e2f4b +.long 0xdcb230f3,0xdcb230f3 +.long 0x0d8652ec,0x0d8652ec +.long 0x77c1e3d0,0x77c1e3d0 +.long 0x2bb3166c,0x2bb3166c +.long 0xa970b999,0xa970b999 +.long 0x119448fa,0x119448fa +.long 0x47e96422,0x47e96422 +.long 0xa8fc8cc4,0xa8fc8cc4 +.long 0xa0f03f1a,0xa0f03f1a +.long 0x567d2cd8,0x567d2cd8 +.long 0x223390ef,0x223390ef +.long 0x87494ec7,0x87494ec7 +.long 0xd938d1c1,0xd938d1c1 +.long 0x8ccaa2fe,0x8ccaa2fe +.long 0x98d40b36,0x98d40b36 +.long 0xa6f581cf,0xa6f581cf +.long 0xa57ade28,0xa57ade28 +.long 0xdab78e26,0xdab78e26 +.long 0x3fadbfa4,0x3fadbfa4 +.long 0x2c3a9de4,0x2c3a9de4 +.long 0x5078920d,0x5078920d +.long 0x6a5fcc9b,0x6a5fcc9b +.long 0x547e4662,0x547e4662 +.long 0xf68d13c2,0xf68d13c2 +.long 0x90d8b8e8,0x90d8b8e8 +.long 0x2e39f75e,0x2e39f75e +.long 0x82c3aff5,0x82c3aff5 +.long 0x9f5d80be,0x9f5d80be +.long 0x69d0937c,0x69d0937c +.long 0x6fd52da9,0x6fd52da9 +.long 0xcf2512b3,0xcf2512b3 +.long 0xc8ac993b,0xc8ac993b +.long 0x10187da7,0x10187da7 +.long 0xe89c636e,0xe89c636e +.long 0xdb3bbb7b,0xdb3bbb7b +.long 0xcd267809,0xcd267809 +.long 0x6e5918f4,0x6e5918f4 +.long 0xec9ab701,0xec9ab701 +.long 0x834f9aa8,0x834f9aa8 +.long 0xe6956e65,0xe6956e65 +.long 0xaaffe67e,0xaaffe67e +.long 0x21bccf08,0x21bccf08 +.long 0xef15e8e6,0xef15e8e6 +.long 
0xbae79bd9,0xbae79bd9 +.long 0x4a6f36ce,0x4a6f36ce +.long 0xea9f09d4,0xea9f09d4 +.long 0x29b07cd6,0x29b07cd6 +.long 0x31a4b2af,0x31a4b2af +.long 0x2a3f2331,0x2a3f2331 +.long 0xc6a59430,0xc6a59430 +.long 0x35a266c0,0x35a266c0 +.long 0x744ebc37,0x744ebc37 +.long 0xfc82caa6,0xfc82caa6 +.long 0xe090d0b0,0xe090d0b0 +.long 0x33a7d815,0x33a7d815 +.long 0xf104984a,0xf104984a +.long 0x41ecdaf7,0x41ecdaf7 +.long 0x7fcd500e,0x7fcd500e +.long 0x1791f62f,0x1791f62f +.long 0x764dd68d,0x764dd68d +.long 0x43efb04d,0x43efb04d +.long 0xccaa4d54,0xccaa4d54 +.long 0xe49604df,0xe49604df +.long 0x9ed1b5e3,0x9ed1b5e3 +.long 0x4c6a881b,0x4c6a881b +.long 0xc12c1fb8,0xc12c1fb8 +.long 0x4665517f,0x4665517f +.long 0x9d5eea04,0x9d5eea04 +.long 0x018c355d,0x018c355d +.long 0xfa877473,0xfa877473 +.long 0xfb0b412e,0xfb0b412e +.long 0xb3671d5a,0xb3671d5a +.long 0x92dbd252,0x92dbd252 +.long 0xe9105633,0xe9105633 +.long 0x6dd64713,0x6dd64713 +.long 0x9ad7618c,0x9ad7618c +.long 0x37a10c7a,0x37a10c7a +.long 0x59f8148e,0x59f8148e +.long 0xeb133c89,0xeb133c89 +.long 0xcea927ee,0xcea927ee +.long 0xb761c935,0xb761c935 +.long 0xe11ce5ed,0xe11ce5ed +.long 0x7a47b13c,0x7a47b13c +.long 0x9cd2df59,0x9cd2df59 +.long 0x55f2733f,0x55f2733f +.long 0x1814ce79,0x1814ce79 +.long 0x73c737bf,0x73c737bf +.long 0x53f7cdea,0x53f7cdea +.long 0x5ffdaa5b,0x5ffdaa5b +.long 0xdf3d6f14,0xdf3d6f14 +.long 0x7844db86,0x7844db86 +.long 0xcaaff381,0xcaaff381 +.long 0xb968c43e,0xb968c43e +.long 0x3824342c,0x3824342c +.long 0xc2a3405f,0xc2a3405f +.long 0x161dc372,0x161dc372 +.long 0xbce2250c,0xbce2250c +.long 0x283c498b,0x283c498b +.long 0xff0d9541,0xff0d9541 +.long 0x39a80171,0x39a80171 +.long 0x080cb3de,0x080cb3de +.long 0xd8b4e49c,0xd8b4e49c +.long 0x6456c190,0x6456c190 +.long 0x7bcb8461,0x7bcb8461 +.long 0xd532b670,0xd532b670 +.long 0x486c5c74,0x486c5c74 +.long 0xd0b85742,0xd0b85742 +# Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 
0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +# void AES_decrypt(const unsigned char *inp, unsigned char *out, +# const AES_KEY *key) { +.globl AES_decrypt +.type AES_decrypt,@function +AES_decrypt: + l %r0,240(%r4) + lhi %r1,16 + clr %r0,%r1 + jl .Ldsoft + + la %r1,0(%r4) + #la %r2,0(%r2) + la %r4,0(%r3) + lghi %r3,16 # single block length + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # can this happen? + br %r14 +.align 64 +.Ldsoft: + stmg %r3,%r14,3*8(%r15) + + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + + larl %r12,AES_Td + bras %r14,_s390x_AES_decrypt + + lg %r3,3*8(%r15) + st %r8,0(%r3) + st %r9,4(%r3) + st %r10,8(%r3) + st %r11,12(%r3) + + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_decrypt,.-AES_decrypt + +.type _s390x_AES_decrypt,@function +.align 16 +_s390x_AES_decrypt: + stg %r14,15*8(%r15) + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + l %r13,240(%r4) + llill %r0,2040 + aghi %r13,-1 + j .Ldec_loop +.align 16 +.Ldec_loop: + srlg %r1,%r8,13 + srlg %r2,%r8,5 + sllg %r3,%r8,3 + srl %r8,21 + nr %r8,%r0 + nr %r1,%r0 + nr %r2,%r0 + ngr %r3,%r0 + + sllg %r5,%r9,3 # i0 + srlg %r6,%r9,13 + srlg %r7,%r9,5 + srl %r9,21 + ngr %r5,%r0 + nr %r9,%r0 + nr %r6,%r0 + nr %r7,%r0 + + l %r8,0(%r8,%r12) # Td0[s0>>24] + l %r1,3(%r1,%r12) # Td1[s0>>16] + l %r2,2(%r2,%r12) # Td2[s0>>8] + l %r3,1(%r3,%r12) # Td3[s0>>0] + + x %r8,1(%r5,%r12) # Td3[s1>>0] + l %r9,0(%r9,%r12) # Td0[s1>>24] + x %r2,3(%r6,%r12) # Td1[s1>>16] + x %r3,2(%r7,%r12) # Td2[s1>>8] + + srlg %r5,%r10,5 # i0 + sllg %r6,%r10,3 # i1 + srlg %r7,%r10,13 + srl %r10,21 + nr %r5,%r0 + ngr %r6,%r0 + nr %r10,%r0 + nr %r7,%r0 + + xr %r9,%r1 + srlg %r14,%r11,5 # i1 + srlg %r1,%r11,13 # i0 + nr %r14,%r0 + la %r4,16(%r4) + nr %r1,%r0 + + x %r8,2(%r5,%r12) # Td2[s2>>8] + x %r9,1(%r6,%r12) # Td3[s2>>0] + l %r10,0(%r10,%r12) # Td0[s2>>24] + x %r3,3(%r7,%r12) # Td1[s2>>16] + + sllg %r7,%r11,3 # i2 + srl %r11,21 + ngr %r7,%r0 + nr %r11,%r0 + + xr %r10,%r2 + x %r8,0(%r4) + x %r9,4(%r4) + x %r10,8(%r4) + x %r3,12(%r4) + + x %r8,3(%r1,%r12) # Td1[s3>>16] + x %r9,2(%r14,%r12) # Td2[s3>>8] + x %r10,1(%r7,%r12) # Td3[s3>>0] + l %r11,0(%r11,%r12) # Td0[s3>>24] + xr %r11,%r3 + + brct %r13,.Ldec_loop + .align 16 + + l %r1,2048(%r12) # prefetch Td4 + l %r2,2112(%r12) + l %r3,2176(%r12) + l %r5,2240(%r12) + llill %r0,0xff + + srlg %r7,%r8,24 # i0 + srlg %r1,%r8,16 + srlg %r2,%r8,8 + nr %r8,%r0 # i3 + nr %r1,%r0 + + srlg %r5,%r9,24 + nr %r2,%r0 + srlg %r6,%r9,16 + srlg %r14,%r9,8 + nr %r9,%r0 # i0 + nr %r6,%r0 + nr %r14,%r0 + + llgc %r7,2048(%r7,%r12) # Td4[s0>>24] + llgc %r1,2048(%r1,%r12) # Td4[s0>>16] + llgc %r2,2048(%r2,%r12) # Td4[s0>>8] + sll %r1,16 + llgc %r3,2048(%r8,%r12) # Td4[s0>>0] + sllg %r8,%r7,24 + sll %r2,8 + + llgc %r9,2048(%r9,%r12) # Td4[s1>>0] + llgc %r5,2048(%r5,%r12) # Td4[s1>>24] + llgc %r6,2048(%r6,%r12) # Td4[s1>>16] + sll %r5,24 + llgc %r7,2048(%r14,%r12) # Td4[s1>>8] + sll %r6,16 + sll %r7,8 + or %r8,%r9 + or %r1,%r5 + or %r2,%r6 + or %r3,%r7 + + srlg %r5,%r10,8 # i0 + srlg %r6,%r10,24 + srlg %r7,%r10,16 + nr %r10,%r0 # i1 + nr %r5,%r0 + nr %r7,%r0 + llgc %r5,2048(%r5,%r12) # Td4[s2>>8] + llgc %r9,2048(%r10,%r12) # Td4[s2>>0] + llgc %r6,2048(%r6,%r12) # Td4[s2>>24] + llgc %r7,2048(%r7,%r12) # Td4[s2>>16] + sll %r5,8 + sll %r6,24 + or %r8,%r5 + sll %r7,16 + or %r2,%r6 + or %r3,%r7 + + srlg %r5,%r11,16 # i0 + srlg %r6,%r11,8 # i1 + srlg %r7,%r11,24 + nr %r11,%r0 # i2 + nr %r5,%r0 + nr %r6,%r0 + + lg %r14,15*8(%r15) + or %r9,%r1 + l %r0,16(%r4) + l %r1,20(%r4) + + llgc 
%r5,2048(%r5,%r12) # Td4[s3>>16] + llgc %r6,2048(%r6,%r12) # Td4[s3>>8] + sll %r5,16 + llgc %r10,2048(%r11,%r12) # Td4[s3>>0] + llgc %r11,2048(%r7,%r12) # Td4[s3>>24] + sll %r6,8 + sll %r11,24 + or %r8,%r5 + or %r9,%r6 + or %r10,%r2 + or %r11,%r3 + + xr %r8,%r0 + xr %r9,%r1 + x %r10,24(%r4) + x %r11,28(%r4) + + br %r14 +.size _s390x_AES_decrypt,.-_s390x_AES_decrypt +# void AES_set_encrypt_key(const unsigned char *in, int bits, +# AES_KEY *key) { +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function +.align 16 +AES_set_encrypt_key: +_s390x_AES_set_encrypt_key: + lghi %r0,0 + clgr %r2,%r0 + je .Lminus1 + clgr %r4,%r0 + je .Lminus1 + + lghi %r0,128 + clr %r3,%r0 + je .Lproceed + lghi %r0,192 + clr %r3,%r0 + je .Lproceed + lghi %r0,256 + clr %r3,%r0 + je .Lproceed + lghi %r2,-2 + br %r14 + +.align 16 +.Lproceed: + # convert bits to km(c) code, [128,192,256]->[18,19,20] + lhi %r5,-128 + lhi %r0,18 + ar %r5,%r3 + srl %r5,6 + ar %r5,%r0 + + larl %r1,OPENSSL_s390xcap_P + llihh %r0,0x8000 + srlg %r0,%r0,0(%r5) + ng %r0,S390X_KM(%r1) # check availability of both km... + ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length + jz .Lekey_internal + + lmg %r0,%r1,0(%r2) # just copy 128 bits... + stmg %r0,%r1,0(%r4) + lhi %r0,192 + cr %r3,%r0 + jl 1f + lg %r1,16(%r2) + stg %r1,16(%r4) + je 1f + lg %r1,24(%r2) + stg %r1,24(%r4) +1: st %r3,236(%r4) # save bits [for debugging purposes] + lgr %r0,%r5 + st %r5,240(%r4) # save km(c) code + lghi %r2,0 + br %r14 +.align 16 +.Lekey_internal: + stmg %r4,%r13,4*8(%r15) # all non-volatile regs and %r4 + + larl %r12,AES_Te+2048 + + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + lghi %r0,128 + cr %r3,%r0 + jne .Lnot128 + + llill %r0,0xff + lghi %r3,0 # i=0 + lghi %r13,10 + st %r13,240(%r4) + + llgfr %r2,%r11 # temp=rk[3] + srlg %r5,%r11,8 + srlg %r6,%r11,16 + srlg %r7,%r11,24 + nr %r2,%r0 + nr %r5,%r0 + nr %r6,%r0 + +.align 16 +.L128_loop: + la %r2,0(%r2,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + icm %r2,2,0(%r2) # Te4[rk[3]>>0]<<8 + icm %r2,4,0(%r5) # Te4[rk[3]>>8]<<16 + icm %r2,8,0(%r6) # Te4[rk[3]>>16]<<24 + icm %r2,1,0(%r7) # Te4[rk[3]>>24] + x %r2,256(%r3,%r12) # rcon[i] + xr %r8,%r2 # rk[4]=rk[0]^... + xr %r9,%r8 # rk[5]=rk[1]^rk[4] + xr %r10,%r9 # rk[6]=rk[2]^rk[5] + xr %r11,%r10 # rk[7]=rk[3]^rk[6] + + llgfr %r2,%r11 # temp=rk[3] + srlg %r5,%r11,8 + srlg %r6,%r11,16 + nr %r2,%r0 + nr %r5,%r0 + srlg %r7,%r11,24 + nr %r6,%r0 + + st %r8,16(%r4) + st %r9,20(%r4) + st %r10,24(%r4) + st %r11,28(%r4) + la %r4,16(%r4) # key+=4 + la %r3,4(%r3) # i++ + brct %r13,.L128_loop + lghi %r0,10 + lghi %r2,0 + lmg %r4,%r13,4*8(%r15) + br %r14 + +.align 16 +.Lnot128: + llgf %r0,16(%r2) + llgf %r1,20(%r2) + st %r0,16(%r4) + st %r1,20(%r4) + lghi %r0,192 + cr %r3,%r0 + jne .Lnot192 + + llill %r0,0xff + lghi %r3,0 # i=0 + lghi %r13,12 + st %r13,240(%r4) + lghi %r13,8 + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + +.align 16 +.L192_loop: + la %r1,0(%r1,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + icm %r1,2,0(%r1) # Te4[rk[5]>>0]<<8 + icm %r1,4,0(%r5) # Te4[rk[5]>>8]<<16 + icm %r1,8,0(%r6) # Te4[rk[5]>>16]<<24 + icm %r1,1,0(%r7) # Te4[rk[5]>>24] + x %r1,256(%r3,%r12) # rcon[i] + xr %r8,%r1 # rk[6]=rk[0]^... 
+ xr %r9,%r8 # rk[7]=rk[1]^rk[6] + xr %r10,%r9 # rk[8]=rk[2]^rk[7] + xr %r11,%r10 # rk[9]=rk[3]^rk[8] + + st %r8,24(%r4) + st %r9,28(%r4) + st %r10,32(%r4) + st %r11,36(%r4) + brct %r13,.L192_continue + lghi %r0,12 + lghi %r2,0 + lmg %r4,%r13,4*8(%r15) + br %r14 + +.align 16 +.L192_continue: + lgr %r1,%r11 + x %r1,16(%r4) # rk[10]=rk[4]^rk[9] + st %r1,40(%r4) + x %r1,20(%r4) # rk[11]=rk[5]^rk[10] + st %r1,44(%r4) + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + + la %r4,24(%r4) # key+=6 + la %r3,4(%r3) # i++ + j .L192_loop + +.align 16 +.Lnot192: + llgf %r0,24(%r2) + llgf %r1,28(%r2) + st %r0,24(%r4) + st %r1,28(%r4) + llill %r0,0xff + lghi %r3,0 # i=0 + lghi %r13,14 + st %r13,240(%r4) + lghi %r13,7 + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + +.align 16 +.L256_loop: + la %r1,0(%r1,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + icm %r1,2,0(%r1) # Te4[rk[7]>>0]<<8 + icm %r1,4,0(%r5) # Te4[rk[7]>>8]<<16 + icm %r1,8,0(%r6) # Te4[rk[7]>>16]<<24 + icm %r1,1,0(%r7) # Te4[rk[7]>>24] + x %r1,256(%r3,%r12) # rcon[i] + xr %r8,%r1 # rk[8]=rk[0]^... + xr %r9,%r8 # rk[9]=rk[1]^rk[8] + xr %r10,%r9 # rk[10]=rk[2]^rk[9] + xr %r11,%r10 # rk[11]=rk[3]^rk[10] + st %r8,32(%r4) + st %r9,36(%r4) + st %r10,40(%r4) + st %r11,44(%r4) + brct %r13,.L256_continue + lghi %r0,14 + lghi %r2,0 + lmg %r4,%r13,4*8(%r15) + br %r14 + +.align 16 +.L256_continue: + lgr %r1,%r11 # temp=rk[11] + srlg %r5,%r11,8 + srlg %r6,%r11,16 + srlg %r7,%r11,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + la %r1,0(%r1,%r12) + la %r5,0(%r5,%r12) + la %r6,0(%r6,%r12) + la %r7,0(%r7,%r12) + llgc %r1,0(%r1) # Te4[rk[11]>>0] + icm %r1,2,0(%r5) # Te4[rk[11]>>8]<<8 + icm %r1,4,0(%r6) # Te4[rk[11]>>16]<<16 + icm %r1,8,0(%r7) # Te4[rk[11]>>24]<<24 + x %r1,16(%r4) # rk[12]=rk[4]^... + st %r1,48(%r4) + x %r1,20(%r4) # rk[13]=rk[5]^rk[12] + st %r1,52(%r4) + x %r1,24(%r4) # rk[14]=rk[6]^rk[13] + st %r1,56(%r4) + x %r1,28(%r4) # rk[15]=rk[7]^rk[14] + st %r1,60(%r4) + + srlg %r5,%r1,8 + srlg %r6,%r1,16 + srlg %r7,%r1,24 + nr %r1,%r0 + nr %r5,%r0 + nr %r6,%r0 + + la %r4,32(%r4) # key+=8 + la %r3,4(%r3) # i++ + j .L256_loop + +.Lminus1: + lghi %r2,-1 + br %r14 +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +# void AES_set_decrypt_key(const unsigned char *in, int bits, +# AES_KEY *key) { +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function +.align 16 +AES_set_decrypt_key: + #stg %r4,4*8(%r15) # I rely on AES_set_encrypt_key to + stg %r14,14*8(%r15) # save non-volatile registers and %r4! 
+ bras %r14,_s390x_AES_set_encrypt_key + #lg %r4,4*8(%r15) + lg %r14,14*8(%r15) + ltgr %r2,%r2 + bnzr %r14 + #l %r0,240(%r4) + lhi %r1,16 + cr %r0,%r1 + jl .Lgo + oill %r0,S390X_DECRYPT # set "decrypt" bit + st %r0,240(%r4) + br %r14 +.align 16 +.Lgo: lgr %r13,%r0 #llgf %r13,240(%r4) + la %r5,0(%r4) + sllg %r6,%r13,4 + la %r6,0(%r6,%r4) + srl %r13,1 + lghi %r1,-16 + +.align 16 +.Linv: lmg %r8,%r9,0(%r5) + lmg %r10,%r11,0(%r6) + stmg %r8,%r9,0(%r6) + stmg %r10,%r11,0(%r5) + la %r5,16(%r5) + la %r6,0(%r1,%r6) + brct %r13,.Linv + llgf %r13,240(%r4) + aghi %r13,-1 + sll %r13,2 # (rounds-1)*4 + llilh %r5,0x8080 + llilh %r6,0x1b1b + llilh %r7,0xfefe + oill %r5,0x8080 + oill %r6,0x1b1b + oill %r7,0xfefe + +.align 16 +.Lmix: l %r8,16(%r4) # tp1 + lr %r9,%r8 + ngr %r9,%r5 + srlg %r1,%r9,7 + slr %r9,%r1 + nr %r9,%r6 + sllg %r1,%r8,1 + nr %r1,%r7 + xr %r9,%r1 # tp2 + + lr %r10,%r9 + ngr %r10,%r5 + srlg %r1,%r10,7 + slr %r10,%r1 + nr %r10,%r6 + sllg %r1,%r9,1 + nr %r1,%r7 + xr %r10,%r1 # tp4 + + lr %r11,%r10 + ngr %r11,%r5 + srlg %r1,%r11,7 + slr %r11,%r1 + nr %r11,%r6 + sllg %r1,%r10,1 + nr %r1,%r7 + xr %r11,%r1 # tp8 + + xr %r9,%r8 # tp2^tp1 + xr %r10,%r8 # tp4^tp1 + rll %r8,%r8,24 # = ROTATE(tp1,8) + xr %r10,%r11 # ^=tp8 + xr %r8,%r9 # ^=tp2^tp1 + xr %r9,%r11 # tp2^tp1^tp8 + xr %r8,%r10 # ^=tp4^tp1^tp8 + rll %r9,%r9,8 + rll %r10,%r10,16 + xr %r8,%r9 # ^= ROTATE(tp8^tp2^tp1,24) + rll %r11,%r11,24 + xr %r8,%r10 # ^= ROTATE(tp8^tp4^tp1,16) + xr %r8,%r11 # ^= ROTATE(tp8,8) + + st %r8,16(%r4) + la %r4,4(%r4) + brct %r13,.Lmix + + lmg %r6,%r13,6*8(%r15)# as was saved by AES_set_encrypt_key! + lghi %r2,0 + br %r14 +.size AES_set_decrypt_key,.-AES_set_decrypt_key +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 +AES_cbc_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, out and len + xgr %r4,%r3 + xgr %r3,%r4 + lhi %r0,16 + cl %r0,240(%r5) + jh .Lcbc_software + + lg %r0,0(%r6) # copy ivec + lg %r1,8(%r6) + stmg %r0,%r1,16(%r15) + lmg %r0,%r1,0(%r5) # copy key, cover 256 bit + stmg %r0,%r1,32(%r15) + lmg %r0,%r1,16(%r5) + stmg %r0,%r1,48(%r15) + l %r0,240(%r5) # load kmc code + lghi %r5,15 # res=len%16, len-=res; + ngr %r5,%r3 + slgr %r3,%r5 + la %r1,16(%r15) # parameter block - ivec || key + jz .Lkmc_truncated + .long 0xb92f0042 # kmc %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + ltr %r5,%r5 + jnz .Lkmc_truncated +.Lkmc_done: + lmg %r0,%r1,16(%r15) # copy ivec to caller + stg %r0,0(%r6) + stg %r1,8(%r6) + br %r14 +.align 16 +.Lkmc_truncated: + ahi %r5,-1 # it's the way it's encoded in mvc + tmll %r0,S390X_DECRYPT + jnz .Lkmc_truncated_dec + lghi %r1,0 + stg %r1,16*8(%r15) + stg %r1,16*8+8(%r15) + bras %r1,1f + mvc 16*8(1,%r15),0(%r2) +1: ex %r5,0(%r1) + la %r1,16(%r15) # restore parameter block + la %r2,16*8(%r15) + lghi %r3,16 + .long 0xb92f0042 # kmc %r4,%r2 + j .Lkmc_done +.align 16 +.Lkmc_truncated_dec: + stg %r4,4*8(%r15) + la %r4,16*8(%r15) + lghi %r3,16 + .long 0xb92f0042 # kmc %r4,%r2 + lg %r4,4*8(%r15) + bras %r1,2f + mvc 0(1,%r4),16*8(%r15) +2: ex %r5,0(%r1) + j .Lkmc_done +.align 16 +.Lcbc_software: + stmg %r5,%r14,5*8(%r15) + lhi %r0,0 + cl %r0,164(%r15) + je .Lcbc_decrypt + + larl %r12,AES_Te + + llgf %r8,0(%r6) + llgf %r9,4(%r6) + llgf %r10,8(%r6) + llgf %r11,12(%r6) + + lghi %r0,16 + slgr %r3,%r0 + brc 4,.Lcbc_enc_tail # if borrow +.Lcbc_enc_loop: + stmg %r2,%r4,2*8(%r15) + x %r8,0(%r2) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + lgr %r4,%r5 + + bras %r14,_s390x_AES_encrypt + + lmg %r2,%r5,2*8(%r15) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st 
%r11,12(%r4) + + la %r2,16(%r2) + la %r4,16(%r4) + lghi %r0,16 + ltgr %r3,%r3 + jz .Lcbc_enc_done + slgr %r3,%r0 + brc 4,.Lcbc_enc_tail # if borrow + j .Lcbc_enc_loop +.align 16 +.Lcbc_enc_done: + lg %r6,6*8(%r15) + st %r8,0(%r6) + st %r9,4(%r6) + st %r10,8(%r6) + st %r11,12(%r6) + + lmg %r7,%r14,7*8(%r15) + br %r14 + +.align 16 +.Lcbc_enc_tail: + aghi %r3,15 + lghi %r0,0 + stg %r0,16*8(%r15) + stg %r0,16*8+8(%r15) + bras %r1,3f + mvc 16*8(1,%r15),0(%r2) +3: ex %r3,0(%r1) + lghi %r3,0 + la %r2,16*8(%r15) + j .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + larl %r12,AES_Td + + lg %r0,0(%r6) + lg %r1,8(%r6) + stmg %r0,%r1,16*8(%r15) + +.Lcbc_dec_loop: + stmg %r2,%r4,2*8(%r15) + llgf %r8,0(%r2) + llgf %r9,4(%r2) + llgf %r10,8(%r2) + llgf %r11,12(%r2) + lgr %r4,%r5 + + bras %r14,_s390x_AES_decrypt + + lmg %r2,%r5,2*8(%r15) + sllg %r8,%r8,32 + sllg %r10,%r10,32 + lr %r8,%r9 + lr %r10,%r11 + + lg %r0,0(%r2) + lg %r1,8(%r2) + xg %r8,16*8(%r15) + xg %r10,16*8+8(%r15) + lghi %r9,16 + slgr %r3,%r9 + brc 4,.Lcbc_dec_tail # if borrow + brc 2,.Lcbc_dec_done # if zero + stg %r8,0(%r4) + stg %r10,8(%r4) + stmg %r0,%r1,16*8(%r15) + + la %r2,16(%r2) + la %r4,16(%r4) + j .Lcbc_dec_loop + +.Lcbc_dec_done: + stg %r8,0(%r4) + stg %r10,8(%r4) +.Lcbc_dec_exit: + lmg %r6,%r14,6*8(%r15) + stmg %r0,%r1,0(%r6) + + br %r14 + +.align 16 +.Lcbc_dec_tail: + aghi %r3,15 + stg %r8,16*8(%r15) + stg %r10,16*8+8(%r15) + bras %r9,4f + mvc 0(1,%r4),16*8(%r15) +4: ex %r3,0(%r9) + j .Lcbc_dec_exit +.size AES_cbc_encrypt,.-AES_cbc_encrypt +.globl AES_ctr32_encrypt +.type AES_ctr32_encrypt,@function +.align 16 +AES_ctr32_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, %r4 and %r3 + xgr %r4,%r3 + xgr %r3,%r4 + llgfr %r3,%r3 # safe in ctr32 subroutine even in 64-bit case + l %r0,240(%r5) + lhi %r1,16 + clr %r0,%r1 + jl .Lctr32_software + + stg %r10,10*8(%r15) + stg %r11,11*8(%r15) + + clr %r3,%r1 # does work even in 64-bit mode + jle .Lctr32_nokma # kma is slower for <= 16 blocks + + larl %r1,OPENSSL_s390xcap_P + lr %r10,%r0 + llihh %r11,0x8000 + srlg %r11,%r11,0(%r10) + ng %r11,S390X_KMA(%r1) # check kma capability vector + jz .Lctr32_nokma + + lghi %r1,-160-112 + lgr %r11,%r15 + la %r15,0(%r1,%r15) # prepare parameter block + + lhi %r1,0x0600 + sllg %r3,%r3,4 + or %r0,%r1 # set HS and LAAD flags + + stg %r11,0(%r15) # backchain + la %r1,160(%r15) + + xc 160+0(64,%r15),160+0(%r15) # clear reserved/unused + # in parameter block + + lmg %r10,%r11,0(%r5) # copy key + stg %r10,160+80(%r15) + stg %r11,160+88(%r15) + lmg %r10,%r11,16(%r5) + stg %r10,160+96(%r15) + stg %r11,160+104(%r15) + + lmg %r10,%r11,0(%r6) # copy iv + stg %r10,160+64(%r15) + ahi %r11,-1 # kma requires counter-1 + stg %r11,160+72(%r15) + st %r11,160+12(%r15) # copy counter + + lghi %r10,0 # no AAD + lghi %r11,0 + + .long 0xb929a042 # kma %r4,%r10,%r2 + brc 1,.-4 # pay attention to "partial completion" + + stg %r0,160+80(%r15) # wipe key + stg %r0,160+88(%r15) + stg %r0,160+96(%r15) + stg %r0,160+104(%r15) + la %r15,160+112(%r15) + + lmg %r10,%r11,10*8(%r15) + br %r14 + +.align 16 +.Lctr32_nokma: + stmg %r6,%r9,6*8(%r15) + + slgr %r4,%r2 + la %r1,0(%r5) # %r1 is permanent copy of %r5 + lg %r5,0(%r6) # load ivec + lg %r6,8(%r6) + + # prepare and allocate stack frame at the top of 4K page + # with 1K reserved for eventual signal handling + lghi %r8,-1024-256-16# guarantee at least 256-bytes buffer + lghi %r9,-4096 + algr %r8,%r15 + lgr %r7,%r15 + ngr %r8,%r9 # align at page boundary + slgr %r7,%r8 # total buffer size + lgr %r10,%r15 + lghi %r9,1024+16 # sl[g]fi is 
extended-immediate facility + slgr %r7,%r9 # deduct reservation to get usable buffer size + # buffer size is at lest 256 and at most 3072+256-16 + + la %r15,1024(%r8) # alloca + srlg %r7,%r7,4 # convert bytes to blocks, minimum 16 + stg %r10,0(%r15) # back-chain + stg %r7,8(%r15) + + slgr %r3,%r7 + brc 1,.Lctr32_hw_switch # not zero, no borrow + algr %r7,%r3 # input is shorter than allocated buffer + lghi %r3,0 + stg %r7,8(%r15) + +.Lctr32_hw_switch: +.Lctr32_km_loop: + la %r10,16(%r15) + lgr %r11,%r7 +.Lctr32_km_prepare: + stg %r5,0(%r10) + stg %r6,8(%r10) + la %r10,16(%r10) + ahi %r6,1 # 32-bit increment, preserves upper half + brct %r11,.Lctr32_km_prepare + + la %r8,16(%r15) # inp + sllg %r9,%r7,4 # len + la %r10,16(%r15) # out + .long 0xb92e00a8 # km %r10,%r8 + brc 1,.-4 # pay attention to "partial completion" + + la %r10,16(%r15) + lgr %r11,%r7 + slgr %r10,%r2 +.Lctr32_km_xor: + lg %r8,0(%r2) + lg %r9,8(%r2) + xg %r8,0(%r10,%r2) + xg %r9,8(%r10,%r2) + stg %r8,0(%r4,%r2) + stg %r9,8(%r4,%r2) + la %r2,16(%r2) + brct %r11,.Lctr32_km_xor + + slgr %r3,%r7 + brc 1,.Lctr32_km_loop # not zero, no borrow + algr %r7,%r3 + lghi %r3,0 + brc 4+1,.Lctr32_km_loop # not zero + + lg %r8,0(%r15) + lg %r9,8(%r15) + la %r10,16(%r15) +.Lctr32_km_zap: + stg %r8,0(%r10) + stg %r8,8(%r10) + la %r10,16(%r10) + brct %r9,.Lctr32_km_zap + + la %r15,0(%r8) + lmg %r6,%r11,6*8(%r15) + br %r14 +.align 16 +.Lctr32_software: + stmg %r5,%r14,5*8(%r15) + slgr %r2,%r4 + larl %r12,AES_Te + llgf %r1,12(%r6) + +.Lctr32_loop: + stmg %r2,%r4,2*8(%r15) + llgf %r8,0(%r6) + llgf %r9,4(%r6) + llgf %r10,8(%r6) + lgr %r11,%r1 + st %r1,16*8(%r15) + lgr %r4,%r5 + + bras %r14,_s390x_AES_encrypt + + lmg %r2,%r6,2*8(%r15) + llgf %r1,16*8(%r15) + x %r8,0(%r2,%r4) + x %r9,4(%r2,%r4) + x %r10,8(%r2,%r4) + x %r11,12(%r2,%r4) + stm %r8,%r11,0(%r4) + + la %r4,16(%r4) + ahi %r1,1 # 32-bit increment + brct %r3,.Lctr32_loop + + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_ctr32_encrypt,.-AES_ctr32_encrypt +.type _s390x_xts_km,@function +.align 16 +_s390x_xts_km: + llgfr %r8,%r0 # put aside the function code + lghi %r9,0x7f + nr %r9,%r0 + larl %r1,OPENSSL_s390xcap_P + llihh %r0,0x8000 + srlg %r0,%r0,32(%r9) # check for 32+function code + ng %r0,S390X_KM(%r1) # check km capability vector + lgr %r0,%r8 # restore the function code + la %r1,0(%r5) # restore %r5 + jz .Lxts_km_vanilla + + lmg %r6,%r7,144(%r15) # put aside the tweak value + algr %r4,%r2 + + oill %r0,32 # switch to xts function code + aghi %r9,-18 # + sllg %r9,%r9,3 # (function code - 18)*8, 0 or 16 + la %r1,144-16(%r15) + slgr %r1,%r9 # parameter block position + lmg %r8,%r11,0(%r5) # load 256 bits of key material, + stmg %r8,%r11,0(%r1) # and copy it to parameter block. + # yes, it contains junk and overlaps + # with the tweak in 128-bit case. + # it's done to avoid conditional + # branch. 
+ stmg %r6,%r7,144(%r15) # "re-seat" the tweak value + + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + lrvg %r8,144+0(%r15) # load the last tweak + lrvg %r9,144+8(%r15) + stmg %r0,%r3,144-32(%r15) # wipe copy of the key + + nill %r0,0xffdf # switch back to original function code + la %r1,0(%r5) # restore pointer to %r5 + slgr %r4,%r2 + + llgc %r3,2*8-1(%r15) + nill %r3,0x0f # %r3%=16 + br %r14 + +.align 16 +.Lxts_km_vanilla: + # prepare and allocate stack frame at the top of 4K page + # with 1K reserved for eventual signal handling + lghi %r8,-1024-256-16# guarantee at least 256-bytes buffer + lghi %r9,-4096 + algr %r8,%r15 + lgr %r7,%r15 + ngr %r8,%r9 # align at page boundary + slgr %r7,%r8 # total buffer size + lgr %r10,%r15 + lghi %r9,1024+16 # sl[g]fi is extended-immediate facility + slgr %r7,%r9 # deduct reservation to get usable buffer size + # buffer size is at lest 256 and at most 3072+256-16 + + la %r15,1024(%r8) # alloca + nill %r7,0xfff0 # round to 16*n + stg %r10,0(%r15) # back-chain + nill %r3,0xfff0 # redundant + stg %r7,8(%r15) + + slgr %r3,%r7 + brc 1,.Lxts_km_go # not zero, no borrow + algr %r7,%r3 # input is shorter than allocated buffer + lghi %r3,0 + stg %r7,8(%r15) + +.Lxts_km_go: + lrvg %r8,144+0(%r10) # load the tweak value in little-endian + lrvg %r9,144+8(%r10) + + la %r10,16(%r15) # vector of ascending tweak values + slgr %r10,%r2 + srlg %r11,%r7,4 + j .Lxts_km_start + +.Lxts_km_loop: + la %r10,16(%r15) + slgr %r10,%r2 + srlg %r11,%r7,4 +.Lxts_km_prepare: + lghi %r5,0x87 + srag %r6,%r9,63 # broadcast upper bit + ngr %r5,%r6 # rem + algr %r8,%r8 + alcgr %r9,%r9 + xgr %r8,%r5 +.Lxts_km_start: + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + stg %r5,0(%r10,%r2) + stg %r6,8(%r10,%r2) + xg %r5,0(%r2) + xg %r6,8(%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + la %r2,16(%r2) + brct %r11,.Lxts_km_prepare + + slgr %r2,%r7 # rewind %r2 + la %r10,0(%r4,%r2) + lgr %r11,%r7 + .long 0xb92e00aa # km %r10,%r10 + brc 1,.-4 # pay attention to "partial completion" + + la %r10,16(%r15) + slgr %r10,%r2 + srlg %r11,%r7,4 +.Lxts_km_xor: + lg %r5,0(%r4,%r2) + lg %r6,8(%r4,%r2) + xg %r5,0(%r10,%r2) + xg %r6,8(%r10,%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + la %r2,16(%r2) + brct %r11,.Lxts_km_xor + + slgr %r3,%r7 + brc 1,.Lxts_km_loop # not zero, no borrow + algr %r7,%r3 + lghi %r3,0 + brc 4+1,.Lxts_km_loop # not zero + + lg %r5,0(%r15) # back-chain + llgf %r7,12(%r15) # bytes used + la %r6,16(%r15) + srlg %r7,%r7,4 +.Lxts_km_zap: + stg %r5,0(%r6) + stg %r5,8(%r6) + la %r6,16(%r6) + brct %r7,.Lxts_km_zap + + la %r15,0(%r5) + llgc %r3,2*8-1(%r5) + nill %r3,0x0f # %r3%=16 + bzr %r14 + + # generate one more tweak... + lghi %r5,0x87 + srag %r6,%r9,63 # broadcast upper bit + ngr %r5,%r6 # rem + algr %r8,%r8 + alcgr %r9,%r9 + xgr %r8,%r5 + + ltr %r3,%r3 # clear zero flag + br %r14 +.size _s390x_xts_km,.-_s390x_xts_km + +.globl AES_xts_encrypt +.type AES_xts_encrypt,@function +.align 16 +AES_xts_encrypt: + xgr %r3,%r4 # flip %r3 and %r4, %r4 and %r3 + xgr %r4,%r3 + xgr %r3,%r4 + stg %r3,1*8(%r15) # save copy of %r3 + srag %r3,%r3,4 # formally wrong, because it expands + # sign byte, but who can afford asking + # to process more than 2^63-1 bytes? + # I use it, because it sets condition + # code... + bcr 8,%r14 # abort if zero (i.e. 
less than 16) + llgf %r0,240(%r6) + lhi %r1,16 + clr %r0,%r1 + jl .Lxts_enc_software + + stg %r14,5*8(%r15) + stmg %r6,%r11,6*8(%r15) + + sllg %r3,%r3,4 # %r3&=~15 + slgr %r4,%r2 + + # generate the tweak value + lg %r11,160(%r15) # pointer to iv + la %r10,144(%r15) + lmg %r8,%r9,0(%r11) + lghi %r11,16 + stmg %r8,%r9,0(%r10) + la %r1,0(%r6) # %r6 is not needed anymore + .long 0xb92e00aa # km %r10,%r10, generate the tweak + brc 1,.-4 # can this happen? + + l %r0,240(%r5) + la %r1,0(%r5) # %r5 is not needed anymore + bras %r14,_s390x_xts_km + jz .Lxts_enc_km_done + + aghi %r2,-16 # take one step back + la %r7,0(%r4,%r2) # put aside real %r4 +.Lxts_enc_km_steal: + llgc %r5,16(%r2) + llgc %r6,0(%r4,%r2) + stc %r5,0(%r4,%r2) + stc %r6,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_enc_km_steal + + la %r10,0(%r7) + lghi %r11,16 + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + xg %r5,0(%r10) + xg %r6,8(%r10) + stg %r5,0(%r10) + stg %r6,8(%r10) + .long 0xb92e00aa # km %r10,%r10 + brc 1,.-4 # can this happen? + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + xg %r5,0(%r7) + xg %r6,8(%r7) + stg %r5,0(%r7) + stg %r6,8(%r7) + +.Lxts_enc_km_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) + lg %r14,5*8(%r15) + lmg %r6,%r11,6*8(%r15) + br %r14 +.align 16 +.Lxts_enc_software: + stmg %r6,%r14,6*8(%r15) + + slgr %r4,%r2 + + lg %r11,160(%r15) # ivp + llgf %r8,0(%r11) # load iv + llgf %r9,4(%r11) + llgf %r10,8(%r11) + llgf %r11,12(%r11) + stmg %r2,%r5,2*8(%r15) + la %r4,0(%r6) + larl %r12,AES_Te + bras %r14,_s390x_AES_encrypt # generate the tweak + lmg %r2,%r5,2*8(%r15) + stm %r8,%r11,144(%r15) # save the tweak + j .Lxts_enc_enter + +.align 16 +.Lxts_enc_loop: + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144+0(%r15) # save the tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144+8(%r15) + llgfr %r11,%r11 + la %r2,16(%r2) # %r2+=16 +.Lxts_enc_enter: + x %r8,0(%r2) # ^=*(%r2) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + stmg %r2,%r3,2*8(%r15) # only two registers are changing + la %r4,0(%r5) + bras %r14,_s390x_AES_encrypt + lmg %r2,%r5,2*8(%r15) + x %r8,144+0(%r15) # ^=tweak + x %r9,144+4(%r15) + x %r10,144+8(%r15) + x %r11,144+12(%r15) + st %r8,0(%r4,%r2) + st %r9,4(%r4,%r2) + st %r10,8(%r4,%r2) + st %r11,12(%r4,%r2) + brctg %r3,.Lxts_enc_loop + + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%16 + jz .Lxts_enc_done + + la %r7,0(%r2,%r4) # put aside real %r4 +.Lxts_enc_steal: + llgc %r0,16(%r2) + llgc %r1,0(%r4,%r2) + stc %r0,0(%r4,%r2) + stc %r1,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_enc_steal + la %r4,0(%r7) # restore real %r4 + + # generate last tweak... 
+ lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144+0(%r15) # save the tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144+8(%r15) + llgfr %r11,%r11 + + x %r8,0(%r4) # ^=*(inp)|stolen cipther-text + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + stg %r4,4*8(%r15) + la %r4,0(%r5) + bras %r14,_s390x_AES_encrypt + lg %r4,4*8(%r15) + x %r8,144(%r15) # ^=tweak + x %r9,148(%r15) + x %r10,152(%r15) + x %r11,156(%r15) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + +.Lxts_enc_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_xts_encrypt,.-AES_xts_encrypt +.globl AES_xts_decrypt +.type AES_xts_decrypt,@function +.align 16 +AES_xts_decrypt: + xgr %r3,%r4 # flip %r3 and %r4, %r4 and %r3 + xgr %r4,%r3 + xgr %r3,%r4 + stg %r3,1*8(%r15) # save copy of %r3 + aghi %r3,-16 + bcr 4,%r14 # abort if less than zero. formally + # wrong, because %r3 is unsigned, + # but who can afford asking to + # process more than 2^63-1 bytes? + tmll %r3,0x0f + jnz .Lxts_dec_proceed + aghi %r3,16 +.Lxts_dec_proceed: + llgf %r0,240(%r6) + lhi %r1,16 + clr %r0,%r1 + jl .Lxts_dec_software + + stg %r14,5*8(%r15) + stmg %r6,%r11,6*8(%r15) + + nill %r3,0xfff0 # %r3&=~15 + slgr %r4,%r2 + + # generate the tweak value + lg %r11,160(%r15) # pointer to iv + la %r10,144(%r15) + lmg %r8,%r9,0(%r11) + lghi %r11,16 + stmg %r8,%r9,0(%r10) + la %r1,0(%r6) # %r6 is not needed past this point + .long 0xb92e00aa # km %r10,%r10, generate the tweak + brc 1,.-4 # can this happen? + + l %r0,240(%r5) + la %r1,0(%r5) # %r5 is not needed anymore + + ltgr %r3,%r3 + jz .Lxts_dec_km_short + bras %r14,_s390x_xts_km + jz .Lxts_dec_km_done + + lrvgr %r10,%r8 # make copy in reverse byte order + lrvgr %r11,%r9 + j .Lxts_dec_km_2ndtweak + +.Lxts_dec_km_short: + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%=16 + lrvg %r8,144+0(%r15) # load the tweak + lrvg %r9,144+8(%r15) + lrvgr %r10,%r8 # make copy in reverse byte order + lrvgr %r11,%r9 + +.Lxts_dec_km_2ndtweak: + lghi %r5,0x87 + srag %r6,%r9,63 # broadcast upper bit + ngr %r5,%r6 # rem + algr %r8,%r8 + alcgr %r9,%r9 + xgr %r8,%r5 + lrvgr %r5,%r8 # flip byte order + lrvgr %r6,%r9 + + xg %r5,0(%r2) + xg %r6,8(%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + la %r6,0(%r4,%r2) + lghi %r7,16 + .long 0xb92e0066 # km %r6,%r6 + brc 1,.-4 # can this happen? + lrvgr %r5,%r8 + lrvgr %r6,%r9 + xg %r5,0(%r4,%r2) + xg %r6,8(%r4,%r2) + stg %r5,0(%r4,%r2) + stg %r6,8(%r4,%r2) + + la %r7,0(%r4,%r2) # put aside real %r4 +.Lxts_dec_km_steal: + llgc %r5,16(%r2) + llgc %r6,0(%r4,%r2) + stc %r5,0(%r4,%r2) + stc %r6,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_dec_km_steal + + lgr %r8,%r10 + lgr %r9,%r11 + xg %r8,0(%r7) + xg %r9,8(%r7) + stg %r8,0(%r7) + stg %r9,8(%r7) + la %r8,0(%r7) + lghi %r9,16 + .long 0xb92e0088 # km %r8,%r8 + brc 1,.-4 # can this happen? 
+ xg %r10,0(%r7) + xg %r11,8(%r7) + stg %r10,0(%r7) + stg %r11,8(%r7) +.Lxts_dec_km_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) + lg %r14,5*8(%r15) + lmg %r6,%r11,6*8(%r15) + br %r14 +.align 16 +.Lxts_dec_software: + stmg %r6,%r14,6*8(%r15) + + srlg %r3,%r3,4 + slgr %r4,%r2 + + lg %r11,160(%r15) # ivp + llgf %r8,0(%r11) # load iv + llgf %r9,4(%r11) + llgf %r10,8(%r11) + llgf %r11,12(%r11) + stmg %r2,%r5,2*8(%r15) + la %r4,0(%r6) + larl %r12,AES_Te + bras %r14,_s390x_AES_encrypt # generate the tweak + lmg %r2,%r5,2*8(%r15) + larl %r12,AES_Td + ltgr %r3,%r3 + stm %r8,%r11,144(%r15) # save the tweak + jz .Lxts_dec_short + j .Lxts_dec_enter + +.align 16 +.Lxts_dec_loop: + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144+0(%r15) # save the tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144+8(%r15) + llgfr %r11,%r11 +.Lxts_dec_enter: + x %r8,0(%r2) # tweak^=*(inp) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + stmg %r2,%r3,2*8(%r15) # only two registers are changing + la %r4,0(%r5) + bras %r14,_s390x_AES_decrypt + lmg %r2,%r5,2*8(%r15) + x %r8,144+0(%r15) # ^=tweak + x %r9,144+4(%r15) + x %r10,144+8(%r15) + x %r11,144+12(%r15) + st %r8,0(%r4,%r2) + st %r9,4(%r4,%r2) + st %r10,8(%r4,%r2) + st %r11,12(%r4,%r2) + la %r2,16(%r2) + brctg %r3,.Lxts_dec_loop + + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%16 + jz .Lxts_dec_done + + # generate pair of tweaks... + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r6,%r9 # flip byte order + lrvgr %r7,%r11 + stmg %r6,%r7,144(%r15) # save the 1st tweak + j .Lxts_dec_2ndtweak + +.align 16 +.Lxts_dec_short: + llgc %r3,15(%r15) + nill %r3,0x0f # %r3%16 + lrvg %r9,144+0(%r15) # load the tweak in little-endian + lrvg %r11,144+8(%r15) +.Lxts_dec_2ndtweak: + lghi %r1,0x87 + srag %r0,%r11,63 # broadcast upper bit + ngr %r1,%r0 # rem + algr %r9,%r9 + alcgr %r11,%r11 + xgr %r9,%r1 + lrvgr %r9,%r9 # flip byte order + lrvgr %r11,%r11 + srlg %r8,%r9,32 # smash the tweak to 4x32-bits + stg %r9,144-16+0(%r15) # save the 2nd tweak + llgfr %r9,%r9 + srlg %r10,%r11,32 + stg %r11,144-16+8(%r15) + llgfr %r11,%r11 + + x %r8,0(%r2) # tweak_the_2nd^=*(inp) + x %r9,4(%r2) + x %r10,8(%r2) + x %r11,12(%r2) + stmg %r2,%r3,2*8(%r15) + la %r4,0(%r5) + bras %r14,_s390x_AES_decrypt + lmg %r2,%r5,2*8(%r15) + x %r8,144-16+0(%r15) # ^=tweak_the_2nd + x %r9,144-16+4(%r15) + x %r10,144-16+8(%r15) + x %r11,144-16+12(%r15) + st %r8,0(%r4,%r2) + st %r9,4(%r4,%r2) + st %r10,8(%r4,%r2) + st %r11,12(%r4,%r2) + + la %r7,0(%r4,%r2) # put aside real %r4 +.Lxts_dec_steal: + llgc %r0,16(%r2) + llgc %r1,0(%r4,%r2) + stc %r0,0(%r4,%r2) + stc %r1,16(%r4,%r2) + la %r2,1(%r2) + brct %r3,.Lxts_dec_steal + la %r4,0(%r7) # restore real %r4 + + lm %r8,%r11,144(%r15) # load the 1st tweak + x %r8,0(%r4) # tweak^=*(inp)|stolen cipher-text + x %r9,4(%r4) + x %r10,8(%r4) + x %r11,12(%r4) + stg %r4,4*8(%r15) + la %r4,0(%r5) + bras %r14,_s390x_AES_decrypt + lg %r4,4*8(%r15) + x %r8,144+0(%r15) # ^=tweak + x %r9,144+4(%r15) + x %r10,144+8(%r15) + x %r11,144+12(%r15) + st %r8,0(%r4) + st %r9,4(%r4) + st %r10,8(%r4) + st %r11,12(%r4) + stg %r15,144-16+0(%r15) # wipe 2nd 
tweak + stg %r15,144-16+8(%r15) +.Lxts_dec_done: + stg %r15,144+0(%r15) # wipe tweak + stg %r15,144+8(%r15) + lmg %r6,%r14,6*8(%r15) + br %r14 +.size AES_xts_decrypt,.-AES_xts_decrypt +.string "AES for s390x, CRYPTOGAMS by " diff --git a/contrib/openssl-cmake/asm/crypto/aes/aes-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/aes-x86_64.s new file mode 100644 index 000000000000..b210921d7753 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aes-x86_64.s @@ -0,0 +1,2660 @@ +.text +.type _x86_64_AES_encrypt,@function +.align 16 +_x86_64_AES_encrypt: +.cfi_startproc + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.align 16 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $0x0000ff00,%edi + andl $0x0000ff00,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $0x0000ff00,%esi + andl $0x0000ff00,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $0x00ff0000,%esi + andl $0x00ff0000,%edi + andl $0x00ff0000,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $0x00ff0000,%esi + andl $0xff000000,%edi + andl $0xff000000,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $0xff000000,%esi + andl $0xff000000,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt +.type _x86_64_AES_encrypt_compact,@function +.align 16 +_x86_64_AES_encrypt_compact: +.cfi_startproc + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 
128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.align 16 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ah,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shll $8,%r9d + shrl $16,%edx + shll $8,%r13d + xorl %r9d,%r10d + shrl $16,%eax + movzbl %dl,%r9d + shrl $16,%ebx + xorl %r13d,%r11d + shll $8,%ebp + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + + shll $8,%esi + movzbl %bl,%ebp + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d + xorl %edi,%r10d + + shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d + shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + + shll $16,%ebp + xorl %r13d,%r12d + shll $24,%esi + xorl %ebp,%r8d + shll $24,%edi + xorl %esi,%r10d + shll $24,%edx + xorl %edi,%r11d + shll $24,%ecx + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl $0x80808080,%r10d + movl $0x80808080,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $0xfefefefe,%r8d + andl $0xfefefefe,%r9d + andl $0x1b1b1b1b,%esi + andl $0x1b1b1b1b,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl $0x80808080,%r12d + roll $24,%eax + movl $0x80808080,%ebp + roll $24,%ebx + andl %ecx,%r12d + andl %edx,%ebp + xorl %r8d,%eax + xorl %r9d,%ebx + movl %r12d,%esi + rorl $16,%r10d + movl %ebp,%edi + rorl $16,%r11d + leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d + xorl %r10d,%eax + shrl $7,%ebp + xorl %r11d,%ebx + rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $0xfefefefe,%r8d + andl $0xfefefefe,%r9d + andl $0x1b1b1b1b,%esi + andl $0x1b1b1b1b,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + rorl $16,%r12d + xorl %r8d,%ecx + rorl $16,%ebp + xorl %r9d,%edx + roll $24,%ecx + movl 0(%r14),%esi + roll $24,%edx + xorl %r8d,%ecx + movl 64(%r14),%edi + xorl %r9d,%edx + movl 128(%r14),%r8d + xorl %r12d,%ecx + rorl $8,%r12d + xorl %ebp,%edx + rorl $8,%ebp + xorl %r12d,%ecx + movl 192(%r14),%r9d + xorl %ebp,%edx + jmp .Lenc_loop_compact +.align 16 +.Lenc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: +AES_encrypt: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset 
%r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $0x3c0,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %rax,24(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08 +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $0x300,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lenc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_encrypt,.-AES_encrypt +.type _x86_64_AES_decrypt,@function +.align 16 +_x86_64_AES_decrypt: +.cfi_startproc + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.align 16 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi 
+ movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt +.type _x86_64_AES_decrypt_compact,@function +.align 16 +_x86_64_AES_decrypt_compact: +.cfi_startproc + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.align 16 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ch,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shrl $16,%ecx + shll $8,%r13d + shll $8,%r9d + movzbl %cl,%edi + shrl $16,%eax + xorl %r9d,%r10d + shrl $16,%ebx + movzbl %dl,%r9d + + shll $8,%ebp + xorl %r13d,%r11d + shll $8,%esi + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + movzbl %bl,%ebp + + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi + movzbl (%r14,%rbp,1),%ebp + xorl %edi,%r10d + movzbl (%r14,%r13,1),%r13d + movzbl %ch,%edi + + shll $16,%ebp + shll $16,%r9d + shll $16,%r13d + xorl %ebp,%r8d + movzbl %dh,%ebp + xorl %r9d,%r11d + shrl $8,%eax + xorl %r13d,%r12d + + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx + movzbl (%r14,%rax,1),%edx + + movl %r10d,%eax + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%eax + shll $24,%edx + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq 
%r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + movq 0(%r14),%rsi + roll $16,%r9d + movq 64(%r14),%rdi + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + movq 192(%r14),%r10 + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.align 16 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: +AES_decrypt: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $0x3c0,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %rax,24(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x18,0x06,0x23,0x08 +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $0x300,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Ldec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_decrypt,.-AES_decrypt +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function +.align 16 +AES_set_encrypt_key: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $8,%rsp +.cfi_adjust_cfa_offset 8 +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 40(%rsp),%rbp +.cfi_restore %rbp + movq 48(%rsp),%rbx +.cfi_restore %rbx + addq $56,%rsp +.cfi_adjust_cfa_offset -56 +.Lenc_key_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +.type _x86_64_AES_set_encrypt_key,@function +.align 16 +_x86_64_AES_set_encrypt_key: +.cfi_startproc + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq 
.LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx + je .L12rounds + cmpl $256,%ecx + je .L14rounds + movq $-2,%rax + jmp .Lexit + +.L10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L10shortcut +.align 4 +.L10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +.L10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl .L10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L12shortcut +.align 4 +.L12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +.L12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je .L12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp .L12loop +.L12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L14shortcut +.align 4 +.L14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +.L14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je .L14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 
20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp .L14loop +.L14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp .Lexit + +.Lbadpointer: + movq $-1,%rax +.Lexit: +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function +.align 16 +AES_set_decrypt_key: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdx +.cfi_adjust_cfa_offset 8 +.Ldec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne .Labort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.align 4 +.Linvert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne .Linvert + + leaq .LAES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.align 4 +.Lpermute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + + roll $16,%r9d + + roll $16,%r12d + + roll $16,%r8d + + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz .Lpermute + + xorq %rax,%rax 
+.Labort: + movq 8(%rsp),%r15 +.cfi_restore %r15 + movq 16(%rsp),%r14 +.cfi_restore %r14 + movq 24(%rsp),%r13 +.cfi_restore %r13 + movq 32(%rsp),%r12 +.cfi_restore %r12 + movq 40(%rsp),%rbp +.cfi_restore %rbp + movq 48(%rsp),%rbx +.cfi_restore %rbx + addq $56,%rsp +.cfi_adjust_cfa_offset -56 +.Ldec_key_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_set_decrypt_key,.-AES_set_decrypt_key +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 + +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: +AES_cbc_encrypt: +.cfi_startproc +.byte 243,15,30,250 + cmpq $0,%rdx + je .Lcbc_epilogue + pushfq + + +.cfi_adjust_cfa_offset 8 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-32 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-40 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-48 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-56 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-64 +.Lcbc_prologue: + + cld + movl %r9d,%r9d + + leaq .LAES_Te(%rip),%r14 + leaq .LAES_Td(%rip),%r10 + cmpq $0,%r9 + cmoveq %r10,%r14 + +.cfi_remember_state + movl OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb .Lcbc_slow_prologue + testq $15,%rdx + jnz .Lcbc_slow_prologue + btl $28,%r10d + jc .Lcbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $0xFFF,%r10 + andq $0xFFF,%r11 + andq $0xFFF,%r12 + + cmpq %r11,%r12 + jb .Lcbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp .Lcbc_te_ok +.Lcbc_te_break_out: + subq %r10,%r12 + andq $0xFFF,%r12 + addq $320,%r12 + subq %r12,%r15 +.align 4 +.Lcbc_te_ok: + + xchgq %rsp,%r15 +.cfi_def_cfa_register %r15 + + movq %r15,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40 +.Lcbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $0xfff,%r10 + cmpq $2304,%r10 + jb .Lcbc_do_ecopy + cmpq $4096-248,%r10 + jb .Lcbc_skip_ecopy +.align 4 +.Lcbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +.Lcbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.align 4 +.Lcbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz .Lcbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je .LFAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.align 4 +.Lcbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz .Lcbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_fast_cleanup + + +.align 16 +.LFAST_DECRYPT: + cmpq %r8,%r9 + je .Lcbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.align 4 +.Lcbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + 
call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz .Lcbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp .Lcbc_fast_cleanup + +.align 16 +.Lcbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.align 4 +.Lcbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz .Lcbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp .Lcbc_fast_dec_in_place_loop +.Lcbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.align 4 +.Lcbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je .Lcbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp .Lcbc_exit + + +.align 16 +.Lcbc_slow_prologue: +.cfi_restore_state + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $0x3c0,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp +.cfi_def_cfa_register %rbp + + movq %rbp,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x40 +.Lcbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $0x300,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je .LSLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz .Lcbc_slow_enc_tail + +.align 4 +.Lcbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz .Lcbc_slow_enc_loop + testq $15,%r10 + jnz .Lcbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp .Lcbc_slow_enc_loop + +.align 16 +.LSLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.align 4 
+.Lcbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc .Lcbc_slow_dec_partial + jz .Lcbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp .Lcbc_slow_dec_loop +.Lcbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp .Lcbc_exit + +.align 16 +.Lcbc_exit: + movq 16(%rsp),%rsi +.cfi_def_cfa %rsi,64 + movq (%rsi),%r15 +.cfi_restore %r15 + movq 8(%rsi),%r14 +.cfi_restore %r14 + movq 16(%rsi),%r13 +.cfi_restore %r13 + movq 24(%rsi),%r12 +.cfi_restore %r12 + movq 32(%rsi),%rbp +.cfi_restore %rbp + movq 40(%rsi),%rbx +.cfi_restore %rbx + leaq 48(%rsi),%rsp +.cfi_def_cfa %rsp,16 +.Lcbc_popfq: + popfq + + +.cfi_adjust_cfa_offset -8 +.Lcbc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size AES_cbc_encrypt,.-AES_cbc_encrypt +.section .rodata +.align 64 +.LAES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 
0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 
0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 
0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.align 64 +.LAES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 
0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 
0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 
0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 
0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesni-mb-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/aesni-mb-x86_64.s new file mode 100644 index 000000000000..044d4d84bb3f --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesni-mb-x86_64.s @@ -0,0 +1,1588 @@ +.text + + + +.globl aesni_multi_cbc_encrypt +.type aesni_multi_cbc_encrypt,@function +.align 32 +aesni_multi_cbc_encrypt: +.cfi_startproc + cmpl $2,%edx + jb .Lenc_non_avx + movl OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_enc_shortcut + jmp .Lenc_non_avx +.align 16 +.Lenc_non_avx: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 + +.Lenc4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Lenc4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu -56(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu -16(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu 24(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu 64(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Lenc4x_done + + movups 16-120(%rsi),%xmm1 + pxor %xmm12,%xmm2 + movups 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm3 + movl 240-120(%rsi),%eax + pxor %xmm12,%xmm4 + movdqu (%r8),%xmm6 + pxor %xmm12,%xmm5 + movdqu (%r9),%xmm7 + pxor %xmm6,%xmm2 + movdqu (%r10),%xmm8 + pxor %xmm7,%xmm3 + movdqu (%r11),%xmm9 + pxor %xmm8,%xmm4 + pxor %xmm9,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_enc4x + +.align 32 +.Loop_enc4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,220,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,220,217 + 
prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r11,%rbx,1) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,220,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,220,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,220,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,220,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,220,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,220,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,220,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,220,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 160-120(%rsi),%xmm0 + + jb .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 192-120(%rsi),%xmm0 + + je .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 224-120(%rsi),%xmm0 + jmp .Lenc4x_tail + +.align 32 +.Lenc4x_tail: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqu (%r8,%rbx,1),%xmm6 + movdqu 16-120(%rsi),%xmm1 + +.byte 102,15,56,221,208 + movdqu (%r9,%rbx,1),%xmm7 + pxor %xmm12,%xmm6 +.byte 102,15,56,221,216 + movdqu (%r10,%rbx,1),%xmm8 + pxor %xmm12,%xmm7 +.byte 102,15,56,221,224 + movdqu (%r11,%rbx,1),%xmm9 + pxor %xmm12,%xmm8 +.byte 102,15,56,221,232 + movdqu 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + pxor %xmm6,%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + pxor %xmm7,%xmm3 + movups %xmm4,-16(%r14,%rbx,1) + pxor %xmm8,%xmm4 + movups %xmm5,-16(%r15,%rbx,1) + pxor %xmm9,%xmm5 + + decl %edx + jnz .Loop_enc4x + + movq 16(%rsp),%rax +.cfi_def_cfa %rax,8 + movl 24(%rsp),%edx + + + + + + + + + + + + leaq 160(%rdi),%rdi + decl %edx + jnz .Lenc4x_loop_grande + +.Lenc4x_done: + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lenc4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt + +.globl aesni_multi_cbc_decrypt +.type aesni_multi_cbc_decrypt,@function +.align 32 +aesni_multi_cbc_decrypt: +.cfi_startproc + cmpl $2,%edx + jb
.Ldec_non_avx + movl OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_dec_shortcut + jmp .Ldec_non_avx +.align 16 +.Ldec_non_avx: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 + +.Ldec4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Ldec4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu -56(%rdi),%xmm6 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu -16(%rdi),%xmm7 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu 24(%rdi),%xmm8 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + + movdqu 64(%rdi),%xmm9 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Ldec4x_done + + movups 16-120(%rsi),%xmm1 + movups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + movdqu (%r8),%xmm2 + movdqu (%r9),%xmm3 + pxor %xmm12,%xmm2 + movdqu (%r10),%xmm4 + pxor %xmm12,%xmm3 + movdqu (%r11),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_dec4x + +.align 32 +.Loop_dec4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,222,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,222,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r11,%rbx,1) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,222,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,222,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,222,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,222,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,222,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,222,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,222,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,222,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 160-120(%rsi),%xmm0 + + jb .Ldec4x_tail + +.byte 102,15,56,222,209 +.byte 
102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 192-120(%rsi),%xmm0 + + je .Ldec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 224-120(%rsi),%xmm0 + jmp .Ldec4x_tail + +.align 32 +.Ldec4x_tail: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,233 + movdqu 16-120(%rsi),%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 + movdqu 32-120(%rsi),%xmm0 + +.byte 102,15,56,223,214 +.byte 102,15,56,223,223 + movdqu -16(%r8,%rbx,1),%xmm6 + movdqu -16(%r9,%rbx,1),%xmm7 +.byte 102,65,15,56,223,224 +.byte 102,65,15,56,223,233 + movdqu -16(%r10,%rbx,1),%xmm8 + movdqu -16(%r11,%rbx,1),%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + movdqu (%r8,%rbx,1),%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + movdqu (%r9,%rbx,1),%xmm3 + pxor %xmm12,%xmm2 + movups %xmm4,-16(%r14,%rbx,1) + movdqu (%r10,%rbx,1),%xmm4 + pxor %xmm12,%xmm3 + movups %xmm5,-16(%r15,%rbx,1) + movdqu (%r11,%rbx,1),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + + decl %edx + jnz .Loop_dec4x + + movq 16(%rsp),%rax +.cfi_def_cfa %rax,8 + movl 24(%rsp),%edx + + leaq 160(%rdi),%rdi + decl %edx + jnz .Ldec4x_loop_grande + +.Ldec4x_done: + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Ldec4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt +.type aesni_multi_cbc_encrypt_avx,@function +.align 32 +aesni_multi_cbc_encrypt_avx: +.cfi_startproc +_avx_cbc_enc_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + + + + + + + subq $192,%rsp + andq $-128,%rsp + movq %rax,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 + +.Lenc8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +.Lenc8x_loop_grande: + + xorl %edx,%edx + + movl -144(%rdi),%ecx + + movq -160(%rdi),%r8 + cmpl %edx,%ecx + + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + + movl -104(%rdi),%ecx + + movq -120(%rdi),%r9 + cmpl %edx,%ecx + + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + + movl -64(%rdi),%ecx + + movq -80(%rdi),%r10 + cmpl %edx,%ecx + + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + + movl -24(%rdi),%ecx + + movq -40(%rdi),%r11 + cmpl %edx,%ecx + + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + + movl 16(%rdi),%ecx + + movq 
0(%rdi),%r12 + cmpl %edx,%ecx + + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + + movl 56(%rdi),%ecx + + movq 40(%rdi),%r13 + cmpl %edx,%ecx + + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + + movl 96(%rdi),%ecx + + movq 80(%rdi),%r14 + cmpl %edx,%ecx + + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + + movl 136(%rdi),%ecx + + movq 120(%rdi),%r15 + cmpl %edx,%ecx + + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + testl %edx,%edx + jz .Lenc8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + + vpxor (%r8),%xmm15,%xmm10 + leaq 128(%rsp),%rbp + vpxor (%r9),%xmm15,%xmm11 + vpxor (%r10),%xmm15,%xmm12 + vpxor (%r11),%xmm15,%xmm13 + vpxor %xmm10,%xmm2,%xmm2 + vpxor (%r12),%xmm15,%xmm10 + vpxor %xmm11,%xmm3,%xmm3 + vpxor (%r13),%xmm15,%xmm11 + vpxor %xmm12,%xmm4,%xmm4 + vpxor (%r14),%xmm15,%xmm12 + vpxor %xmm13,%xmm5,%xmm5 + vpxor (%r15),%xmm15,%xmm13 + vpxor %xmm10,%xmm6,%xmm6 + movl $1,%ecx + vpxor %xmm11,%xmm7,%xmm7 + vpxor %xmm12,%xmm8,%xmm8 + vpxor %xmm13,%xmm9,%xmm9 + jmp .Loop_enc8x + +.align 32 +.Loop_enc8x: + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r8),%xmm15,%xmm10 + movq %rbx,64+0(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,0(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r9),%xmm15,%xmm11 + movq %rbx,64+8(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,16(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r10),%xmm15,%xmm12 + movq %rbx,64+16(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,32(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r11),%xmm15,%xmm13 + movq %rbx,64+24(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 
16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,48(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r12),%xmm15,%xmm10 + movq %rbx,64+32(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r13),%xmm15,%xmm11 + movq %rbx,64+40(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r14),%xmm15,%xmm12 + movq %rbx,64+48(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r15),%xmm15,%xmm13 + movq %rbx,64+56(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb .Lenc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je .Lenc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +.Lenc8x_tail: + vaesenc %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + vmovdqu 48(%rsp),%xmm14 + vaesenc %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesenc 
%xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesenclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesenclast %xmm0,%xmm7,%xmm7 + vaesenclast %xmm0,%xmm8,%xmm8 + vmovdqa %xmm14,48(%rsp) + vaesenclast %xmm0,%xmm9,%xmm9 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vpxor 0(%rbp),%xmm2,%xmm2 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vpxor 16(%rbp),%xmm3,%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vpxor 32(%rbp),%xmm4,%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vpxor 48(%rbp),%xmm5,%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vpxor %xmm10,%xmm6,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vpxor %xmm11,%xmm7,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vpxor %xmm12,%xmm8,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vpxor %xmm13,%xmm9,%xmm9 + + decl %edx + jnz .Loop_enc8x + + movq 16(%rsp),%rax +.cfi_def_cfa %rax,8 + + + + + +.Lenc8x_done: + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lenc8x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx + +.type aesni_multi_cbc_decrypt_avx,@function +.align 32 +aesni_multi_cbc_decrypt_avx: +.cfi_startproc +_avx_cbc_dec_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + + + + + + + + + subq $256,%rsp + andq $-256,%rsp + subq $192,%rsp + movq %rax,16(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 + +.Ldec8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +.Ldec8x_loop_grande: + + xorl %edx,%edx + + movl -144(%rdi),%ecx + + movq -160(%rdi),%r8 + cmpl %edx,%ecx + + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + vmovdqu %xmm2,192(%rsp) + + movl -104(%rdi),%ecx + + movq -120(%rdi),%r9 + cmpl %edx,%ecx + + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + vmovdqu %xmm3,208(%rsp) + + movl -64(%rdi),%ecx + + movq -80(%rdi),%r10 + cmpl %edx,%ecx + + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + vmovdqu %xmm4,224(%rsp) + + movl -24(%rdi),%ecx + + movq -40(%rdi),%r11 + cmpl %edx,%ecx + + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + vmovdqu %xmm5,240(%rsp) + + movl 16(%rdi),%ecx + + movq 0(%rdi),%r12 + cmpl %edx,%ecx + + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 
+ subq %r12,%rbp + movq %rbp,96(%rsp) + vmovdqu %xmm6,256(%rsp) + + movl 56(%rdi),%ecx + + movq 40(%rdi),%r13 + cmpl %edx,%ecx + + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + vmovdqu %xmm7,272(%rsp) + + movl 96(%rdi),%ecx + + movq 80(%rdi),%r14 + cmpl %edx,%ecx + + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + vmovdqu %xmm8,288(%rsp) + + movl 136(%rdi),%ecx + + movq 120(%rdi),%r15 + cmpl %edx,%ecx + + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + vmovdqu %xmm9,304(%rsp) + testl %edx,%edx + jz .Ldec8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + leaq 192+128(%rsp),%rbp + + vmovdqu (%r8),%xmm2 + vmovdqu (%r9),%xmm3 + vmovdqu (%r10),%xmm4 + vmovdqu (%r11),%xmm5 + vmovdqu (%r12),%xmm6 + vmovdqu (%r13),%xmm7 + vmovdqu (%r14),%xmm8 + vmovdqu (%r15),%xmm9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm6,64(%rbp) + vpxor %xmm15,%xmm6,%xmm6 + vmovdqu %xmm7,80(%rbp) + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm8,96(%rbp) + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu %xmm9,112(%rbp) + vpxor %xmm15,%xmm9,%xmm9 + xorq $0x80,%rbp + movl $1,%ecx + jmp .Loop_dec8x + +.align 32 +.Loop_dec8x: + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r8),%xmm10 + movq %rbx,64+0(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,128(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r9),%xmm11 + movq %rbx,64+8(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,144(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r10),%xmm12 + movq %rbx,64+16(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,160(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 
16(%r11),%xmm13 + movq %rbx,64+24(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,176(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r12),%xmm10 + movq %rbx,64+32(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r13),%xmm11 + movq %rbx,64+40(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r14),%xmm12 + movq %rbx,64+48(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r15),%xmm13 + movq %rbx,64+56(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb .Ldec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je .Ldec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +.Ldec8x_tail: + vaesdec %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + 
vmovdqu 48(%rsp),%xmm14 + vaesdec %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesdeclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor 0(%rbp),%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor 16(%rbp),%xmm3,%xmm3 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor 32(%rbp),%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor 48(%rbp),%xmm5,%xmm5 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor 64(%rbp),%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm8,%xmm8 + vpxor 80(%rbp),%xmm7,%xmm7 + vmovdqa %xmm14,48(%rsp) + vaesdeclast %xmm0,%xmm9,%xmm9 + vpxor 96(%rbp),%xmm8,%xmm8 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vmovdqu 128+0(%rsp),%xmm2 + vpxor 112(%rbp),%xmm9,%xmm9 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu 128+16(%rsp),%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu 128+32(%rsp),%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu 128+48(%rsp),%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm10,64(%rbp) + vpxor %xmm10,%xmm15,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vmovdqu %xmm11,80(%rbp) + vpxor %xmm11,%xmm15,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vmovdqu %xmm12,96(%rbp) + vpxor %xmm12,%xmm15,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vmovdqu %xmm13,112(%rbp) + vpxor %xmm13,%xmm15,%xmm9 + + xorq $128,%rbp + decl %edx + jnz .Loop_dec8x + + movq 16(%rsp),%rax +.cfi_def_cfa %rax,8 + + + + + +.Ldec8x_done: + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Ldec8x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesni-sha1-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/aesni-sha1-x86_64.s new file mode 100644 index 000000000000..bb8105cef0c0 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesni-sha1-x86_64.s @@ -0,0 +1,3037 @@ +.text + + +.globl aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc,@function +.align 32 +aesni_cbc_sha1_enc: +.cfi_startproc + + movl OPENSSL_ia32cap_P+0(%rip),%r10d + movq OPENSSL_ia32cap_P+4(%rip),%r11 + btq $61,%r11 + jc aesni_cbc_sha1_enc_shaext + andl $268435456,%r11d + andl $1073741824,%r10d + orl %r11d,%r10d + cmpl $1342177280,%r10d + je aesni_cbc_sha1_enc_avx + jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc_ssse3,@function +.align 32 +aesni_cbc_sha1_enc_ssse3: +.cfi_startproc + movq 8(%rsp),%r10 + + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq 
-104(%rsp),%rsp +.cfi_adjust_cfa_offset 104 + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + movdqu (%r8),%xmm2 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + addq $64,%r10 + paddd %xmm13,%xmm4 +.byte 102,15,56,0,251 + paddd %xmm13,%xmm5 + paddd %xmm13,%xmm6 + movdqa %xmm4,0(%rsp) + psubd %xmm13,%xmm4 + movdqa %xmm5,16(%rsp) + psubd %xmm13,%xmm5 + movdqa %xmm6,32(%rsp) + psubd %xmm13,%xmm6 + movups -112(%r15),%xmm15 + movups 16-112(%r15),%xmm0 + jmp .Loop_ssse3 +.align 32 +.Loop_ssse3: + rorl $2,%ebx + movups 0(%r12),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm4,%xmm8 + xorl %edx,%esi + movdqa %xmm7,%xmm12 + paddd %xmm7,%xmm13 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm5,%xmm8 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm12 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm4,%xmm8 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm6,%xmm12 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm12,%xmm8 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm13,48(%rsp) + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %eax,%esi + movdqa %xmm8,%xmm3 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm8,%xmm12 + xorl %ebx,%esi + pslldq $12,%xmm3 + paddd %xmm8,%xmm8 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm12 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm13 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm3 + addl %edx,%ecx + rorl $7,%edx + por %xmm12,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm13 + pxor %xmm3,%xmm8 + xorl %ebp,%edx + movdqa 0(%r11),%xmm3 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm13,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm5,%xmm9 + xorl %ebp,%esi + movdqa %xmm8,%xmm13 + paddd %xmm8,%xmm3 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm6,%xmm9 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm5,%xmm9 + addl %ebx,%eax + rorl $7,%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + pxor %xmm7,%xmm13 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm13,%xmm9 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm3,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm9,%xmm12 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm9,%xmm13 + xorl %ecx,%esi + pslldq $12,%xmm12 + paddd %xmm9,%xmm9 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm13 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm3 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm12 + addl %ebp,%edx + rorl $7,%ebp + por %xmm13,%xmm9 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm3 + pxor %xmm12,%xmm9 + xorl %eax,%ebp + movdqa 16(%r11),%xmm12 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm3,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd 
$238,%xmm6,%xmm10 + xorl %eax,%esi + movdqa %xmm9,%xmm3 + paddd %xmm9,%xmm12 + movl %ecx,%edi + addl 32(%rsp),%ebx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm7,%xmm10 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm3 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm6,%xmm10 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm8,%xmm3 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm3,%xmm10 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm12,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm10,%xmm13 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm10,%xmm3 + xorl %edx,%esi + pslldq $12,%xmm13 + paddd %xmm10,%xmm10 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm3 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm13,%xmm12 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm13 + addl %eax,%ebp + rorl $7,%eax + por %xmm3,%xmm10 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm12 + pxor %xmm13,%xmm10 + xorl %ebx,%eax + movdqa 16(%r11),%xmm13 + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %eax,%esi + pxor %xmm12,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm7,%xmm11 + xorl %ebx,%esi + movdqa %xmm10,%xmm12 + paddd %xmm10,%xmm13 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm8,%xmm11 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm12 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm7,%xmm11 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm9,%xmm12 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm11 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm13,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm11,%xmm3 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm11,%xmm12 + xorl %ebp,%esi + pslldq $12,%xmm3 + paddd %xmm11,%xmm11 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm12 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm3,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + cmpl $11,%r8d + jb .Laesenclast1 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast1 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast1: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + por %xmm12,%xmm11 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm13 + pxor %xmm3,%xmm11 + xorl %ecx,%ebx + movdqa 16(%r11),%xmm3 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm13,%xmm11 + pshufd $238,%xmm10,%xmm13 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm8,%xmm4 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm11,%xmm13 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm5,%xmm4 + addl %esi,%edx + movups 16(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%r12,%r13,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %eax,%edi + movdqa %xmm3,%xmm12 + xorl %ebx,%eax + paddd %xmm11,%xmm3 + addl %ebp,%edx + pxor %xmm13,%xmm4 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm4,%xmm13 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm3,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm4 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm13 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + movups 
-64(%r15),%xmm0 +.byte 102,15,56,220,209 + por %xmm13,%xmm4 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm11,%xmm3 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm9,%xmm5 + addl 16(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm4,%xmm3 + movl %eax,%edi + roll $5,%eax + pxor %xmm6,%xmm5 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebx + paddd %xmm4,%xmm12 + addl %eax,%ebp + pxor %xmm3,%xmm5 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm5,%xmm3 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm12,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm5 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm3 + roll $5,%edx + addl %esi,%ecx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + por %xmm3,%xmm5 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm4,%xmm12 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm10,%xmm6 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm5,%xmm12 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm7,%xmm6 + addl %esi,%eax + xorl %edx,%edi + movdqa 32(%r11),%xmm3 + rorl $7,%ecx + paddd %xmm5,%xmm13 + addl %ebx,%eax + pxor %xmm12,%xmm6 + addl 36(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm6,%xmm12 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm13,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm6 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm12 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm12,%xmm6 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm5,%xmm13 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm6,%xmm13 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm8,%xmm7 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm3,%xmm12 + rorl $7,%edx + paddd %xmm6,%xmm3 + addl %ecx,%ebx + pxor %xmm13,%xmm7 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm7,%xmm13 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm3,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm7 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm13 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm13,%xmm7 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm6,%xmm3 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm4,%xmm8 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm7,%xmm3 + movl %edx,%edi + roll $5,%edx + pxor %xmm9,%xmm8 + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebp + paddd %xmm7,%xmm12 + addl %edx,%ecx + pxor %xmm3,%xmm8 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm8,%xmm3 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm12,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm8 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm3 + roll 
$5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm3,%xmm8 + addl %ebx,%eax + addl 12(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm7,%xmm12 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm5,%xmm9 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm8,%xmm12 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm10,%xmm9 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%eax + paddd %xmm8,%xmm13 + addl %ebp,%edx + pxor %xmm12,%xmm9 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm9,%xmm12 + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast2 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast2 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast2: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + movdqa %xmm13,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm9 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm12 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm12,%xmm9 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm8,%xmm13 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm6,%xmm10 + addl 32(%rsp),%ebp + movups 32(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm9,%xmm13 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm11,%xmm10 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm3,%xmm12 + xorl %ebx,%edi + paddd %xmm9,%xmm3 + xorl %ecx,%ebx + pxor %xmm13,%xmm10 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm10,%xmm13 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm3,16(%rsp) + roll $5,%ebp + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + pslld $2,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm13 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm13,%xmm10 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm9,%xmm3 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm7,%xmm11 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm10,%xmm3 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm4,%xmm11 + roll $5,%ebx + addl %esi,%eax + movdqa 48(%r11),%xmm13 + xorl %ecx,%edi + paddd %xmm10,%xmm12 + xorl %edx,%ecx + pxor %xmm3,%xmm11 + addl %ebx,%eax + addl 52(%rsp),%ebp + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm11,%xmm3 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm12,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm11 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm3 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm3,%xmm11 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm10,%xmm12 + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 
102,15,56,220,208 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm8,%xmm4 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm11,%xmm12 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm5,%xmm4 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm13,%xmm3 + xorl %edx,%edi + paddd %xmm11,%xmm13 + xorl %ebp,%edx + pxor %xmm12,%xmm4 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm4,%xmm12 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm13,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm4 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm12 + addl 8(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %ecx,%esi + xorl %edx,%ecx + por %xmm12,%xmm4 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm11,%xmm13 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm9,%xmm5 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm4,%xmm13 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm6,%xmm5 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm12 + xorl %ebp,%edi + paddd %xmm4,%xmm3 + xorl %eax,%ebp + pxor %xmm13,%xmm5 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm5,%xmm13 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm3,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm5 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm13 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm13,%xmm5 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm4,%xmm3 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb .Laesenclast3 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast3 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast3: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm6 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm5,%xmm3 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm7,%xmm6 + roll $5,%ebp + addl %esi,%edx + movups 48(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm13 + xorl %eax,%edi + paddd %xmm5,%xmm12 + xorl %ebx,%eax + pxor %xmm3,%xmm6 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm6,%xmm3 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm12,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm6 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm3 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl 
%eax,%ebp + por %xmm3,%xmm6 + rorl $7,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm5,%xmm12 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm6,%xmm12 + movl %eax,%edi + roll $5,%eax + pxor %xmm8,%xmm7 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%ebx + paddd %xmm6,%xmm13 + addl %eax,%ebp + pxor %xmm12,%xmm7 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm7,%xmm12 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm13,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm7 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm12 + roll $5,%edx + addl %esi,%ecx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + por %xmm12,%xmm7 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm7,%xmm3 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm3,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,235 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm13,%xmm4 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm4,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm13,%xmm4 + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,243 + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm13,%xmm5 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm5,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm13,%xmm5 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl 
%ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,251 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm13,%xmm6 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm6,32(%rsp) + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast4 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast4 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast4: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm13,%xmm6 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp .Loop_ssse3 + +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast5 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast5 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast5: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 
12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm2,(%r8) + leaq 104(%rsp),%rsi +.cfi_def_cfa %rsi,56 + movq 0(%rsi),%r15 +.cfi_restore %r15 + movq 8(%rsi),%r14 +.cfi_restore %r14 + movq 16(%rsi),%r13 +.cfi_restore %r13 + movq 24(%rsi),%r12 +.cfi_restore %r12 + movq 32(%rsi),%rbp +.cfi_restore %rbp + movq 40(%rsi),%rbx +.cfi_restore %rbx + leaq 48(%rsi),%rsp +.cfi_def_cfa %rsp,8 +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.type aesni_cbc_sha1_enc_avx,@function +.align 32 +aesni_cbc_sha1_enc_avx: +.cfi_startproc + movq 8(%rsp),%r10 + + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -104(%rsp),%rsp +.cfi_adjust_cfa_offset 104 + + + vzeroall + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + vmovdqu (%r8),%xmm12 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r11),%xmm6 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r10 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm10,%xmm0,%xmm4 + vpaddd %xmm10,%xmm1,%xmm5 + vpaddd %xmm10,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + jmp .Loop_avx +.align 32 +.Loop_avx: + shrdl $2,%ebx,%ebx + vmovdqu 0(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm10,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm9 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpor %xmm8,%xmm4,%xmm4 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + vpxor %xmm9,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm10,%xmm9 + xorl 
%edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm9 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpor %xmm8,%xmm5,%xmm5 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm9,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa 16(%r11),%xmm10 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpaddd %xmm5,%xmm10,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm9 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpor %xmm8,%xmm6,%xmm6 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm9,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm10,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm9 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpor %xmm8,%xmm7,%xmm7 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + cmpl $11,%r8d + jb .Lvaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je 
.Lvaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast6: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm9,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm10,%xmm9 + addl %esi,%edx + vmovdqu 16(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,0(%r12,%r13,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm10,%xmm9 + vmovdqa 32(%r11),%xmm10 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor 
%xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + vpaddd %xmm3,%xmm10,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm10,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast7: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + vmovdqu 32(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,16(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm10,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 
36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm10,%xmm9 + vmovdqa 48(%r11),%xmm10 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm10,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm10,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld 
$30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb .Lvaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast8: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm10,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vmovdqu 48(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,32(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm10,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + vaesenc 
%xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je .Ldone_avx + vmovdqa 64(%r11),%xmm9 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm9,%xmm0,%xmm0 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm9,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm10,%xmm0,%xmm8 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm8,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm9,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm10,%xmm1,%xmm8 + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm8,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm9,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm10,%xmm2,%xmm8 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm8,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast9: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl 
%ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp .Loop_avx + +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast10: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + vmovups %xmm12,(%r8) + vzeroall + leaq 104(%rsp),%rsi +.cfi_def_cfa %rsi,56 + movq 0(%rsi),%r15 +.cfi_restore %r15 + movq 8(%rsi),%r14 +.cfi_restore %r14 + movq 16(%rsi),%r13 +.cfi_restore %r13 + movq 24(%rsi),%r12 +.cfi_restore %r12 + movq 32(%rsi),%rbp +.cfi_restore %rbp + movq 40(%rsi),%rbx +.cfi_restore %rbx + leaq 48(%rsi),%rsp +.cfi_def_cfa %rsp,8 +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx +.section .rodata +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 + +.byte 
65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.previous +.type aesni_cbc_sha1_enc_shaext,@function +.align 32 +aesni_cbc_sha1_enc_shaext: +.cfi_startproc + movq 8(%rsp),%r10 + movdqu (%r9),%xmm8 + movd 16(%r9),%xmm9 + movdqa K_XX_XX+80(%rip),%xmm7 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups (%r8),%xmm2 + movups 16(%rcx),%xmm0 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqu (%r10),%xmm3 + movdqa %xmm9,%xmm12 +.byte 102,15,56,0,223 + movdqu 16(%r10),%xmm4 + movdqa %xmm8,%xmm11 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,0,231 + + paddd %xmm3,%xmm9 + movdqu 32(%r10),%xmm5 + leaq 64(%r10),%r10 + pxor %xmm12,%xmm3 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm3 + movdqa %xmm8,%xmm10 +.byte 102,15,56,0,239 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 15,56,201,220 + movdqu -16(%r10),%xmm6 + movdqa %xmm8,%xmm9 +.byte 102,15,56,0,247 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,205 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,203 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + cmpl $11,%r11d + jb .Laesenclast11 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast11 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast11: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,214 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,203 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,212 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + cmpl $11,%r11d + jb .Laesenclast12 + movups 
64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast12 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast12: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,203 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,212 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,205 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + cmpl $11,%r11d + jb .Laesenclast13 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast13 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast13: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,203 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,212 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 +.byte 15,56,202,245 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm12,%xmm5 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 + cmpl $11,%r11d + jb .Laesenclast14 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast14 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast14: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + decq %rdx + + paddd %xmm11,%xmm8 + movups %xmm2,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + movups %xmm2,(%r8) + movdqu %xmm8,(%r9) + movd %xmm9,16(%r9) + .byte 0xf3,0xc3 
+.cfi_endproc +.size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesni-sha256-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/aesni-sha256-x86_64.s new file mode 100644 index 000000000000..fb156de5edae --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesni-sha256-x86_64.s @@ -0,0 +1,4437 @@ +.text + + +.globl aesni_cbc_sha256_enc +.type aesni_cbc_sha256_enc,@function +.align 16 +aesni_cbc_sha256_enc: +.cfi_startproc + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl $1,%eax + cmpq $0,%rdi + je .Lprobe + movl 0(%r11),%eax + movq 4(%r11),%r10 + btq $61,%r10 + jc aesni_cbc_sha256_enc_shaext + movq %r10,%r11 + shrq $32,%r11 + + testl $2048,%r10d + jnz aesni_cbc_sha256_enc_xop + andl $296,%r11d + cmpl $296,%r11d + je aesni_cbc_sha256_enc_avx2 + andl $268435456,%r10d + jnz aesni_cbc_sha256_enc_avx + ud2 + xorl %eax,%eax + cmpq $0,%rdi + je .Lprobe + ud2 +.Lprobe: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc + +.section .rodata +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 +.long 0,0,0,0, 0,0,0,0 +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.previous +.type aesni_cbc_sha256_enc_xop,@function +.align 64 +aesni_cbc_sha256_enc_xop: +.cfi_startproc +.Lxop_shortcut: + movq 8(%rsp),%r10 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $128,%rsp + andq $-64,%rsp + 
+ shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %rax,120(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 +.Lprologue_xop: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm2,%xmm3,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm0,%xmm0 + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,251,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm3,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm0,%xmm0 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,248,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm0,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm0,%xmm0 + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl 
%r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm3,%xmm0,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm1,%xmm1 + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,248,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm0,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm1,%xmm1 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,249,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm1,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm1,%xmm1 + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm0,%xmm1,%xmm7 + movl 
%r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm2,%xmm2 + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,249,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm1,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm2,%xmm2 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,250,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm2,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm2,%xmm2 + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm1,%xmm2,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm3,%xmm3 + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,250,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm2,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm3,%xmm3 + movl %r11d,%r13d + 
addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,251,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm3,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm3,%xmm3 + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lxop_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 
8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl 
%edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl 
%r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jb .Lloop_xop + + movq 64+32(%rsp),%r8 + movq 120(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vmovdqu %xmm8,(%r8) + vzeroall + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop +.type aesni_cbc_sha256_enc_avx,@function +.align 64 +aesni_cbc_sha256_enc_avx: +.cfi_startproc +.Lavx_shortcut: + movq 8(%rsp),%r10 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %rax,120(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 +.Lprologue_avx: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld 
$3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm3,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpaddd %xmm6,%xmm0,%xmm0 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm0,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 0(%rbp),%xmm0,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm0,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl 
%eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpaddd %xmm6,%xmm1,%xmm1 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm1,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 32(%rbp),%xmm1,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm1,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d 
+ vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpaddd %xmm6,%xmm2,%xmm2 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm2,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 64(%rbp),%xmm2,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm2,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + 
vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpaddd %xmm6,%xmm3,%xmm3 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm3,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 96(%rbp),%xmm3,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lavx_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + 
shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl 
%r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + 
shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + jb .Lloop_avx + + movq 64+32(%rsp),%r8 + movq 120(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vmovdqu %xmm8,(%r8) + vzeroall + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx +.type aesni_cbc_sha256_enc_avx2,@function +.align 64 +aesni_cbc_sha256_enc_avx2: +.cfi_startproc +.Lavx2_shortcut: + movq 8(%rsp),%r10 + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $576,%rsp + andq $-1024,%rsp + addq $448,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %rax,120(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 +.Lprologue_avx2: + vzeroall + + movq %rdi,%r13 + vpinsrq $1,%rsi,%xmm15,%xmm15 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r12 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + leaq -9(%r14),%r14 + + vmovdqa 0(%r12,%r14,8),%xmm14 + vmovdqa 16(%r12,%r14,8),%xmm13 + vmovdqa 32(%r12,%r14,8),%xmm12 + + subq $-64,%r13 + movl 0(%r15),%eax + leaq (%rsi,%r13,1),%r12 + movl 4(%r15),%ebx + cmpq %rdx,%r13 + movl 8(%r15),%ecx + cmoveq %rsp,%r12 + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + vmovdqu 0-128(%rdi),%xmm10 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi,%r13,1),%xmm0 + vmovdqu -64+16(%rsi,%r13,1),%xmm1 + vmovdqu -64+32(%rsi,%r13,1),%xmm2 + vmovdqu -64+48(%rsi,%r13,1),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + leaq -64(%r13),%r13 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + + movq 120(%rsp),%rsi +.cfi_def_cfa %rsi,8 + leaq -64(%rsp),%rsp + + + + movq %rsi,-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + movl %ebx,%esi + vmovdqa 
%ymm6,0(%rsp) + xorl %ecx,%esi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + leaq -64(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08 + + pushq 64-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm0,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm0,%ymm0 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 0(%rbp),%ymm0,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa 
%ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm1,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm1,%ymm1 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 32(%rbp),%ymm1,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08 + + pushq 64-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + 
rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm2,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm2,%ymm2 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 64(%rbp),%ymm2,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal 
(%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm3,%ymm7 + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm3,%ymm3 + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 96(%rbp),%ymm3,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal 
(%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vpextrq $1,%xmm15,%r12 + vmovq %xmm15,%r13 + movq 552(%rsp),%r15 + addl %r14d,%eax + leaq 448(%rsp),%rbp + + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r13),%r13 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + cmpq 80(%rbp),%r13 + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%esi + movl %r9d,%r12d + xorl %ecx,%esi + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl 
%r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl 
%ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d 
+ leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + leaq -64(%rbp),%rbp + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 552(%rsp),%r15 + leaq 64(%r13),%r13 + movq 560(%rsp),%rsi + addl %r14d,%eax + leaq 448(%rsp),%rsp + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + leaq (%rsi,%r13,1),%r12 + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r13 + + movl %eax,0(%r15) + cmoveq %rsp,%r12 + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + + +.cfi_escape 0x0f,0x06,0x76,0xf8,0x00,0x06,0x23,0x08 + +.Ldone_avx2: + movq 64+32(%rbp),%r8 + movq 64+56(%rbp),%rsi +.cfi_def_cfa %rsi,8 + vmovdqu %xmm8,(%r8) + vzeroall + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2 +.type aesni_cbc_sha256_enc_shaext,@function +.align 32 +aesni_cbc_sha256_enc_shaext: +.cfi_startproc + movq 8(%rsp),%r10 + leaq K256+128(%rip),%rax + movdqu (%r9),%xmm1 + movdqu 16(%r9),%xmm2 + movdqa 512-128(%rax),%xmm3 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups (%r8),%xmm6 + movups 16(%rcx),%xmm4 + leaq 112(%rcx),%rcx + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%r10),%xmm10 + movdqu 16(%r10),%xmm11 + movdqu 32(%r10),%xmm12 +.byte 102,68,15,56,0,211 + 
movdqu 48(%r10),%xmm13 + + movdqa 0-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 102,68,15,56,0,219 + movdqa %xmm2,%xmm9 + movdqa %xmm1,%xmm8 + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 32-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 102,68,15,56,0,227 + leaq 64(%r10),%r10 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 64-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 102,68,15,56,0,235 +.byte 69,15,56,204,211 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 96-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 +.byte 15,56,203,202 + movdqa 128-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + cmpl $11,%r11d + jb .Laesenclast1 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast1 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast1: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,0(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 160-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 192-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 224-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 256-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast2 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast2 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc 
%xmm4,%xmm6 +.Laesenclast2: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,16(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 288-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 320-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 352-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 384-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 416-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + cmpl $11,%r11d + jb .Laesenclast3 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast3 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast3: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,32(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 448-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 + movdqa %xmm7,%xmm3 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 480-128(%rax),%xmm0 + paddd %xmm13,%xmm0 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast4 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast4 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast4: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop + + paddd %xmm9,%xmm2 + paddd %xmm8,%xmm1 + + decq %rdx + movups %xmm6,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm3 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,211,8 + + movups %xmm6,(%r8) + movdqu %xmm1,(%r9) + movdqu 
%xmm2,16(%r9) + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesni-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/aesni-x86_64.s new file mode 100644 index 000000000000..6f79d526a264 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesni-x86_64.s @@ -0,0 +1,4487 @@ +.text + +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: +.cfi_startproc +.byte 243,15,30,250 + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_encrypt,.-aesni_encrypt + +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.cfi_startproc +.byte 243,15,30,250 + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_decrypt2,.-_aesni_decrypt2 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 
16 +_aesni_decrypt3: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Lenc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Ldec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.Lenc_loop6_enter: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 
102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop6_enter +.align 16 +.Ldec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.Ldec_loop6_enter: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop8_inner +.align 16 +.Lenc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.Lenc_loop8_inner: +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.Lenc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: +.cfi_startproc + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm9 + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop8_inner +.align 16 +.Ldec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.Ldec_loop8_inner: 
+.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.Ldec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.cfi_startproc +.byte 243,15,30,250 + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $0x80,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $0x80,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $0x80,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $0x80,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $0x20,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $0x40,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $0x60,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 
+.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp .Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $0x80,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $0x80,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $0x80,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movq %r11,%rcx + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movl %r10d,%eax + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 + leaq 128(%rsi),%rsi + addq $0x80,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $0x20,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $0x40,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $0x60,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor 
%xmm5,%xmm5 + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + +.Lecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.cfi_startproc +.byte 243,15,30,250 + movl 240(%rcx),%eax + movdqu (%r8),%xmm6 + movdqa .Lincrement64(%rip),%xmm9 + movdqa .Lbswap_mask(%rip),%xmm7 + + shll $4,%eax + movl $16,%r10d + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movq %r10,%rax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%r11),%xmm0 + +.Lccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm9,%xmm6 + decq %rdx +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) +.byte 102,15,56,0,215 + leaq 16(%rsi),%rsi + jnz .Lccm64_enc_outer + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.cfi_startproc +.byte 243,15,30,250 + movl 240(%rcx),%eax + movups (%r8),%xmm6 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm9 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm6,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,15,56,0,247 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 +.byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 + leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + movq %r10,%rax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups 32(%r11),%xmm0 + jmp .Lccm64_dec2_loop +.align 16 +.Lccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lccm64_dec2_loop + movups 
(%rdi),%xmm8 + paddq %xmm9,%xmm6 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + + movl 240(%r11),%eax + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 +.byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.cfi_startproc +.byte 243,15,30,250 + cmpq $1,%rdx + jne .Lctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp .Lctr32_epilogue + +.align 16 +.Lctr32_bulk: + leaq (%rsp),%r11 +.cfi_def_cfa_register %r11 + pushq %rbp +.cfi_offset %rbp,-16 + subq $128,%rsp + andq $-16,%rsp + + + + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%ebp + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %ebp,%eax + xorl %ebp,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %ebp,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %ebp,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %ebp,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %ebp,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %ebp,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb .Lctr32_tail + + subq $6,%rdx + cmpl $4194304,%r10d + je .Lctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp .Lctr32_loop8 + +.align 16 +.Lctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %ebp + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %ebp,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %ebp,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %ebp,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %ebp,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %ebp,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 
102,15,56,220,224 + leal 5(%r8),%eax + xorl %ebp,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + + call .Lenc_loop6 + + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp .Lctr32_tail + +.align 32 +.Lctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %ebp,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %ebp,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %ebp,%r9d + movdqu 
0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp .Lctr32_enc_done + +.align 16 +.Lctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc .Lctr32_loop8 + + addq $8,%rdx + jz .Lctr32_done + leaq -128(%rcx),%rcx + +.Lctr32_tail: + + + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb .Lctr32_loop3 + je .Lctr32_loop4 + + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call .Lenc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb .Lctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups 
%xmm7,80(%rsi) + je .Lctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz .Lctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz .Lctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb .Lctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je .Lctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + +.Lctr32_done: + xorps %xmm0,%xmm0 + xorl %ebp,%ebp + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 + movq -8(%r11),%rbp +.cfi_restore %rbp + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lctr32_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.cfi_startproc +.byte 243,15,30,250 + leaq (%rsp),%r11 +.cfi_def_cfa_register %r11 + pushq %rbp +.cfi_offset %rbp,-16 + subq $112,%rsp + andq $-16,%rsp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_8: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 + movq %rcx,%rbp + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa .Lxts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $0x5f,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps 
%xmm1,96(%rsp) + + subq $96,%rdx + jc .Lxts_enc_short + + movl $16+96,%eax + leaq 32(%rbp,%r10,1),%rcx + subq %r10,%rax + movups 16(%rbp),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 + jmp .Lxts_enc_grandloop + +.align 32 +.Lxts_enc_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,220,233 + movups 32(%rbp),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,249 + movups 48(%rbp),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%rbp),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $0x5f,%xmm15,%xmm9 + jmp .Lxts_enc_loop6 +.align 32 +.Lxts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz .Lxts_enc_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + pand %xmm8,%xmm14 + movups (%rbp),%xmm10 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,220,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,220,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,220,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%rbp),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%rbp),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 
102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc .Lxts_enc_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %rbp,%rcx + shrl $4,%eax + +.Lxts_enc_short: + + movl %eax,%r10d + pxor %xmm0,%xmm10 + addq $96,%rdx + jz .Lxts_enc_done + + pxor %xmm0,%xmm11 + cmpq $0x20,%rdx + jb .Lxts_enc_one + pxor %xmm0,%xmm12 + je .Lxts_enc_two + + pxor %xmm0,%xmm13 + cmpq $0x40,%rdx + jb .Lxts_enc_three + pxor %xmm0,%xmm14 + je .Lxts_enc_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %rbp,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx 
+ jnz .Loop_enc1_10 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + movq -8(%r11),%rbp +.cfi_restore %rbp + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_xts_encrypt,.-aesni_xts_encrypt +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: +.cfi_startproc +.byte 243,15,30,250 + leaq (%rsp),%r11 +.cfi_def_cfa_register %r11 + pushq %rbp +.cfi_offset %rbp,-16 + subq $112,%rsp + andq $-16,%rsp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_11: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 +.byte 102,15,56,221,209 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movups (%rcx),%xmm0 + movq %rcx,%rbp + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa .Lxts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $0x5f,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc .Lxts_dec_short + + movl $16+96,%eax + leaq 32(%rbp,%r10,1),%rcx + subq %r10,%rax + movups 16(%rbp),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 + jmp .Lxts_dec_grandloop + +.align 32 +.Lxts_dec_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,222,233 + movups 32(%rbp),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,249 + movups 48(%rbp),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) 
+.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%rbp),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $0x5f,%xmm15,%xmm9 + jmp .Lxts_dec_loop6 +.align 32 +.Lxts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz .Lxts_dec_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + pand %xmm8,%xmm14 + movups (%rbp),%xmm10 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,222,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,222,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%rbp),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%rbp),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc .Lxts_dec_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %rbp,%rcx + shrl $4,%eax + +.Lxts_dec_short: + + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 + addq $96,%rdx + jz .Lxts_dec_done + + pxor %xmm0,%xmm12 + cmpq $0x20,%rdx + jb .Lxts_dec_one + pxor %xmm0,%xmm13 + je .Lxts_dec_two + + pxor %xmm0,%xmm14 + cmpq $0x40,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu 
%xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $0x13,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm14,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %rbp,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %rbp,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor 
%xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + movq -8(%r11),%rbp +.cfi_restore %rbp + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_xts_decrypt,.-aesni_xts_decrypt +.globl aesni_ocb_encrypt +.type aesni_ocb_encrypt,@function +.align 32 +aesni_ocb_encrypt: +.cfi_startproc +.byte 243,15,30,250 + leaq (%rsp),%rax + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + movq 8(%rax),%rbx + movq 8+8(%rax),%rbp + + movl 240(%rcx),%r10d + movq %rcx,%r11 + shll $4,%r10d + movups (%rcx),%xmm9 + movups 16(%rcx,%r10,1),%xmm1 + + movdqu (%r9),%xmm15 + pxor %xmm1,%xmm9 + pxor %xmm1,%xmm15 + + movl $16+32,%eax + leaq 32(%r11,%r10,1),%rcx + movups 16(%r11),%xmm1 + subq %r10,%rax + movq %rax,%r10 + + movdqu (%rbx),%xmm10 + movdqu (%rbp),%xmm8 + + testq $1,%r8 + jnz .Locb_enc_odd + + bsfq %r8,%r12 + addq $1,%r8 + shlq $4,%r12 + movdqu (%rbx,%r12,1),%xmm7 + movdqu (%rdi),%xmm2 + leaq 16(%rdi),%rdi + + call __ocb_encrypt1 + + movdqa %xmm7,%xmm15 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + subq $1,%rdx + jz .Locb_enc_done + +.Locb_enc_odd: + leaq 1(%r8),%r12 + leaq 3(%r8),%r13 + leaq 5(%r8),%r14 + leaq 6(%r8),%r8 + bsfq %r12,%r12 + bsfq %r13,%r13 + bsfq %r14,%r14 + shlq $4,%r12 + shlq $4,%r13 + shlq $4,%r14 + + subq $6,%rdx + jc .Locb_enc_short + jmp .Locb_enc_grandloop + +.align 32 +.Locb_enc_grandloop: + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + + call __ocb_encrypt6 + + movups %xmm2,0(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + subq $6,%rdx + jnc .Locb_enc_grandloop + +.Locb_enc_short: + addq $6,%rdx + jz .Locb_enc_done + + movdqu 0(%rdi),%xmm2 + cmpq $2,%rdx + jb .Locb_enc_one + movdqu 16(%rdi),%xmm3 + je .Locb_enc_two + + movdqu 32(%rdi),%xmm4 + cmpq $4,%rdx + jb .Locb_enc_three + movdqu 48(%rdi),%xmm5 + je .Locb_enc_four + + movdqu 64(%rdi),%xmm6 + pxor %xmm7,%xmm7 + + call __ocb_encrypt6 + + movdqa %xmm14,%xmm15 + movups %xmm2,0(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + + jmp .Locb_enc_done + +.align 16 +.Locb_enc_one: + movdqa %xmm10,%xmm7 + + call __ocb_encrypt1 + + movdqa %xmm7,%xmm15 + movups %xmm2,0(%rsi) + jmp .Locb_enc_done + +.align 16 +.Locb_enc_two: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + call __ocb_encrypt4 + + movdqa %xmm11,%xmm15 + movups %xmm2,0(%rsi) + movups %xmm3,16(%rsi) + + jmp .Locb_enc_done + +.align 16 +.Locb_enc_three: + pxor %xmm5,%xmm5 + + call __ocb_encrypt4 + + movdqa %xmm12,%xmm15 + movups %xmm2,0(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + + jmp .Locb_enc_done + +.align 16 +.Locb_enc_four: + call __ocb_encrypt4 + + movdqa %xmm13,%xmm15 + movups %xmm2,0(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + +.Locb_enc_done: + pxor %xmm0,%xmm15 + movdqu %xmm8,(%rbp) + movdqu %xmm15,(%r9) + + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor 
%xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + leaq 40(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Locb_enc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_ocb_encrypt,.-aesni_ocb_encrypt + +.type __ocb_encrypt6,@function +.align 32 +__ocb_encrypt6: +.cfi_startproc + pxor %xmm9,%xmm15 + movdqu (%rbx,%r12,1),%xmm11 + movdqa %xmm10,%xmm12 + movdqu (%rbx,%r13,1),%xmm13 + movdqa %xmm10,%xmm14 + pxor %xmm15,%xmm10 + movdqu (%rbx,%r14,1),%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm2,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm12 + pxor %xmm3,%xmm8 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm13 + pxor %xmm4,%xmm8 + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm14 + pxor %xmm5,%xmm8 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm15 + pxor %xmm6,%xmm8 + pxor %xmm14,%xmm6 + pxor %xmm7,%xmm8 + pxor %xmm15,%xmm7 + movups 32(%r11),%xmm0 + + leaq 1(%r8),%r12 + leaq 3(%r8),%r13 + leaq 5(%r8),%r14 + addq $6,%r8 + pxor %xmm9,%xmm10 + bsfq %r12,%r12 + bsfq %r13,%r13 + bsfq %r14,%r14 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm9,%xmm11 + pxor %xmm9,%xmm12 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm14 +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm15 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + shlq $4,%r12 + shlq $4,%r13 + jmp .Locb_enc_loop6 + +.align 32 +.Locb_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Locb_enc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + shlq $4,%r14 + +.byte 102,65,15,56,221,210 + movdqu (%rbx),%xmm10 + movq %r10,%rax +.byte 102,65,15,56,221,219 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + .byte 0xf3,0xc3 +.cfi_endproc +.size __ocb_encrypt6,.-__ocb_encrypt6 + +.type __ocb_encrypt4,@function +.align 32 +__ocb_encrypt4: +.cfi_startproc + pxor %xmm9,%xmm15 + movdqu (%rbx,%r12,1),%xmm11 + movdqa %xmm10,%xmm12 + movdqu (%rbx,%r13,1),%xmm13 + pxor %xmm15,%xmm10 + pxor %xmm10,%xmm11 + pxor %xmm2,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm12 + pxor %xmm3,%xmm8 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm13 + pxor %xmm4,%xmm8 + pxor %xmm12,%xmm4 + pxor %xmm5,%xmm8 + pxor %xmm13,%xmm5 + movups 32(%r11),%xmm0 + + pxor %xmm9,%xmm10 + pxor %xmm9,%xmm11 + pxor %xmm9,%xmm12 + pxor %xmm9,%xmm13 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48(%r11),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 64(%r11),%xmm0 + jmp .Locb_enc_loop4 + +.align 32 +.Locb_enc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 
102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Locb_enc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 16(%r11),%xmm1 + movq %r10,%rax + +.byte 102,65,15,56,221,210 +.byte 102,65,15,56,221,219 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + .byte 0xf3,0xc3 +.cfi_endproc +.size __ocb_encrypt4,.-__ocb_encrypt4 + +.type __ocb_encrypt1,@function +.align 32 +__ocb_encrypt1: +.cfi_startproc + pxor %xmm15,%xmm7 + pxor %xmm9,%xmm7 + pxor %xmm2,%xmm8 + pxor %xmm7,%xmm2 + movups 32(%r11),%xmm0 + +.byte 102,15,56,220,209 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm7 + +.byte 102,15,56,220,208 + movups 64(%r11),%xmm0 + jmp .Locb_enc_loop1 + +.align 32 +.Locb_enc_loop1: +.byte 102,15,56,220,209 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Locb_enc_loop1 + +.byte 102,15,56,220,209 + movups 16(%r11),%xmm1 + movq %r10,%rax + +.byte 102,15,56,221,215 + .byte 0xf3,0xc3 +.cfi_endproc +.size __ocb_encrypt1,.-__ocb_encrypt1 + +.globl aesni_ocb_decrypt +.type aesni_ocb_decrypt,@function +.align 32 +aesni_ocb_decrypt: +.cfi_startproc +.byte 243,15,30,250 + leaq (%rsp),%rax + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + movq 8(%rax),%rbx + movq 8+8(%rax),%rbp + + movl 240(%rcx),%r10d + movq %rcx,%r11 + shll $4,%r10d + movups (%rcx),%xmm9 + movups 16(%rcx,%r10,1),%xmm1 + + movdqu (%r9),%xmm15 + pxor %xmm1,%xmm9 + pxor %xmm1,%xmm15 + + movl $16+32,%eax + leaq 32(%r11,%r10,1),%rcx + movups 16(%r11),%xmm1 + subq %r10,%rax + movq %rax,%r10 + + movdqu (%rbx),%xmm10 + movdqu (%rbp),%xmm8 + + testq $1,%r8 + jnz .Locb_dec_odd + + bsfq %r8,%r12 + addq $1,%r8 + shlq $4,%r12 + movdqu (%rbx,%r12,1),%xmm7 + movdqu (%rdi),%xmm2 + leaq 16(%rdi),%rdi + + call __ocb_decrypt1 + + movdqa %xmm7,%xmm15 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm8 + leaq 16(%rsi),%rsi + subq $1,%rdx + jz .Locb_dec_done + +.Locb_dec_odd: + leaq 1(%r8),%r12 + leaq 3(%r8),%r13 + leaq 5(%r8),%r14 + leaq 6(%r8),%r8 + bsfq %r12,%r12 + bsfq %r13,%r13 + bsfq %r14,%r14 + shlq $4,%r12 + shlq $4,%r13 + shlq $4,%r14 + + subq $6,%rdx + jc .Locb_dec_short + jmp .Locb_dec_grandloop + +.align 32 +.Locb_dec_grandloop: + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + leaq 96(%rdi),%rdi + + call __ocb_decrypt6 + + movups %xmm2,0(%rsi) + pxor %xmm2,%xmm8 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm8 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm8 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm8 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm8 + movups %xmm7,80(%rsi) + pxor %xmm7,%xmm8 + leaq 96(%rsi),%rsi + subq $6,%rdx + jnc .Locb_dec_grandloop + +.Locb_dec_short: + addq $6,%rdx + jz .Locb_dec_done + + movdqu 0(%rdi),%xmm2 + cmpq $2,%rdx + jb .Locb_dec_one + movdqu 16(%rdi),%xmm3 + je .Locb_dec_two + + movdqu 32(%rdi),%xmm4 + cmpq $4,%rdx + jb .Locb_dec_three + movdqu 48(%rdi),%xmm5 + je .Locb_dec_four + + movdqu 64(%rdi),%xmm6 + pxor %xmm7,%xmm7 + + call __ocb_decrypt6 + + movdqa %xmm14,%xmm15 + movups %xmm2,0(%rsi) + pxor 
%xmm2,%xmm8 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm8 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm8 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm8 + movups %xmm6,64(%rsi) + pxor %xmm6,%xmm8 + + jmp .Locb_dec_done + +.align 16 +.Locb_dec_one: + movdqa %xmm10,%xmm7 + + call __ocb_decrypt1 + + movdqa %xmm7,%xmm15 + movups %xmm2,0(%rsi) + xorps %xmm2,%xmm8 + jmp .Locb_dec_done + +.align 16 +.Locb_dec_two: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + call __ocb_decrypt4 + + movdqa %xmm11,%xmm15 + movups %xmm2,0(%rsi) + xorps %xmm2,%xmm8 + movups %xmm3,16(%rsi) + xorps %xmm3,%xmm8 + + jmp .Locb_dec_done + +.align 16 +.Locb_dec_three: + pxor %xmm5,%xmm5 + + call __ocb_decrypt4 + + movdqa %xmm12,%xmm15 + movups %xmm2,0(%rsi) + xorps %xmm2,%xmm8 + movups %xmm3,16(%rsi) + xorps %xmm3,%xmm8 + movups %xmm4,32(%rsi) + xorps %xmm4,%xmm8 + + jmp .Locb_dec_done + +.align 16 +.Locb_dec_four: + call __ocb_decrypt4 + + movdqa %xmm13,%xmm15 + movups %xmm2,0(%rsi) + pxor %xmm2,%xmm8 + movups %xmm3,16(%rsi) + pxor %xmm3,%xmm8 + movups %xmm4,32(%rsi) + pxor %xmm4,%xmm8 + movups %xmm5,48(%rsi) + pxor %xmm5,%xmm8 + +.Locb_dec_done: + pxor %xmm0,%xmm15 + movdqu %xmm8,(%rbp) + movdqu %xmm15,(%r9) + + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor %xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + leaq 40(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Locb_dec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_ocb_decrypt,.-aesni_ocb_decrypt + +.type __ocb_decrypt6,@function +.align 32 +__ocb_decrypt6: +.cfi_startproc + pxor %xmm9,%xmm15 + movdqu (%rbx,%r12,1),%xmm11 + movdqa %xmm10,%xmm12 + movdqu (%rbx,%r13,1),%xmm13 + movdqa %xmm10,%xmm14 + pxor %xmm15,%xmm10 + movdqu (%rbx,%r14,1),%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm12 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm13 + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm14 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm15 + pxor %xmm14,%xmm6 + pxor %xmm15,%xmm7 + movups 32(%r11),%xmm0 + + leaq 1(%r8),%r12 + leaq 3(%r8),%r13 + leaq 5(%r8),%r14 + addq $6,%r8 + pxor %xmm9,%xmm10 + bsfq %r12,%r12 + bsfq %r13,%r13 + bsfq %r14,%r14 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm9,%xmm11 + pxor %xmm9,%xmm12 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm14 +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm15 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%r11),%xmm0 + shlq $4,%r12 + shlq $4,%r13 + jmp .Locb_dec_loop6 + +.align 32 +.Locb_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Locb_dec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 
+.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + shlq $4,%r14 + +.byte 102,65,15,56,223,210 + movdqu (%rbx),%xmm10 + movq %r10,%rax +.byte 102,65,15,56,223,219 +.byte 102,65,15,56,223,228 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 +.byte 102,65,15,56,223,255 + .byte 0xf3,0xc3 +.cfi_endproc +.size __ocb_decrypt6,.-__ocb_decrypt6 + +.type __ocb_decrypt4,@function +.align 32 +__ocb_decrypt4: +.cfi_startproc + pxor %xmm9,%xmm15 + movdqu (%rbx,%r12,1),%xmm11 + movdqa %xmm10,%xmm12 + movdqu (%rbx,%r13,1),%xmm13 + pxor %xmm15,%xmm10 + pxor %xmm10,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm12 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm13 + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + movups 32(%r11),%xmm0 + + pxor %xmm9,%xmm10 + pxor %xmm9,%xmm11 + pxor %xmm9,%xmm12 + pxor %xmm9,%xmm13 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 48(%r11),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 64(%r11),%xmm0 + jmp .Locb_dec_loop4 + +.align 32 +.Locb_dec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Locb_dec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 16(%r11),%xmm1 + movq %r10,%rax + +.byte 102,65,15,56,223,210 +.byte 102,65,15,56,223,219 +.byte 102,65,15,56,223,228 +.byte 102,65,15,56,223,237 + .byte 0xf3,0xc3 +.cfi_endproc +.size __ocb_decrypt4,.-__ocb_decrypt4 + +.type __ocb_decrypt1,@function +.align 32 +__ocb_decrypt1: +.cfi_startproc + pxor %xmm15,%xmm7 + pxor %xmm9,%xmm7 + pxor %xmm7,%xmm2 + movups 32(%r11),%xmm0 + +.byte 102,15,56,222,209 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm7 + +.byte 102,15,56,222,208 + movups 64(%r11),%xmm0 + jmp .Locb_dec_loop1 + +.align 32 +.Locb_dec_loop1: +.byte 102,15,56,222,209 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Locb_dec_loop1 + +.byte 102,15,56,222,209 + movups 16(%r11),%xmm1 + movq %r10,%rax + +.byte 102,15,56,223,215 + .byte 0xf3,0xc3 +.cfi_endproc +.size __ocb_decrypt1,.-__ocb_decrypt1 +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: +.cfi_startproc +.byte 243,15,30,250 + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + 
movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + cmpq $16,%rdx + jne .Lcbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_ret +.align 16 +.Lcbc_decrypt_bulk: + leaq (%rsp),%r11 +.cfi_def_cfa_register %r11 + pushq %rbp +.cfi_offset %rbp,-16 + subq $16,%rsp + andq $-16,%rsp + movq %rcx,%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $0x50,%rdx + jbe .Lcbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $0x70,%rdx + jbe .Lcbc_dec_six_or_seven + + andl $71303168,%r9d + subq $0x50,%rdx + cmpl $4194304,%r9d + je .Lcbc_dec_loop6_enter + subq $0x20,%rdx + leaq 112(%rcx),%rcx + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + movq $-1,%rbp + cmpq $0x70,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + adcq $0,%rbp + andq $128,%rbp +.byte 102,68,15,56,222,201 + addq %rdi,%rbp + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 
102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%rbp),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%rbp),%xmm12 + movdqu 32(%rbp),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%rbp),%xmm14 + movdqu 64(%rbp),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%rbp),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $0x80,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $0x70,%rdx + jle .Lcbc_dec_clear_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $0x50,%rdx + jbe .Lcbc_dec_tail + + movaps %xmm11,%xmm2 +.Lcbc_dec_six_or_seven: + cmpq $0x60,%rdx + ja .Lcbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor 
%xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +.Lcbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %rbp,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $0x60,%rdx + ja .Lcbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $0x50,%rdx + jle .Lcbc_dec_clear_tail_collected + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + subq $0x10,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $0x10,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $0x10,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $0x10,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + subq $0x10,%rdx + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_17: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_17 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leaq 16(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 + leaq 32(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 + movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 + leaq 48(%rsi),%rsi + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 +.Lcbc_dec_tail_collected: + movups 
%xmm10,(%r8) + andq $15,%rdx + jnz .Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + movdqa %xmm2,(%rsp) + +.Lcbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movq -8(%r11),%rbp +.cfi_restore %rbp + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lcbc_ret: + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_cbc_encrypt,.-aesni_cbc_encrypt +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.cfi_startproc +.byte 0x48,0x83,0xEC,0x08 +.cfi_adjust_cfa_offset 8 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + pxor %xmm1,%xmm1 + movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 +.Ldec_key_ret: + addq $8,%rsp +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.cfi_endproc +.LSEH_end_set_decrypt_key: +.size aesni_set_decrypt_key,.-aesni_set_decrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.cfi_startproc +.byte 0x48,0x83,0xEC,0x08 +.cfi_adjust_cfa_offset 8 + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movl $268437504,%r10d + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + andl OPENSSL_ia32cap_P+4(%rip),%r10d + leaq 16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + cmpl $268435456,%r10d + je .L10rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call .Lkey_expansion_128_cold +.byte 102,15,58,223,200,2 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,4 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,8 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,16 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,32 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,64 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,128 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,27 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,54 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L10rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa .Lkey_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp .Loop_key128 + +.align 16 +.Loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz .Loop_key128 + + movdqa .Lkey_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa 
%xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + cmpl $268435456,%r10d + je .L12rounds_alt + + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_192a_cold +.byte 102,15,58,223,202,2 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,8 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,32 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,128 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L12rounds_alt: + movdqa .Lkey_rotate192(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp .Loop_key192 + +.align 16 +.Loop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $0xff,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz .Loop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + cmpl $268435456,%r10d + je .L14rounds_alt + + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_256a_cold +.byte 102,15,58,223,200,1 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,2 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,2 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,4 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,8 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,8 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,16 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,32 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,32 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp .Loop_key256 + +.align 16 +.Loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz .Ldone_key256 + + pshufd $0xff,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp .Loop_key256 + +.Ldone_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 
+.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + addq $8,%rsp +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_set_encrypt_key,.-aesni_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.section .rodata +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 +.Lincrement1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Lkey_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +.Lkey_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +.Lkey_rcon1: +.long 1,1,1,1 +.Lkey_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesni-xts-avx512.s b/contrib/openssl-cmake/asm/crypto/aes/aesni-xts-avx512.s new file mode 100644 index 000000000000..aede66622e76 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesni-xts-avx512.s @@ -0,0 +1,8124 @@ +.text + +.globl aesni_xts_avx512_eligible +.type aesni_xts_avx512_eligible,@function +.align 32 +aesni_xts_avx512_eligible: + movl OPENSSL_ia32cap_P+8(%rip),%ecx + xorl %eax,%eax + + andl $0xc0030000,%ecx + cmpl $0xc0030000,%ecx + jne .L_done + movl OPENSSL_ia32cap_P+12(%rip),%ecx + + andl $0x640,%ecx + cmpl $0x640,%ecx + cmovel %ecx,%eax +.L_done: + .byte 0xf3,0xc3 +.size aesni_xts_avx512_eligible, .-aesni_xts_avx512_eligible +.globl aesni_xts_128_encrypt_avx512 +.hidden aesni_xts_128_encrypt_avx512 +.type aesni_xts_128_encrypt_avx512,@function +.align 32 +aesni_xts_128_encrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + 
vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenclast 160(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jl .L_less_than_128_bytes_hEgxyDlCngwrfFe + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_hEgxyDlCngwrfFe + cmpq $0x80,%rdx + jge .L_start_by8_hEgxyDlCngwrfFe + +.L_do_n_blocks_hEgxyDlCngwrfFe: + cmpq $0x0,%rdx + je .L_ret_hEgxyDlCngwrfFe + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe + cmpq $0x30,%rdx + jge .L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe + vmovdqa %xmm0,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_7_hEgxyDlCngwrfFe: + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_6_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc 
%zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_5_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + +.L_remaining_num_blocks_is_4_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa64 %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_remaining_num_blocks_is_3_hEgxyDlCngwrfFe: + movq $-1,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1{%k1} + addq $0x30,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 
128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi){%k1} + addq $0x30,%rsi + vextracti32x4 $0x2,%zmm1,%xmm8 + vextracti32x4 $0x3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_remaining_num_blocks_is_2_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%ymm1 + addq $0x20,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu %ymm1,(%rsi) + addq $0x20,%rsi + vextracti32x4 $0x1,%zmm1,%xmm8 + vextracti32x4 $0x2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_remaining_num_blocks_is_1_hEgxyDlCngwrfFe: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + vpxor %xmm9,%xmm1,%xmm1 + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenclast 160(%rcx),%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vextracti32x4 $0x1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe + + +.L_start_by16_hEgxyDlCngwrfFe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + 
vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vaesenclast %zmm0,%zmm3,%zmm3 + vaesenclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jae .L_main_loop_run_16_hEgxyDlCngwrfFe + cmpq $0x80,%rdx + jae .L_main_loop_run_8_hEgxyDlCngwrfFe + vextracti32x4 $0x3,%zmm4,%xmm0 + jmp .L_do_n_blocks_hEgxyDlCngwrfFe + +.L_start_by8_hEgxyDlCngwrfFe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + +.L_main_loop_run_8_hEgxyDlCngwrfFe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + addq $0x80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc 
%zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jae .L_main_loop_run_8_hEgxyDlCngwrfFe + vextracti32x4 $0x3,%zmm2,%xmm0 + jmp .L_do_n_blocks_hEgxyDlCngwrfFe + +.L_steal_cipher_hEgxyDlCngwrfFe: + vmovdqa %xmm8,%xmm2 + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + vpxor %xmm0,%xmm3,%xmm8 + vpxor (%rcx),%xmm8,%xmm8 + vaesenc 16(%rcx),%xmm8,%xmm8 + vaesenc 32(%rcx),%xmm8,%xmm8 + vaesenc 48(%rcx),%xmm8,%xmm8 + vaesenc 64(%rcx),%xmm8,%xmm8 + vaesenc 80(%rcx),%xmm8,%xmm8 + vaesenc 96(%rcx),%xmm8,%xmm8 + vaesenc 112(%rcx),%xmm8,%xmm8 + vaesenc 128(%rcx),%xmm8,%xmm8 + vaesenc 144(%rcx),%xmm8,%xmm8 + vaesenclast 160(%rcx),%xmm8,%xmm8 + vpxor %xmm0,%xmm8,%xmm8 + vmovdqu %xmm8,-16(%rsi) +.L_ret_hEgxyDlCngwrfFe: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_hEgxyDlCngwrfFe: + vpbroadcastq %r10,%zmm25 + cmpq $0x10,%rdx + jb .L_ret_hEgxyDlCngwrfFe + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movl $0xaa,%r8d + kmovq %r8,%k2 + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_hEgxyDlCngwrfFe + cmpq $0x50,%r8 + je .L_num_blocks_is_5_hEgxyDlCngwrfFe + cmpq $0x40,%r8 + je .L_num_blocks_is_4_hEgxyDlCngwrfFe + cmpq $0x30,%r8 + je .L_num_blocks_is_3_hEgxyDlCngwrfFe + cmpq $0x20,%r8 + je .L_num_blocks_is_2_hEgxyDlCngwrfFe + cmpq $0x10,%r8 + je .L_num_blocks_is_1_hEgxyDlCngwrfFe + +.L_num_blocks_is_7_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 
80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_6_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $96,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $96,%rsi + + vextracti32x4 $0x1,%ymm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_5_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%xmm2 + addq $80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc 
%zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %xmm2,64(%rsi) + addq $80,%rsi + + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_4_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + addq $64,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi) + addq $64,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_3_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1{%k1} + addq $48,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi){%k1} + addq $48,%rsi + vextracti32x4 $2,%zmm1,%xmm8 + vextracti32x4 $3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_2_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq 
const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%ymm1 + addq $32,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %ymm1,0(%rsi) + addq $32,%rsi + + vextracti32x4 $1,%ymm1,%xmm8 + vextracti32x4 $2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.L_num_blocks_is_1_hEgxyDlCngwrfFe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%xmm1 + addq $16,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %xmm1,0(%rsi) + addq $16,%rsi + + vmovdqa %xmm1,%xmm8 + vextracti32x4 $1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_hEgxyDlCngwrfFe + jmp .L_steal_cipher_hEgxyDlCngwrfFe +.cfi_endproc +.globl aesni_xts_128_decrypt_avx512 +.hidden aesni_xts_128_decrypt_avx512 +.type aesni_xts_128_decrypt_avx512,@function +.align 32 +aesni_xts_128_decrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenclast 160(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jb .L_less_than_128_bytes_amivrujEyduiFoi + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_amivrujEyduiFoi + jmp .L_start_by8_amivrujEyduiFoi + +.L_do_n_blocks_amivrujEyduiFoi: + cmpq $0x0,%rdx + je .L_ret_amivrujEyduiFoi + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_amivrujEyduiFoi + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_amivrujEyduiFoi + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_amivrujEyduiFoi + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_amivrujEyduiFoi + cmpq 
$0x30,%rdx + jge .L_remaining_num_blocks_is_3_amivrujEyduiFoi + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_amivrujEyduiFoi + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_amivrujEyduiFoi + + + vmovdqu %xmm5,%xmm1 + + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,-16(%rsi) + vmovdqa %xmm1,%xmm8 + + + movq $0x1,%r8 + kmovq %r8,%k1 + vpsllq $0x3f,%xmm9,%xmm13 + vpsraq $0x3f,%xmm13,%xmm14 + vpandq %xmm25,%xmm14,%xmm5 + vpxorq %xmm5,%xmm9,%xmm9{%k1} + vpsrldq $0x8,%xmm9,%xmm10 +.byte 98, 211, 181, 8, 115, 194, 1 + vpslldq $0x8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm0,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_remaining_num_blocks_is_7_amivrujEyduiFoi: + movq $0xffffffffffffffff,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_remain_amivrujEyduiFoi + vextracti32x4 $0x2,%zmm10,%xmm12 + vextracti32x4 $0x3,%zmm10,%xmm13 + vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_7_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 
+ vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_6_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_remain_amivrujEyduiFoi + vextracti32x4 $0x1,%zmm10,%xmm12 + vextracti32x4 $0x2,%zmm10,%xmm13 + vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_6_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + 
vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_5_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_remain_amivrujEyduiFoi + vmovdqa %xmm10,%xmm12 + vextracti32x4 $0x1,%zmm10,%xmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_5_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %xmm2,64(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_4_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_remain_amivrujEyduiFoi + vextracti32x4 $0x3,%zmm9,%xmm12 + vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + 
vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_4_remain_amivrujEyduiFoi: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_3_amivrujEyduiFoi: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_remain_amivrujEyduiFoi + vextracti32x4 $0x2,%zmm9,%xmm13 + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x3,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + 
vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + vmovdqa %xmm13,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_3_remain_amivrujEyduiFoi: + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x2,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_2_amivrujEyduiFoi: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_remain_amivrujEyduiFoi + vextracti32x4 $0x2,%zmm9,%xmm10 + vextracti32x4 $0x1,%zmm9,%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + 
+.L_done_2_remain_amivrujEyduiFoi: + vextracti32x4 $0x1,%zmm9,%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_remaining_num_blocks_is_1_amivrujEyduiFoi: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_remain_amivrujEyduiFoi + vextracti32x4 $0x1,%zmm9,%xmm11 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_1_remain_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + jmp .L_ret_amivrujEyduiFoi + +.L_start_by16_amivrujEyduiFoi: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + + + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + 
vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + vmovdqu8 240(%rdi),%xmm5 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + vaesdeclast %zmm0,%zmm3,%zmm3 + vaesdeclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jge .L_main_loop_run_16_amivrujEyduiFoi + + cmpq $0x80,%rdx + jge .L_main_loop_run_8_amivrujEyduiFoi + jmp .L_do_n_blocks_amivrujEyduiFoi + +.L_start_by8_amivrujEyduiFoi: + + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + 
+.L_main_loop_run_8_amivrujEyduiFoi: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 112(%rdi),%xmm5 + addq $0x80,%rdi + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jge .L_main_loop_run_8_amivrujEyduiFoi + jmp .L_do_n_blocks_amivrujEyduiFoi + +.L_steal_cipher_amivrujEyduiFoi: + + vmovdqa %xmm8,%xmm2 + + + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + + + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + + + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + + + vpxor %xmm0,%xmm3,%xmm8 + + + vpxor (%rcx),%xmm8,%xmm8 + vaesdec 16(%rcx),%xmm8,%xmm8 + vaesdec 32(%rcx),%xmm8,%xmm8 + vaesdec 48(%rcx),%xmm8,%xmm8 + vaesdec 64(%rcx),%xmm8,%xmm8 + vaesdec 80(%rcx),%xmm8,%xmm8 + vaesdec 96(%rcx),%xmm8,%xmm8 + vaesdec 112(%rcx),%xmm8,%xmm8 + vaesdec 128(%rcx),%xmm8,%xmm8 + vaesdec 144(%rcx),%xmm8,%xmm8 + vaesdeclast 160(%rcx),%xmm8,%xmm8 + + vpxor %xmm0,%xmm8,%xmm8 + +.L_done_amivrujEyduiFoi: + + vmovdqu %xmm8,-16(%rsi) +.L_ret_amivrujEyduiFoi: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_amivrujEyduiFoi: + cmpq $0x10,%rdx + jb .L_ret_amivrujEyduiFoi + + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_amivrujEyduiFoi + cmpq $0x50,%r8 + je .L_num_blocks_is_5_amivrujEyduiFoi + cmpq $0x40,%r8 + je .L_num_blocks_is_4_amivrujEyduiFoi + cmpq $0x30,%r8 + je .L_num_blocks_is_3_amivrujEyduiFoi + cmpq $0x20,%r8 + je .L_num_blocks_is_2_amivrujEyduiFoi + cmpq $0x10,%r8 + je .L_num_blocks_is_1_amivrujEyduiFoi + +.L_num_blocks_is_7_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + 
vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,96(%rsp) + movq %rbx,96 + 8(%rsp) + vmovdqa 96(%rsp),%xmm15 + vmovdqu 96(%rdi),%xmm7 + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_amivrujEyduiFoi + +.L_steal_cipher_7_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm15,%xmm16 + vmovdqa 16(%rsp),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec 
%xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa64 %xmm16,%xmm0 + vmovdqa %xmm7,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_7_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor 
%xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa %xmm7,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_6_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_amivrujEyduiFoi + +.L_steal_cipher_6_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm14,%xmm15 + vmovdqa 16(%rsp),%xmm14 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + 
vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm15,%xmm0 + vmovdqa %xmm6,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_6_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm6,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_5_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + 
xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_amivrujEyduiFoi + +.L_steal_cipher_5_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm13,%xmm14 + vmovdqa 16(%rsp),%xmm13 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm14,%xmm0 + vmovdqa %xmm5,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_5_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor 
%xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm5,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_4_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_amivrujEyduiFoi + +.L_steal_cipher_4_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm12,%xmm13 + vmovdqa 16(%rsp),%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec 
%xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm13,%xmm0 + vmovdqa %xmm4,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_4_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm4,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_3_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + 
vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_amivrujEyduiFoi + +.L_steal_cipher_3_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm11,%xmm12 + vmovdqa 16(%rsp),%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm12,%xmm0 + vmovdqa %xmm3,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_3_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + jmp .L_done_amivrujEyduiFoi + 
+.L_num_blocks_is_2_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_amivrujEyduiFoi + +.L_steal_cipher_2_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm10,%xmm11 + vmovdqa 16(%rsp),%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm11,%xmm0 + vmovdqa %xmm2,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_2_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + jmp .L_done_amivrujEyduiFoi + +.L_num_blocks_is_1_amivrujEyduiFoi: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_amivrujEyduiFoi + +.L_steal_cipher_1_amivrujEyduiFoi: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm9,%xmm10 + vmovdqa 16(%rsp),%xmm9 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 
80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm10,%xmm0 + vmovdqa %xmm1,%xmm8 + jmp .L_steal_cipher_amivrujEyduiFoi + +.L_done_1_amivrujEyduiFoi: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + jmp .L_done_amivrujEyduiFoi +.cfi_endproc +.globl aesni_xts_256_encrypt_avx512 +.hidden aesni_xts_256_encrypt_avx512 +.type aesni_xts_256_encrypt_avx512,@function +.align 32 +aesni_xts_256_encrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenc 160(%r8),%xmm1,%xmm1 + vaesenc 176(%r8),%xmm1,%xmm1 + vaesenc 192(%r8),%xmm1,%xmm1 + vaesenc 208(%r8),%xmm1,%xmm1 + vaesenclast 224(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jl .L_less_than_128_bytes_wcpqaDvsGlbjGoe + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_wcpqaDvsGlbjGoe + cmpq $0x80,%rdx + jge .L_start_by8_wcpqaDvsGlbjGoe + +.L_do_n_blocks_wcpqaDvsGlbjGoe: + cmpq $0x0,%rdx + je .L_ret_wcpqaDvsGlbjGoe + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe + cmpq $0x40,%rdx + jge .L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe + cmpq $0x30,%rdx + jge .L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe + vmovdqa %xmm0,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_7_wcpqaDvsGlbjGoe: + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc 
%zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_6_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_5_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc 
%zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + +.L_remaining_num_blocks_is_4_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa64 %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_remaining_num_blocks_is_3_wcpqaDvsGlbjGoe: + movq $-1,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1{%k1} + addq $0x30,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq 
%zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,(%rsi){%k1} + addq $0x30,%rsi + vextracti32x4 $0x2,%zmm1,%xmm8 + vextracti32x4 $0x3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_remaining_num_blocks_is_2_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%ymm1 + addq $0x20,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 176(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 192(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 208(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 224(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu %ymm1,(%rsi) + addq $0x20,%rsi + vextracti32x4 $0x1,%zmm1,%xmm8 + vextracti32x4 $0x2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_remaining_num_blocks_is_1_wcpqaDvsGlbjGoe: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + vpxor %xmm9,%xmm1,%xmm1 + vpxor (%rcx),%xmm1,%xmm1 + vaesenc 16(%rcx),%xmm1,%xmm1 + vaesenc 32(%rcx),%xmm1,%xmm1 + vaesenc 48(%rcx),%xmm1,%xmm1 + vaesenc 64(%rcx),%xmm1,%xmm1 + vaesenc 80(%rcx),%xmm1,%xmm1 + vaesenc 96(%rcx),%xmm1,%xmm1 + vaesenc 112(%rcx),%xmm1,%xmm1 + vaesenc 128(%rcx),%xmm1,%xmm1 + vaesenc 144(%rcx),%xmm1,%xmm1 + vaesenc 160(%rcx),%xmm1,%xmm1 + vaesenc 176(%rcx),%xmm1,%xmm1 + vaesenc 192(%rcx),%xmm1,%xmm1 + vaesenc 208(%rcx),%xmm1,%xmm1 + vaesenclast 224(%rcx),%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vextracti32x4 $0x1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe + + +.L_start_by16_wcpqaDvsGlbjGoe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord 
%zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vaesenclast %zmm0,%zmm3,%zmm3 + vaesenclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jae .L_main_loop_run_16_wcpqaDvsGlbjGoe + cmpq $0x80,%rdx + jae .L_main_loop_run_8_wcpqaDvsGlbjGoe + vextracti32x4 $0x3,%zmm4,%xmm0 + jmp .L_do_n_blocks_wcpqaDvsGlbjGoe + +.L_start_by8_wcpqaDvsGlbjGoe: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + 
+.L_main_loop_run_8_wcpqaDvsGlbjGoe: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + addq $0x80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jae .L_main_loop_run_8_wcpqaDvsGlbjGoe + vextracti32x4 $0x3,%zmm2,%xmm0 + jmp .L_do_n_blocks_wcpqaDvsGlbjGoe + +.L_steal_cipher_wcpqaDvsGlbjGoe: + vmovdqa %xmm8,%xmm2 + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + vpxor %xmm0,%xmm3,%xmm8 + vpxor (%rcx),%xmm8,%xmm8 + vaesenc 16(%rcx),%xmm8,%xmm8 + vaesenc 32(%rcx),%xmm8,%xmm8 + vaesenc 48(%rcx),%xmm8,%xmm8 + vaesenc 64(%rcx),%xmm8,%xmm8 + vaesenc 80(%rcx),%xmm8,%xmm8 + vaesenc 96(%rcx),%xmm8,%xmm8 + vaesenc 112(%rcx),%xmm8,%xmm8 + vaesenc 128(%rcx),%xmm8,%xmm8 + vaesenc 144(%rcx),%xmm8,%xmm8 + vaesenc 160(%rcx),%xmm8,%xmm8 + vaesenc 176(%rcx),%xmm8,%xmm8 + vaesenc 192(%rcx),%xmm8,%xmm8 + vaesenc 208(%rcx),%xmm8,%xmm8 + vaesenclast 224(%rcx),%xmm8,%xmm8 + vpxor %xmm0,%xmm8,%xmm8 + vmovdqu %xmm8,-16(%rsi) +.L_ret_wcpqaDvsGlbjGoe: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_wcpqaDvsGlbjGoe: + vpbroadcastq %r10,%zmm25 + cmpq $0x10,%rdx + jb .L_ret_wcpqaDvsGlbjGoe + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movl $0xaa,%r8d + kmovq %r8,%k2 + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_wcpqaDvsGlbjGoe + cmpq 
$0x50,%r8 + je .L_num_blocks_is_5_wcpqaDvsGlbjGoe + cmpq $0x40,%r8 + je .L_num_blocks_is_4_wcpqaDvsGlbjGoe + cmpq $0x30,%r8 + je .L_num_blocks_is_3_wcpqaDvsGlbjGoe + cmpq $0x20,%r8 + je .L_num_blocks_is_2_wcpqaDvsGlbjGoe + cmpq $0x10,%r8 + je .L_num_blocks_is_1_wcpqaDvsGlbjGoe + +.L_num_blocks_is_7_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + + addq $0x70,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vextracti32x4 $0x3,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_6_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $96,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + 
vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $96,%rsi + + vextracti32x4 $0x1,%ymm2,%xmm8 + vextracti32x4 $0x2,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_5_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + vmovdqu8 64(%rdi),%xmm2 + addq $80,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vpternlogq $0x96,%zmm0,%zmm10,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vmovdqu8 %zmm1,0(%rsi) + vmovdqu8 %xmm2,64(%rsi) + addq $80,%rsi + + vmovdqa %xmm2,%xmm8 + vextracti32x4 $0x1,%zmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_4_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord 
%zmm4,%zmm3,%zmm9 + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x00,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + vmovdqu8 0(%rdi),%zmm1 + addq $64,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi) + addq $64,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm10,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_3_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + movq $0x0000ffffffffffff,%r8 + kmovq %r8,%k1 + vmovdqu8 0(%rdi),%zmm1{%k1} + addq $48,%rdi + vbroadcasti32x4 (%rcx),%zmm0 + vpternlogq $0x96,%zmm0,%zmm9,%zmm1 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vpxorq %zmm9,%zmm1,%zmm1 + vmovdqu8 %zmm1,0(%rsi){%k1} + addq $48,%rsi + vextracti32x4 $2,%zmm1,%xmm8 + vextracti32x4 $3,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_2_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%ymm1 + addq $32,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 
96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 176(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 192(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 208(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 224(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %ymm1,0(%rsi) + addq $32,%rsi + + vextracti32x4 $1,%ymm1,%xmm8 + vextracti32x4 $2,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.L_num_blocks_is_1_wcpqaDvsGlbjGoe: + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x00,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + vmovdqu8 0(%rdi),%xmm1 + addq $16,%rdi + vbroadcasti32x4 (%rcx),%ymm0 + vpternlogq $0x96,%ymm0,%ymm9,%ymm1 + vbroadcasti32x4 16(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 32(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 48(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 64(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 80(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 96(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 112(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 128(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 144(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 160(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 176(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 192(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 208(%rcx),%ymm0 + vaesenc %ymm0,%ymm1,%ymm1 + vbroadcasti32x4 224(%rcx),%ymm0 + vaesenclast %ymm0,%ymm1,%ymm1 + vpxorq %ymm9,%ymm1,%ymm1 + vmovdqu8 %xmm1,0(%rsi) + addq $16,%rsi + + vmovdqa %xmm1,%xmm8 + vextracti32x4 $1,%zmm9,%xmm0 + andq $0xf,%rdx + je .L_ret_wcpqaDvsGlbjGoe + jmp .L_steal_cipher_wcpqaDvsGlbjGoe +.cfi_endproc +.globl aesni_xts_256_decrypt_avx512 +.hidden aesni_xts_256_decrypt_avx512 +.type aesni_xts_256_decrypt_avx512,@function +.align 32 +aesni_xts_256_decrypt_avx512: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbp + movq %rsp,%rbp + subq $136,%rsp + andq $0xffffffffffffffc0,%rsp + movq %rbx,128(%rsp) + movq $0x87,%r10 + vmovdqu (%r9),%xmm1 + vpxor (%r8),%xmm1,%xmm1 + vaesenc 16(%r8),%xmm1,%xmm1 + vaesenc 32(%r8),%xmm1,%xmm1 + vaesenc 48(%r8),%xmm1,%xmm1 + vaesenc 64(%r8),%xmm1,%xmm1 + vaesenc 80(%r8),%xmm1,%xmm1 + vaesenc 96(%r8),%xmm1,%xmm1 + vaesenc 112(%r8),%xmm1,%xmm1 + vaesenc 128(%r8),%xmm1,%xmm1 + vaesenc 144(%r8),%xmm1,%xmm1 + vaesenc 160(%r8),%xmm1,%xmm1 + vaesenc 176(%r8),%xmm1,%xmm1 + vaesenc 192(%r8),%xmm1,%xmm1 + vaesenc 208(%r8),%xmm1,%xmm1 + vaesenclast 224(%r8),%xmm1,%xmm1 + vmovdqa %xmm1,(%rsp) + + cmpq $0x80,%rdx + jb .L_less_than_128_bytes_EmbgEptodyewbFa + vpbroadcastq %r10,%zmm25 + cmpq $0x100,%rdx + jge .L_start_by16_EmbgEptodyewbFa + jmp .L_start_by8_EmbgEptodyewbFa + +.L_do_n_blocks_EmbgEptodyewbFa: + cmpq $0x0,%rdx + je .L_ret_EmbgEptodyewbFa + cmpq $0x70,%rdx + jge .L_remaining_num_blocks_is_7_EmbgEptodyewbFa + cmpq $0x60,%rdx + jge .L_remaining_num_blocks_is_6_EmbgEptodyewbFa + cmpq $0x50,%rdx + jge .L_remaining_num_blocks_is_5_EmbgEptodyewbFa + cmpq $0x40,%rdx + jge 
.L_remaining_num_blocks_is_4_EmbgEptodyewbFa + cmpq $0x30,%rdx + jge .L_remaining_num_blocks_is_3_EmbgEptodyewbFa + cmpq $0x20,%rdx + jge .L_remaining_num_blocks_is_2_EmbgEptodyewbFa + cmpq $0x10,%rdx + jge .L_remaining_num_blocks_is_1_EmbgEptodyewbFa + + + vmovdqu %xmm5,%xmm1 + + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,-16(%rsi) + vmovdqa %xmm1,%xmm8 + + + movq $0x1,%r8 + kmovq %r8,%k1 + vpsllq $0x3f,%xmm9,%xmm13 + vpsraq $0x3f,%xmm13,%xmm14 + vpandq %xmm25,%xmm14,%xmm5 + vpxorq %xmm5,%xmm9,%xmm9{%k1} + vpsrldq $0x8,%xmm9,%xmm10 +.byte 98, 211, 181, 8, 115, 194, 1 + vpslldq $0x8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm0,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_7_EmbgEptodyewbFa: + movq $0xffffffffffffffff,%r8 + shrq $0x10,%r8 + kmovq %r8,%k1 + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2{%k1} + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_remain_EmbgEptodyewbFa + vextracti32x4 $0x2,%zmm10,%xmm12 + vextracti32x4 $0x3,%zmm10,%xmm13 + vinserti32x4 $0x2,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + addq $0x70,%rsi + vextracti32x4 $0x2,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp 
.L_steal_cipher_EmbgEptodyewbFa + +.L_done_7_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi){%k1} + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_6_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%ymm2 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_remain_EmbgEptodyewbFa + vextracti32x4 $0x1,%zmm10,%xmm12 + vextracti32x4 $0x2,%zmm10,%xmm13 + vinserti32x4 $0x1,%xmm13,%zmm10,%zmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + 
vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + addq $0x60,%rsi + vextracti32x4 $0x1,%zmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_6_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %ymm2,64(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_5_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu 64(%rdi),%xmm2 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_remain_EmbgEptodyewbFa + vmovdqa %xmm10,%xmm12 + vextracti32x4 $0x1,%zmm10,%xmm10 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast 
%zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu %xmm2,64(%rsi) + addq $0x50,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_5_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %xmm2,64(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_4_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_remain_EmbgEptodyewbFa + vextracti32x4 $0x3,%zmm9,%xmm12 + vinserti32x4 $0x3,%xmm10,%zmm9,%zmm9 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + 
vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + addq $0x40,%rsi + vextracti32x4 $0x3,%zmm1,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_4_remain_EmbgEptodyewbFa: + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_3_EmbgEptodyewbFa: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_remain_EmbgEptodyewbFa + vextracti32x4 $0x2,%zmm9,%xmm13 + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x3,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + 
vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + vmovdqa %xmm13,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_3_remain_EmbgEptodyewbFa: + vextracti32x4 $0x1,%zmm9,%xmm10 + vextracti32x4 $0x2,%zmm9,%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_2_EmbgEptodyewbFa: + vmovdqu (%rdi),%xmm1 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_remain_EmbgEptodyewbFa + vextracti32x4 $0x2,%zmm9,%xmm10 + vextracti32x4 $0x1,%zmm9,%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec 
%xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + vmovdqa %xmm12,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_2_remain_EmbgEptodyewbFa: + vextracti32x4 $0x1,%zmm9,%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_remaining_num_blocks_is_1_EmbgEptodyewbFa: + vmovdqu (%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_remain_EmbgEptodyewbFa + vextracti32x4 $0x1,%zmm9,%xmm11 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm11,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + vmovdqa %xmm9,%xmm0 + jmp .L_steal_cipher_EmbgEptodyewbFa + 
+.L_done_1_remain_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu %xmm1,(%rsi) + jmp .L_ret_EmbgEptodyewbFa + +.L_start_by16_EmbgEptodyewbFa: + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + + + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm11 + vpxord %zmm14,%zmm11,%zmm11 + + vpsrldq $0xf,%zmm10,%zmm15 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 + vpslldq $0x1,%zmm10,%zmm12 + vpxord %zmm16,%zmm12,%zmm12 + +.L_main_loop_run_16_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 128(%rdi),%zmm3 + vmovdqu8 192(%rdi),%zmm4 + vmovdqu8 240(%rdi),%xmm5 + addq $0x100,%rdi + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpxorq %zmm0,%zmm3,%zmm3 + vpxorq %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm11,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm11,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm12,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm12,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm15,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm15,%zmm17 + vpxord %zmm14,%zmm17,%zmm17 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec 
%zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vpsrldq $0xf,%zmm16,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm16,%zmm18 + vpxord %zmm14,%zmm18,%zmm18 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + vaesdeclast %zmm0,%zmm3,%zmm3 + vaesdeclast %zmm0,%zmm4,%zmm4 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + vpxorq %zmm11,%zmm3,%zmm3 + vpxorq %zmm12,%zmm4,%zmm4 + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqa32 %zmm17,%zmm11 + vmovdqa32 %zmm18,%zmm12 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + vmovdqu8 %zmm3,128(%rsi) + vmovdqu8 %zmm4,192(%rsi) + addq $0x100,%rsi + subq $0x100,%rdx + cmpq $0x100,%rdx + jge .L_main_loop_run_16_EmbgEptodyewbFa + + cmpq $0x80,%rdx + jge .L_main_loop_run_8_EmbgEptodyewbFa + jmp .L_do_n_blocks_EmbgEptodyewbFa + +.L_start_by8_EmbgEptodyewbFa: + + vbroadcasti32x4 (%rsp),%zmm0 + vbroadcasti32x4 shufb_15_7(%rip),%zmm8 + movq $0xaa,%r8 + kmovq %r8,%k2 + + + vpshufb %zmm8,%zmm0,%zmm1 + vpsllvq const_dq3210(%rip),%zmm0,%zmm4 + vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 + vpxorq %zmm2,%zmm4,%zmm4{%k2} + vpxord %zmm4,%zmm3,%zmm9 + + + vpsllvq const_dq7654(%rip),%zmm0,%zmm5 + vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 + vpxorq %zmm6,%zmm5,%zmm5{%k2} + vpxord %zmm5,%zmm7,%zmm10 + +.L_main_loop_run_8_EmbgEptodyewbFa: + vmovdqu8 (%rdi),%zmm1 + vmovdqu8 64(%rdi),%zmm2 + vmovdqu8 112(%rdi),%xmm5 + addq $0x80,%rdi + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vbroadcasti32x4 (%rcx),%zmm0 + vpxorq %zmm0,%zmm1,%zmm1 + vpxorq %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm9,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm9,%zmm15 + vpxord %zmm14,%zmm15,%zmm15 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vpsrldq $0xf,%zmm10,%zmm13 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 + vpslldq $0x1,%zmm10,%zmm16 + vpxord %zmm14,%zmm16,%zmm16 + + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec 
%zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + + + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm2,%zmm2 + + + vmovdqa32 %zmm15,%zmm9 + vmovdqa32 %zmm16,%zmm10 + vmovdqu8 %zmm1,(%rsi) + vmovdqu8 %zmm2,64(%rsi) + addq $0x80,%rsi + subq $0x80,%rdx + cmpq $0x80,%rdx + jge .L_main_loop_run_8_EmbgEptodyewbFa + jmp .L_do_n_blocks_EmbgEptodyewbFa + +.L_steal_cipher_EmbgEptodyewbFa: + + vmovdqa %xmm8,%xmm2 + + + leaq vpshufb_shf_table(%rip),%rax + vmovdqu (%rax,%rdx,1),%xmm10 + vpshufb %xmm10,%xmm8,%xmm8 + + + vmovdqu -16(%rdi,%rdx,1),%xmm3 + vmovdqu %xmm8,-16(%rsi,%rdx,1) + + + leaq vpshufb_shf_table(%rip),%rax + addq $16,%rax + subq %rdx,%rax + vmovdqu (%rax),%xmm10 + vpxor mask1(%rip),%xmm10,%xmm10 + vpshufb %xmm10,%xmm3,%xmm3 + + vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 + + + vpxor %xmm0,%xmm3,%xmm8 + + + vpxor (%rcx),%xmm8,%xmm8 + vaesdec 16(%rcx),%xmm8,%xmm8 + vaesdec 32(%rcx),%xmm8,%xmm8 + vaesdec 48(%rcx),%xmm8,%xmm8 + vaesdec 64(%rcx),%xmm8,%xmm8 + vaesdec 80(%rcx),%xmm8,%xmm8 + vaesdec 96(%rcx),%xmm8,%xmm8 + vaesdec 112(%rcx),%xmm8,%xmm8 + vaesdec 128(%rcx),%xmm8,%xmm8 + vaesdec 144(%rcx),%xmm8,%xmm8 + vaesdec 160(%rcx),%xmm8,%xmm8 + vaesdec 176(%rcx),%xmm8,%xmm8 + vaesdec 192(%rcx),%xmm8,%xmm8 + vaesdec 208(%rcx),%xmm8,%xmm8 + vaesdeclast 224(%rcx),%xmm8,%xmm8 + + vpxor %xmm0,%xmm8,%xmm8 + +.L_done_EmbgEptodyewbFa: + + vmovdqu %xmm8,-16(%rsi) +.L_ret_EmbgEptodyewbFa: + movq 128(%rsp),%rbx + xorq %r8,%r8 + movq %r8,128(%rsp) + + vpxorq %zmm0,%zmm0,%zmm0 + movq %rbp,%rsp + popq %rbp + vzeroupper + .byte 0xf3,0xc3 + +.L_less_than_128_bytes_EmbgEptodyewbFa: + cmpq $0x10,%rdx + jb .L_ret_EmbgEptodyewbFa + + movq %rdx,%r8 + andq $0x70,%r8 + cmpq $0x60,%r8 + je .L_num_blocks_is_6_EmbgEptodyewbFa + cmpq $0x50,%r8 + je .L_num_blocks_is_5_EmbgEptodyewbFa + cmpq $0x40,%r8 + je .L_num_blocks_is_4_EmbgEptodyewbFa + cmpq $0x30,%r8 + je .L_num_blocks_is_3_EmbgEptodyewbFa + cmpq $0x20,%r8 + je .L_num_blocks_is_2_EmbgEptodyewbFa + cmpq $0x10,%r8 + je .L_num_blocks_is_1_EmbgEptodyewbFa + +.L_num_blocks_is_7_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + xorq %r11,%r11 + shlq $1,%rax + adcq 
%rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,96(%rsp) + movq %rbx,96 + 8(%rsp) + vmovdqa 96(%rsp),%xmm15 + vmovdqu 96(%rdi),%xmm7 + addq $0x70,%rdi + andq $0xf,%rdx + je .L_done_7_EmbgEptodyewbFa + +.L_steal_cipher_7_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm15,%xmm16 + vmovdqa 16(%rsp),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec 
%xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa64 %xmm16,%xmm0 + vmovdqa %xmm7,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_7_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vpxor %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec 
%xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + vmovdqu %xmm6,80(%rsi) + addq $0x70,%rsi + vmovdqa %xmm7,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_6_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,80(%rsp) + movq %rbx,80 + 8(%rsp) + vmovdqa 80(%rsp),%xmm14 + vmovdqu 80(%rdi),%xmm6 + addq $0x60,%rdi + andq $0xf,%rdx + je .L_done_6_EmbgEptodyewbFa + +.L_steal_cipher_6_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm14,%xmm15 + vmovdqa 16(%rsp),%xmm14 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + 
vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm15,%xmm0 + vmovdqa %xmm6,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_6_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vpxor %xmm0,%xmm6,%xmm6 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec 
%xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vpxor %xmm14,%xmm6,%xmm6 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + vmovdqu %xmm5,64(%rsi) + addq $0x60,%rsi + vmovdqa %xmm6,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_5_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,64(%rsp) + movq %rbx,64 + 8(%rsp) + vmovdqa 64(%rsp),%xmm13 + vmovdqu 64(%rdi),%xmm5 + addq $0x50,%rdi + andq $0xf,%rdx + je .L_done_5_EmbgEptodyewbFa + +.L_steal_cipher_5_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm13,%xmm14 + vmovdqa 16(%rsp),%xmm13 + vpxor %xmm9,%xmm1,%xmm1 
+ vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm14,%xmm0 + vmovdqa %xmm5,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_5_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vpxor %xmm0,%xmm5,%xmm5 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + 
vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vpxor %xmm13,%xmm5,%xmm5 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + vmovdqu %xmm4,48(%rsi) + addq $0x50,%rsi + vmovdqa %xmm5,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_4_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,48(%rsp) + movq %rbx,48 + 8(%rsp) + vmovdqa 48(%rsp),%xmm12 + vmovdqu 48(%rdi),%xmm4 + addq $0x40,%rdi + andq $0xf,%rdx + je .L_done_4_EmbgEptodyewbFa + +.L_steal_cipher_4_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm12,%xmm13 + vmovdqa 16(%rsp),%xmm12 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + 
vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm13,%xmm0 + vmovdqa %xmm4,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_4_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec 
%xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vpxor %xmm12,%xmm4,%xmm4 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + vmovdqu %xmm3,32(%rsi) + addq $0x40,%rsi + vmovdqa %xmm4,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_3_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,32(%rsp) + movq %rbx,32 + 8(%rsp) + vmovdqa 32(%rsp),%xmm11 + vmovdqu 32(%rdi),%xmm3 + addq $0x30,%rdi + andq $0xf,%rdx + je .L_done_3_EmbgEptodyewbFa + +.L_steal_cipher_3_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm11,%xmm12 + vmovdqa 16(%rsp),%xmm11 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + 
vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm12,%xmm0 + vmovdqa %xmm3,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_3_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vpxor %xmm11,%xmm3,%xmm3 + vmovdqu %xmm1,(%rsi) + vmovdqu %xmm2,16(%rsi) + addq $0x30,%rsi + vmovdqa %xmm3,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_2_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,16 + 8(%rsp) + vmovdqa 16(%rsp),%xmm10 + vmovdqu 16(%rdi),%xmm2 + addq $0x20,%rdi + andq $0xf,%rdx + je .L_done_2_EmbgEptodyewbFa + +.L_steal_cipher_2_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm10,%xmm11 + vmovdqa 16(%rsp),%xmm10 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 
128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm11,%xmm0 + vmovdqa %xmm2,%xmm8 + jmp .L_steal_cipher_EmbgEptodyewbFa + +.L_done_2_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm0,%xmm2,%xmm2 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vpxor %xmm9,%xmm1,%xmm1 + vpxor %xmm10,%xmm2,%xmm2 + vmovdqu %xmm1,(%rsi) + addq $0x20,%rsi + vmovdqa %xmm2,%xmm8 + jmp .L_done_EmbgEptodyewbFa + +.L_num_blocks_is_1_EmbgEptodyewbFa: + vmovdqa 0(%rsp),%xmm9 + movq 0(%rsp),%rax + movq 8(%rsp),%rbx + vmovdqu 0(%rdi),%xmm1 + addq $0x10,%rdi + andq $0xf,%rdx + je .L_done_1_EmbgEptodyewbFa + +.L_steal_cipher_1_EmbgEptodyewbFa: + xorq %r11,%r11 + shlq $1,%rax + adcq %rbx,%rbx + cmovcq %r10,%r11 + xorq %r11,%rax + movq %rax,16(%rsp) + movq %rbx,24(%rsp) + vmovdqa64 %xmm9,%xmm10 + vmovdqa 16(%rsp),%xmm9 + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm10,%xmm0 + vmovdqa %xmm1,%xmm8 + jmp 
.L_steal_cipher_EmbgEptodyewbFa + +.L_done_1_EmbgEptodyewbFa: + vpxor %xmm9,%xmm1,%xmm1 + vmovdqu (%rcx),%xmm0 + vpxor %xmm0,%xmm1,%xmm1 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vpxor %xmm9,%xmm1,%xmm1 + addq $0x10,%rsi + vmovdqa %xmm1,%xmm8 + jmp .L_done_EmbgEptodyewbFa +.cfi_endproc +.section .rodata +.align 16 + +vpshufb_shf_table: +.quad 0x8786858483828100, 0x8f8e8d8c8b8a8988 +.quad 0x0706050403020100, 0x000e0d0c0b0a0908 + +mask1: +.quad 0x8080808080808080, 0x8080808080808080 + +const_dq3210: +.quad 0, 0, 1, 1, 2, 2, 3, 3 +const_dq5678: +.quad 8, 8, 7, 7, 6, 6, 5, 5 +const_dq7654: +.quad 4, 4, 5, 5, 6, 6, 7, 7 +const_dq1234: +.quad 4, 4, 3, 3, 2, 2, 1, 1 + +shufb_15_7: +.byte 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 7, 0xff, 0xff +.byte 0xff, 0xff, 0xff, 0xff, 0xff + +.text diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesp8-ppc.s b/contrib/openssl-cmake/asm/crypto/aes/aesp8-ppc.s new file mode 100644 index 000000000000..decc4149b106 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesp8-ppc.s @@ -0,0 +1,3703 @@ +.machine "any" + +.abiversion 2 +.text + +.align 7 +rcon: +.byte 0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00 +.byte 0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00 +.byte 0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c,0x0d,0x0e,0x0f,0x0c +.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe +.Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,-0x58 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.byte 65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.globl aes_p8_set_encrypt_key +.type aes_p8_set_encrypt_key,@function +.align 5 +aes_p8_set_encrypt_key: +.localentry aes_p8_set_encrypt_key,0 + +.Lset_encrypt_key: + mflr 11 + std 11,16(1) + + li 6,-1 + cmpldi 3,0 + beq- .Lenc_key_abort + cmpldi 5,0 + beq- .Lenc_key_abort + li 6,-2 + cmpwi 4,128 + blt- .Lenc_key_abort + cmpwi 4,256 + bgt- .Lenc_key_abort + andi. 
0,4,0x3f + bne- .Lenc_key_abort + + lis 0,0xfff0 + li 12,-1 + or 0,0,0 + + bl .Lconsts + mtlr 11 + + neg 9,3 + lvx 1,0,3 + addi 3,3,15 + lvsr 3,0,9 + li 8,0x20 + cmpwi 4,192 + lvx 2,0,3 + + lvx 4,0,6 + + lvx 5,8,6 + addi 6,6,0x10 + vperm 1,1,2,3 + li 7,8 + vxor 0,0,0 + mtctr 7 + + lvsr 8,0,5 + vspltisb 9,-1 + lvx 10,0,5 + vperm 9,0,9,8 + + blt .Loop128 + addi 3,3,8 + beq .L192 + addi 3,3,8 + b .L256 + +.align 4 +.Loop128: + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + bdnz .Loop128 + + lvx 4,0,6 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + + vperm 3,1,1,5 + vsldoi 6,0,1,12 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + + addi 3,5,15 + addi 5,5,0x50 + + li 8,10 + b .Ldone + +.align 4 +.L192: + lvx 6,0,3 + li 7,4 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + vspltisb 3,8 + mtctr 7 + vsububm 5,5,3 + +.Loop192: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + .long 0x10632509 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + + vsldoi 7,0,2,8 + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vsldoi 7,7,1,8 + + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vsldoi 7,1,2,8 + vxor 1,1,6 + vsldoi 6,0,6,12 + vperm 11,7,7,8 + vsel 7,10,11,9 + vor 10,11,11 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + stvx 7,0,5 + addi 5,5,16 + + vspltw 6,1,3 + vxor 6,6,2 + vsldoi 2,0,2,12 + vadduwm 4,4,4 + vxor 2,2,6 + vxor 1,1,3 + vxor 2,2,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bdnz .Loop192 + + li 8,12 + addi 5,5,0x20 + b .Ldone + +.align 4 +.L256: + lvx 6,0,3 + li 7,7 + li 8,14 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 5,5,16 + vperm 2,2,6,3 + mtctr 7 + +.Loop256: + vperm 3,2,2,5 + vsldoi 6,0,1,12 + vperm 11,2,2,8 + vsel 7,10,11,9 + vor 10,11,11 + .long 0x10632509 + stvx 7,0,5 + addi 5,5,16 + + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vsldoi 6,0,6,12 + vxor 1,1,6 + vadduwm 4,4,4 + vxor 1,1,3 + vperm 11,1,1,8 + vsel 7,10,11,9 + vor 10,11,11 + stvx 7,0,5 + addi 3,5,15 + addi 5,5,16 + bdz .Ldone + + vspltw 3,1,3 + vsldoi 6,0,2,12 + .long 0x106305C8 + + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + vsldoi 6,0,6,12 + vxor 2,2,6 + + vxor 2,2,3 + b .Loop256 + +.align 4 +.Ldone: + lvx 2,0,3 + vsel 2,10,2,9 + stvx 2,0,3 + li 6,0 + or 12,12,12 + stw 8,0(5) + +.Lenc_key_abort: + mr 3,6 + blr +.long 0 +.byte 0,12,0x14,1,0,0,3,0 +.long 0 +.size aes_p8_set_encrypt_key,.-aes_p8_set_encrypt_key + +.globl aes_p8_set_decrypt_key +.type aes_p8_set_decrypt_key,@function +.align 5 +aes_p8_set_decrypt_key: +.localentry aes_p8_set_decrypt_key,0 + + stdu 1,-64(1) + mflr 10 + std 10,64+16(1) + bl .Lset_encrypt_key + mtlr 10 + + cmpwi 3,0 + bne- .Ldec_key_abort + + slwi 7,8,4 + subi 3,5,240 + srwi 8,8,1 + add 5,3,7 + mtctr 8 + +.Ldeckey: + lwz 0, 0(3) + lwz 6, 4(3) + lwz 7, 8(3) + lwz 
8, 12(3) + addi 3,3,16 + lwz 9, 0(5) + lwz 10,4(5) + lwz 11,8(5) + lwz 12,12(5) + stw 0, 0(5) + stw 6, 4(5) + stw 7, 8(5) + stw 8, 12(5) + subi 5,5,16 + stw 9, -16(3) + stw 10,-12(3) + stw 11,-8(3) + stw 12,-4(3) + bdnz .Ldeckey + + xor 3,3,3 +.Ldec_key_abort: + addi 1,1,64 + blr +.long 0 +.byte 0,12,4,1,0x80,0,3,0 +.long 0 +.size aes_p8_set_decrypt_key,.-aes_p8_set_decrypt_key +.globl aes_p8_encrypt +.type aes_p8_encrypt,@function +.align 5 +aes_p8_encrypt: +.localentry aes_p8_encrypt,0 + + lwz 6,240(5) + lis 0,0xfc00 + li 12,-1 + li 7,15 + or 0,0,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + + lvsl 3,0,11 + + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsl 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,1,2,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +.Loop_enc: + vperm 2,2,1,5 + .long 0x10001508 + lvx 2,7,5 + addi 7,7,16 + vperm 1,1,2,5 + .long 0x10000D08 + lvx 1,7,5 + addi 7,7,16 + bdnz .Loop_enc + + vperm 2,2,1,5 + .long 0x10001508 + lvx 2,7,5 + vperm 1,1,2,5 + .long 0x10000D09 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,1,2,3 + + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 +.size aes_p8_encrypt,.-aes_p8_encrypt +.globl aes_p8_decrypt +.type aes_p8_decrypt,@function +.align 5 +aes_p8_decrypt: +.localentry aes_p8_decrypt,0 + + lwz 6,240(5) + lis 0,0xfc00 + li 12,-1 + li 7,15 + or 0,0,0 + + lvx 0,0,3 + neg 11,4 + lvx 1,7,3 + lvsl 2,0,3 + + lvsl 3,0,11 + + li 7,16 + vperm 0,0,1,2 + lvx 1,0,5 + lvsl 5,0,5 + srwi 6,6,1 + lvx 2,7,5 + addi 7,7,16 + subi 6,6,1 + vperm 1,1,2,5 + + vxor 0,0,1 + lvx 1,7,5 + addi 7,7,16 + mtctr 6 + +.Loop_dec: + vperm 2,2,1,5 + .long 0x10001548 + lvx 2,7,5 + addi 7,7,16 + vperm 1,1,2,5 + .long 0x10000D48 + lvx 1,7,5 + addi 7,7,16 + bdnz .Loop_dec + + vperm 2,2,1,5 + .long 0x10001548 + lvx 2,7,5 + vperm 1,1,2,5 + .long 0x10000D49 + + vspltisb 2,-1 + vxor 1,1,1 + li 7,15 + vperm 2,1,2,3 + + lvx 1,0,4 + vperm 0,0,0,3 + vsel 1,1,0,2 + lvx 4,7,4 + stvx 1,0,4 + vsel 0,0,4,2 + stvx 0,7,4 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 +.size aes_p8_decrypt,.-aes_p8_decrypt +.globl aes_p8_cbc_encrypt +.type aes_p8_cbc_encrypt,@function +.align 5 +aes_p8_cbc_encrypt: +.localentry aes_p8_cbc_encrypt,0 + + cmpldi 5,16 + .long 0x4dc00020 + + cmpwi 8,0 + lis 0,0xffe0 + li 12,-1 + or 0,0,0 + + li 10,15 + vxor 0,0,0 + + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + + vperm 4,4,5,6 + + neg 11,3 + lvsl 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + + + lvsr 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,0,9,8 + + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + beq .Lcbc_dec + +.Lcbc_enc: + vor 2,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 2,2,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,2,0 + lvx 0,10,6 + addi 10,10,16 + vxor 2,2,4 + +.Loop_cbc_enc: + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bdnz .Loop_cbc_enc + + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + li 10,16 + vperm 0,0,1,10 + .long 0x10820509 + cmpldi 5,16 + + vperm 3,4,4,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge .Lcbc_enc + + b .Lcbc_done + +.align 4 +.Lcbc_dec: + cmpldi 5,128 + bge _aesp8_cbc_decrypt8x + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + mtctr 9 + subi 5,5,16 + + lvx 0,0,6 + vperm 3,3,5,6 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,3,0 + lvx 0,10,6 + addi 10,10,16 + 
+.Loop_cbc_dec: + vperm 1,1,0,10 + .long 0x10420D48 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420548 + lvx 0,10,6 + addi 10,10,16 + bdnz .Loop_cbc_dec + + vperm 1,1,0,10 + .long 0x10420D48 + lvx 1,10,6 + li 10,16 + vperm 0,0,1,10 + .long 0x10420549 + cmpldi 5,16 + + vxor 2,2,4 + vor 4,3,3 + vperm 3,2,2,8 + vsel 2,7,3,9 + vor 7,3,3 + stvx 2,0,4 + addi 4,4,16 + bge .Lcbc_dec + +.Lcbc_done: + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 2,0,4 + + neg 8,7 + li 10,15 + vxor 0,0,0 + vspltisb 9,-1 + + lvsl 8,0,8 + vperm 9,0,9,8 + + lvx 7,0,7 + vperm 4,4,4,8 + vsel 2,7,4,9 + lvx 5,10,7 + stvx 2,0,7 + vsel 2,4,5,9 + stvx 2,10,7 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_cbc_decrypt8x: + stdu 1,-448(1) + li 10,207 + li 11,223 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,396(1) + li 8,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + or 0,0,0 + + subi 9,9,3 + subi 5,5,128 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,10 + addi 11,1,64+15 + mtctr 9 + +.Load_cbc_dec_key: + vperm 24,30,31,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,31,30,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bdnz .Load_cbc_dec_key + + lvx 26,8,6 + vperm 24,30,31,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,31,26,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,64+15 + vperm 26,26,27,10 + lvx 29,28,6 + vperm 27,27,28,10 + lvx 30,29,6 + vperm 28,28,29,10 + lvx 31,30,6 + vperm 29,29,30,10 + lvx 14,31,6 + vperm 30,30,31,10 + lvx 24,0,11 + vperm 31,31,14,10 + lvx 25,8,11 + + + + subi 3,3,15 + + + .long 0x7C001E99 + + + .long 0x7C281E99 + + .long 0x7C5A1E99 + + .long 0x7C7B1E99 + + .long 0x7D5C1E99 + + vxor 14,0,23 + .long 0x7D7D1E99 + + vxor 15,1,23 + .long 0x7D9E1E99 + + vxor 16,2,23 + .long 0x7DBF1E99 + addi 3,3,0x80 + + vxor 17,3,23 + + vxor 18,10,23 + + vxor 19,11,23 + vxor 20,12,23 + vxor 21,13,23 + + mtctr 9 + b .Loop_cbc_dec8x +.align 5 +.Loop_cbc_dec8x: + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bdnz .Loop_cbc_dec8x + + subic 5,5,128 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + subfe. 
0,0,0 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + and 0,0,5 + .long 0x11CED548 + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + add 3,3,0 + + + + .long 0x11CEDD48 + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + addi 11,1,64+15 + .long 0x11CEE548 + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + lvx 24,0,11 + + .long 0x11CEED48 + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + lvx 25,8,11 + + .long 0x11CEF548 + vxor 4,4,31 + .long 0x11EFF548 + vxor 0,0,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + .long 0x11CE2549 + .long 0x11EF0549 + .long 0x7C001E99 + .long 0x12100D49 + .long 0x7C281E99 + .long 0x12311549 + + .long 0x7C5A1E99 + .long 0x12521D49 + + .long 0x7C7B1E99 + .long 0x12735549 + + .long 0x7D5C1E99 + .long 0x12945D49 + + .long 0x7D7D1E99 + .long 0x12B56549 + + .long 0x7D9E1E99 + vor 4,13,13 + + .long 0x7DBF1E99 + addi 3,3,0x80 + + + + .long 0x7DC02799 + + vxor 14,0,23 + + .long 0x7DE82799 + + vxor 15,1,23 + + .long 0x7E1A2799 + vxor 16,2,23 + + .long 0x7E3B2799 + vxor 17,3,23 + + .long 0x7E5C2799 + vxor 18,10,23 + + .long 0x7E7D2799 + vxor 19,11,23 + + .long 0x7E9E2799 + vxor 20,12,23 + .long 0x7EBF2799 + addi 4,4,0x80 + vxor 21,13,23 + + mtctr 9 + beq .Loop_cbc_dec8x + + addic. 
5,5,128 + beq .Lcbc_dec8x_done + nop + nop + +.Loop_cbc_dec8x_tail: + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + lvx 25,8,11 + bdnz .Loop_cbc_dec8x_tail + + .long 0x11EFC548 + .long 0x1210C548 + .long 0x1231C548 + .long 0x1252C548 + .long 0x1273C548 + .long 0x1294C548 + .long 0x12B5C548 + + .long 0x11EFCD48 + .long 0x1210CD48 + .long 0x1231CD48 + .long 0x1252CD48 + .long 0x1273CD48 + .long 0x1294CD48 + .long 0x12B5CD48 + + .long 0x11EFD548 + .long 0x1210D548 + .long 0x1231D548 + .long 0x1252D548 + .long 0x1273D548 + .long 0x1294D548 + .long 0x12B5D548 + + .long 0x11EFDD48 + .long 0x1210DD48 + .long 0x1231DD48 + .long 0x1252DD48 + .long 0x1273DD48 + .long 0x1294DD48 + .long 0x12B5DD48 + + .long 0x11EFE548 + .long 0x1210E548 + .long 0x1231E548 + .long 0x1252E548 + .long 0x1273E548 + .long 0x1294E548 + .long 0x12B5E548 + + .long 0x11EFED48 + .long 0x1210ED48 + .long 0x1231ED48 + .long 0x1252ED48 + .long 0x1273ED48 + .long 0x1294ED48 + .long 0x12B5ED48 + + .long 0x11EFF548 + vxor 4,4,31 + .long 0x1210F548 + vxor 1,1,31 + .long 0x1231F548 + vxor 2,2,31 + .long 0x1252F548 + vxor 3,3,31 + .long 0x1273F548 + vxor 10,10,31 + .long 0x1294F548 + vxor 11,11,31 + .long 0x12B5F548 + vxor 12,12,31 + + cmplwi 5,32 + blt .Lcbc_dec8x_one + nop + beq .Lcbc_dec8x_two + cmplwi 5,64 + blt .Lcbc_dec8x_three + nop + beq .Lcbc_dec8x_four + cmplwi 5,96 + blt .Lcbc_dec8x_five + nop + beq .Lcbc_dec8x_six + +.Lcbc_dec8x_seven: + .long 0x11EF2549 + .long 0x12100D49 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b .Lcbc_dec8x_done + +.align 5 +.Lcbc_dec8x_six: + .long 0x12102549 + .long 0x12311549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E002799 + + .long 0x7E282799 + + .long 0x7E5A2799 + + .long 0x7E7B2799 + + .long 0x7E9C2799 + .long 0x7EBD2799 + addi 4,4,0x60 + b .Lcbc_dec8x_done + +.align 5 +.Lcbc_dec8x_five: + .long 0x12312549 + .long 0x12521D49 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E202799 + + .long 0x7E482799 + + .long 0x7E7A2799 + + .long 0x7E9B2799 + .long 0x7EBC2799 + addi 4,4,0x50 + b .Lcbc_dec8x_done + +.align 5 +.Lcbc_dec8x_four: + .long 0x12522549 + .long 0x12735549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E402799 + + .long 0x7E682799 + + .long 0x7E9A2799 + .long 0x7EBB2799 + addi 4,4,0x40 + b .Lcbc_dec8x_done + +.align 5 +.Lcbc_dec8x_three: + .long 0x12732549 + .long 0x12945D49 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E602799 + + .long 0x7E882799 + .long 0x7EBA2799 + addi 4,4,0x30 + b .Lcbc_dec8x_done + +.align 5 +.Lcbc_dec8x_two: + .long 0x12942549 + .long 0x12B56549 + vor 4,13,13 + + + + .long 0x7E802799 + .long 0x7EA82799 + addi 4,4,0x20 + b .Lcbc_dec8x_done + +.align 5 +.Lcbc_dec8x_one: + .long 0x12B52549 + vor 4,13,13 + + + .long 0x7EA02799 + addi 4,4,0x10 + +.Lcbc_dec8x_done: + + .long 0x7C803F99 + + li 10,79 + li 11,95 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + 
stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + or 12,12,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 +.size aes_p8_cbc_encrypt,.-aes_p8_cbc_encrypt +.globl aes_p8_ctr32_encrypt_blocks +.type aes_p8_ctr32_encrypt_blocks,@function +.align 5 +aes_p8_ctr32_encrypt_blocks: +.localentry aes_p8_ctr32_encrypt_blocks,0 + + cmpldi 5,1 + .long 0x4dc00020 + + lis 0,0xfff0 + li 12,-1 + or 0,0,0 + + li 10,15 + vxor 0,0,0 + + + lvx 4,0,7 + lvsl 6,0,7 + lvx 5,10,7 + vspltisb 11,1 + + vperm 4,4,5,6 + vsldoi 11,0,11,1 + + neg 11,3 + lvsl 10,0,6 + lwz 9,240(6) + + lvsr 6,0,11 + lvx 5,0,3 + addi 3,3,15 + + + srwi 9,9,1 + li 10,16 + subi 9,9,1 + + cmpldi 5,8 + bge _aesp8_ctr32_encrypt8x + + lvsr 8,0,4 + vspltisb 9,-1 + lvx 7,0,4 + vperm 9,0,9,8 + + + lvx 0,0,6 + mtctr 9 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + b .Loop_ctr32_enc + +.align 5 +.Loop_ctr32_enc: + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + addi 10,10,16 + vperm 0,0,1,10 + .long 0x10420508 + lvx 0,10,6 + addi 10,10,16 + bdnz .Loop_ctr32_enc + + vadduwm 4,4,11 + vor 3,5,5 + lvx 5,0,3 + addi 3,3,16 + subic. 5,5,1 + + vperm 1,1,0,10 + .long 0x10420D08 + lvx 1,10,6 + vperm 3,3,5,6 + li 10,16 + vperm 1,0,1,10 + lvx 0,0,6 + vxor 3,3,1 + .long 0x10421D09 + + lvx 1,10,6 + addi 10,10,16 + vperm 2,2,2,8 + vsel 3,7,2,9 + mtctr 9 + vperm 0,0,1,10 + vor 7,2,2 + vxor 2,4,0 + lvx 0,10,6 + addi 10,10,16 + stvx 3,0,4 + addi 4,4,16 + bne .Loop_ctr32_enc + + addi 4,4,-1 + lvx 2,0,4 + vsel 2,7,2,9 + stvx 2,0,4 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,6,0 +.long 0 +.align 5 +_aesp8_ctr32_encrypt8x: + stdu 1,-448(1) + li 10,207 + li 11,223 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 0,-1 + stw 12,396(1) + li 8,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + or 0,0,0 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,8,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,10 + addi 11,1,64+15 + mtctr 9 + +.Load_ctr32_enc_key: + vperm 24,30,31,10 + lvx 30,8,6 + addi 6,6,0x20 + stvx 24,0,11 + vperm 25,31,30,10 + lvx 31,0,6 + stvx 25,8,11 + addi 11,11,0x20 + bdnz .Load_ctr32_enc_key + + lvx 26,8,6 + vperm 24,30,31,10 + lvx 27,26,6 + stvx 24,0,11 + vperm 25,31,26,10 + lvx 28,27,6 + stvx 25,8,11 + addi 11,1,64+15 + vperm 26,26,27,10 + lvx 29,28,6 + vperm 27,27,28,10 + lvx 30,29,6 + vperm 28,28,29,10 + lvx 31,30,6 + vperm 29,29,30,10 + lvx 15,31,6 + vperm 30,30,31,10 + lvx 24,0,11 + vperm 31,31,15,10 + lvx 25,8,11 + + vadduwm 7,11,11 + subi 3,3,15 + sldi 5,5,4 + + vadduwm 16,4,11 + vadduwm 17,4,7 + vxor 15,4,23 + + vadduwm 18,16,7 + vxor 16,16,23 + + vadduwm 19,17,7 + vxor 17,17,23 + + vadduwm 20,18,7 + vxor 18,18,23 + + 
vadduwm 21,19,7 + vxor 19,19,23 + vadduwm 22,20,7 + vxor 20,20,23 + vadduwm 4,21,7 + vxor 21,21,23 + vxor 22,22,23 + + mtctr 9 + b .Loop_ctr32_enc8x +.align 5 +.Loop_ctr32_enc8x: + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 +.Loop_ctr32_enc8x_middle: + lvx 24,26,11 + addi 11,11,0x20 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + lvx 25,8,11 + bdnz .Loop_ctr32_enc8x + + subic 11,5,256 + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + subfe 0,0,0 + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + and 0,0,11 + addi 11,1,64+15 + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + lvx 24,0,11 + + subic 5,5,129 + .long 0x11EFDD08 + addi 5,5,1 + .long 0x1210DD08 + .long 0x1231DD08 + .long 0x1252DD08 + .long 0x1273DD08 + .long 0x1294DD08 + .long 0x12B5DD08 + .long 0x12D6DD08 + lvx 25,8,11 + + .long 0x11EFE508 + .long 0x7C001E99 + .long 0x1210E508 + .long 0x7C281E99 + .long 0x1231E508 + .long 0x7C5A1E99 + .long 0x1252E508 + .long 0x7C7B1E99 + .long 0x1273E508 + .long 0x7D5C1E99 + .long 0x1294E508 + .long 0x7D9D1E99 + .long 0x12B5E508 + .long 0x7DBE1E99 + .long 0x12D6E508 + .long 0x7DDF1E99 + addi 3,3,0x80 + + .long 0x11EFED08 + + .long 0x1210ED08 + + .long 0x1231ED08 + + .long 0x1252ED08 + + .long 0x1273ED08 + + .long 0x1294ED08 + + .long 0x12B5ED08 + + .long 0x12D6ED08 + + + add 3,3,0 + + + + subfe. 
0,0,0 + .long 0x11EFF508 + vxor 0,0,31 + .long 0x1210F508 + vxor 1,1,31 + .long 0x1231F508 + vxor 2,2,31 + .long 0x1252F508 + vxor 3,3,31 + .long 0x1273F508 + vxor 10,10,31 + .long 0x1294F508 + vxor 12,12,31 + .long 0x12B5F508 + vxor 13,13,31 + .long 0x12D6F508 + vxor 14,14,31 + + bne .Lctr32_enc8x_break + + .long 0x100F0509 + .long 0x10300D09 + vadduwm 16,4,11 + .long 0x10511509 + vadduwm 17,4,7 + vxor 15,4,23 + .long 0x10721D09 + vadduwm 18,16,7 + vxor 16,16,23 + .long 0x11535509 + vadduwm 19,17,7 + vxor 17,17,23 + .long 0x11946509 + vadduwm 20,18,7 + vxor 18,18,23 + .long 0x11B56D09 + vadduwm 21,19,7 + vxor 19,19,23 + .long 0x11D67509 + vadduwm 22,20,7 + vxor 20,20,23 + + vadduwm 4,21,7 + vxor 21,21,23 + + vxor 22,22,23 + mtctr 9 + + .long 0x11EFC508 + .long 0x7C002799 + + .long 0x1210C508 + .long 0x7C282799 + + .long 0x1231C508 + .long 0x7C5A2799 + + .long 0x1252C508 + .long 0x7C7B2799 + + .long 0x1273C508 + .long 0x7D5C2799 + + .long 0x1294C508 + .long 0x7D9D2799 + + .long 0x12B5C508 + .long 0x7DBE2799 + .long 0x12D6C508 + .long 0x7DDF2799 + addi 4,4,0x80 + + b .Loop_ctr32_enc8x_middle + +.align 5 +.Lctr32_enc8x_break: + cmpwi 5,-0x60 + blt .Lctr32_enc8x_one + nop + beq .Lctr32_enc8x_two + cmpwi 5,-0x40 + blt .Lctr32_enc8x_three + nop + beq .Lctr32_enc8x_four + cmpwi 5,-0x20 + blt .Lctr32_enc8x_five + nop + beq .Lctr32_enc8x_six + cmpwi 5,0x00 + blt .Lctr32_enc8x_seven + +.Lctr32_enc8x_eight: + .long 0x11EF0509 + .long 0x12100D09 + .long 0x12311509 + .long 0x12521D09 + .long 0x12735509 + .long 0x12946509 + .long 0x12B56D09 + .long 0x12D67509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + + .long 0x7EBE2799 + .long 0x7EDF2799 + addi 4,4,0x80 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_seven: + .long 0x11EF0D09 + .long 0x12101509 + .long 0x12311D09 + .long 0x12525509 + .long 0x12736509 + .long 0x12946D09 + .long 0x12B57509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + + .long 0x7E9D2799 + .long 0x7EBE2799 + addi 4,4,0x70 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_six: + .long 0x11EF1509 + .long 0x12101D09 + .long 0x12315509 + .long 0x12526509 + .long 0x12736D09 + .long 0x12947509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + + .long 0x7E7C2799 + .long 0x7E9D2799 + addi 4,4,0x60 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_five: + .long 0x11EF1D09 + .long 0x12105509 + .long 0x12316509 + .long 0x12526D09 + .long 0x12737509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + + .long 0x7E5B2799 + .long 0x7E7C2799 + addi 4,4,0x50 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_four: + .long 0x11EF5509 + .long 0x12106509 + .long 0x12316D09 + .long 0x12527509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + + .long 0x7E3A2799 + .long 0x7E5B2799 + addi 4,4,0x40 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_three: + .long 0x11EF6509 + .long 0x12106D09 + .long 0x12317509 + + + + .long 0x7DE02799 + + .long 0x7E082799 + .long 0x7E3A2799 + addi 4,4,0x30 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_two: + .long 0x11EF6D09 + .long 0x12107509 + + + + .long 0x7DE02799 + .long 0x7E082799 + addi 4,4,0x20 + b .Lctr32_enc8x_done + +.align 5 +.Lctr32_enc8x_one: + .long 0x11EF7509 + + + .long 0x7DE02799 + addi 4,4,0x10 + +.Lctr32_enc8x_done: + li 10,79 + li 11,95 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 
11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + stvx 6,10,1 + addi 10,10,32 + stvx 6,11,1 + addi 11,11,32 + + or 12,12,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 +.size aes_p8_ctr32_encrypt_blocks,.-aes_p8_ctr32_encrypt_blocks +.globl aes_p8_xts_encrypt +.type aes_p8_xts_encrypt,@function +.align 5 +aes_p8_xts_encrypt: +.localentry aes_p8_xts_encrypt,0 + + mr 10,3 + li 3,-1 + cmpldi 5,16 + .long 0x4dc00020 + + lis 0,0xfff0 + li 12,-1 + li 11,0 + or 0,0,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmpldi 7,0 + beq .Lxts_enc_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +.Ltweak_xts_enc: + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bdnz .Ltweak_xts_enc + + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 + .long 0x11080509 + + li 8,0 + b .Lxts_enc + +.Lxts_enc_no_key2: + li 3,-16 + and 5,5,3 + + +.Lxts_enc: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_encrypt6x + + andi. 7,5,15 + subic 0,5,32 + subi 7,7,16 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + b .Loop_xts_enc + +.align 5 +.Loop_xts_enc: + vperm 1,1,0,7 + .long 0x10420D08 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420508 + lvx 0,3,6 + addi 3,3,16 + bdnz .Loop_xts_enc + + vperm 1,1,0,7 + .long 0x10420D08 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 + .long 0x10620509 + + + nop + + .long 0x7C602799 + addi 4,4,16 + + subic. 
5,5,16 + beq .Lxts_enc_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + subic 0,5,32 + subfe 0,0,0 + and 0,0,7 + add 10,10,0 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 3,3,0 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge .Loop_xts_enc + + vxor 3,3,8 + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + subi 11,4,17 + subi 4,4,16 + mtctr 5 + li 5,16 +.Loop_xts_enc_steal: + lbzu 0,1(11) + stb 0,16(11) + bdnz .Loop_xts_enc_steal + + mtctr 9 + b .Loop_xts_enc + +.Lxts_enc_done: + cmpldi 8,0 + beq .Lxts_enc_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + + .long 0x7D004799 + +.Lxts_enc_ret: + or 12,12,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 +.size aes_p8_xts_encrypt,.-aes_p8_xts_encrypt + +.globl aes_p8_xts_decrypt +.type aes_p8_xts_decrypt,@function +.align 5 +aes_p8_xts_decrypt: +.localentry aes_p8_xts_decrypt,0 + + mr 10,3 + li 3,-1 + cmpldi 5,16 + .long 0x4dc00020 + + lis 0,0xfff8 + li 12,-1 + li 11,0 + or 0,0,0 + + andi. 0,5,15 + neg 0,0 + andi. 0,0,16 + sub 5,5,0 + + vspltisb 9,0x07 + + + + + li 3,15 + lvx 8,0,8 + lvsl 5,0,8 + lvx 4,3,8 + + vperm 8,8,4,5 + + neg 11,10 + lvsr 5,0,11 + lvx 2,0,10 + addi 10,10,15 + + + cmpldi 7,0 + beq .Lxts_dec_no_key2 + + lvsl 7,0,7 + lwz 9,240(7) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + lvx 0,0,7 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + vxor 8,8,0 + lvx 0,3,7 + addi 3,3,16 + mtctr 9 + +.Ltweak_xts_dec: + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x11080508 + lvx 0,3,7 + addi 3,3,16 + bdnz .Ltweak_xts_dec + + vperm 1,1,0,7 + .long 0x11080D08 + lvx 1,3,7 + vperm 0,0,1,7 + .long 0x11080509 + + li 8,0 + b .Lxts_dec + +.Lxts_dec_no_key2: + neg 3,5 + andi. 3,3,15 + add 5,5,3 + + +.Lxts_dec: + lvx 4,0,10 + addi 10,10,16 + + lvsl 7,0,6 + lwz 9,240(6) + srwi 9,9,1 + subi 9,9,1 + li 3,16 + + vslb 10,9,9 + vor 10,10,9 + vspltisb 11,1 + vsldoi 10,10,11,15 + + cmpldi 5,96 + bge _aesp8_xts_decrypt6x + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + mtctr 9 + + cmpldi 5,16 + blt .Ltail_xts_dec + b .Loop_xts_dec + +.align 5 +.Loop_xts_dec: + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bdnz .Loop_xts_dec + + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,8 + .long 0x10620549 + + + nop + + .long 0x7C602799 + addi 4,4,16 + + subic. 
5,5,16 + beq .Lxts_dec_done + + vor 2,4,4 + lvx 4,0,10 + addi 10,10,16 + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + vperm 2,2,4,5 + vperm 0,0,1,7 + vxor 2,2,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + mtctr 9 + cmpldi 5,16 + bge .Loop_xts_dec + +.Ltail_xts_dec: + vsrab 11,8,9 + vaddubm 12,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 12,12,11 + + subi 10,10,16 + add 10,10,5 + + vxor 2,2,8 + vxor 2,2,12 + +.Loop_xts_dec_short: + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + addi 3,3,16 + vperm 0,0,1,7 + .long 0x10420548 + lvx 0,3,6 + addi 3,3,16 + bdnz .Loop_xts_dec_short + + vperm 1,1,0,7 + .long 0x10420D48 + lvx 1,3,6 + li 3,16 + vperm 0,0,1,7 + vxor 0,0,12 + .long 0x10620549 + + + nop + + .long 0x7C602799 + + vor 2,4,4 + lvx 4,0,10 + + lvx 0,0,6 + lvx 1,3,6 + addi 3,3,16 + vperm 2,2,4,5 + vperm 0,0,1,7 + + lvsr 5,0,5 + vxor 4,4,4 + vspltisb 11,-1 + vperm 4,4,11,5 + vsel 2,2,3,4 + + vxor 0,0,8 + vxor 2,2,0 + lvx 0,3,6 + addi 3,3,16 + + subi 11,4,1 + mtctr 5 + li 5,16 +.Loop_xts_dec_steal: + lbzu 0,1(11) + stb 0,16(11) + bdnz .Loop_xts_dec_steal + + mtctr 9 + b .Loop_xts_dec + +.Lxts_dec_done: + cmpldi 8,0 + beq .Lxts_dec_ret + + vsrab 11,8,9 + vaddubm 8,8,8 + vsldoi 11,11,11,15 + vand 11,11,10 + vxor 8,8,11 + + + .long 0x7D004799 + +.Lxts_dec_ret: + or 12,12,12 + li 3,0 + blr +.long 0 +.byte 0,12,0x04,0,0x80,6,6,0 +.long 0 +.size aes_p8_xts_decrypt,.-aes_p8_xts_decrypt +.align 5 +_aesp8_xts_encrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + or 0,0,0 + + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,64+15 + mtctr 9 + +.Load_xts_enc_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bdnz .Load_xts_enc_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + + + + + + + + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vand 11,11,10 + vxor 7,0,17 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 12,1,18 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 13,2,19 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 14,3,20 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 15,4,21 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 16,5,22 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + + vxor 31,31,23 + mtctr 9 + b .Loop_xts_enc6x + +.align 5 +.Loop_xts_enc6x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + .long 0x1210C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + .long 0x1210CD08 + lvx 25,3,7 + bdnz .Loop_xts_enc6x + + xxlor 32+10, 1, 1 + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C508 + .long 0x118CC508 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + .long 0x1210C508 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD08 + .long 0x118CCD08 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 + .long 0x11ADCD08 + .long 0x11CECD08 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD08 + .long 0x1210CD08 + + and 0,0,5 + vaddubm 8,8,8 + .long 0x10E7D508 + .long 0x118CD508 + vand 11,11,10 + .long 0x11ADD508 + .long 0x11CED508 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 + .long 0x11EFD508 + .long 0x1210D508 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD08 + .long 0x118CDD08 + .long 0x11ADDD08 + .long 0x11CEDD08 + vand 11,11,10 + .long 0x11EFDD08 + .long 0x1210DD08 + + addi 7,1,64+15 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 + .long 0x10E7E508 + .long 0x118CE508 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE508 + .long 0x11CEE508 + vaddubm 8,8,8 + .long 0x11EFE508 + .long 0x1210E508 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED08 + .long 0x118CED08 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 + .long 0x11ADED08 + .long 0x11CEED08 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED08 + .long 0x1210ED08 + lvx 25,3,7 + vaddubm 8,8,8 + + .long 0x10E7F508 + .long 0x118CF508 + vand 11,11,10 + .long 0x11ADF508 + .long 0x11CEF508 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 + .long 0x11EFF508 + .long 0x1210F508 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70509 + .long 0x7C005699 + vaddubm 8,8,8 + .long 0x118C0D09 + .long 0x7C235699 + .long 0x11AD1509 + + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D09 + + .long 0x7C7B5699 + .long 0x11EF2509 + + .long 0x7C9C5699 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 + .long 0x11702D09 + + + .long 0x7CBD5699 + addi 10,10,0x60 + + + + + + .long 0x7CE02799 + vxor 7,0,17 + + .long 0x7D832799 + vxor 12,1,18 + + .long 0x7DBA2799 + vxor 13,2,19 + + .long 0x7DDB2799 + vxor 14,3,20 + + .long 0x7DFC2799 + vxor 15,4,21 + + .long 0x7D7D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq .Loop_xts_enc6x + + xxlor 32+10, 2, 2 + + addic. 
5,5,0x60 + beq .Lxts_enc6x_zero + cmpwi 5,0x20 + blt .Lxts_enc6x_one + nop + beq .Lxts_enc6x_two + cmpwi 5,0x40 + blt .Lxts_enc6x_three + nop + beq .Lxts_enc6x_four + +.Lxts_enc6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_enc5x + + + vor 17,22,22 + + .long 0x7CE02799 + + .long 0x7D832799 + + .long 0x7DBA2799 + vxor 11,15,22 + + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,21,21 + + .long 0x7CE02799 + + .long 0x7D832799 + vxor 11,14,21 + + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,20,20 + + .long 0x7CE02799 + vxor 11,13,20 + + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_enc5x + + + vor 17,19,19 + vxor 11,12,19 + + .long 0x7CE02799 + .long 0x7D832799 + addi 4,4,0x20 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_one: + vxor 7,5,17 + nop +.Loop_xts_enc1x: + .long 0x10E7C508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + lvx 25,3,7 + bdnz .Loop_xts_enc1x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + + subi 10,10,16 + .long 0x10E7CD08 + + lvsr 5,0,31 + .long 0x10E7D508 + + .long 0x7C005699 + .long 0x10E7DD08 + + addi 7,1,64+15 + .long 0x10E7E508 + lvx 24,0,7 + + .long 0x10E7ED08 + lvx 25,3,7 + vxor 17,17,31 + + + .long 0x10E7F508 + + vperm 0,0,0,5 + .long 0x10E78D09 + + vor 17,18,18 + vxor 11,7,18 + + .long 0x7CE02799 + addi 4,4,0x10 + bne .Lxts_enc6x_steal + b .Lxts_enc6x_done + +.align 4 +.Lxts_enc6x_zero: + cmpwi 31,0 + beq .Lxts_enc6x_done + + add 10,10,31 + subi 10,10,16 + .long 0x7C005699 + lvsr 5,0,31 + + vperm 0,0,0,5 + vxor 11,11,17 +.Lxts_enc6x_steal: + vxor 0,0,17 + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + + subi 30,4,17 + subi 4,4,16 + mtctr 31 +.Loop_xts_enc6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bdnz .Loop_xts_enc6x_steal + + li 31,0 + mtctr 9 + b .Loop_xts_enc1x + +.align 4 +.Lxts_enc6x_done: + cmpldi 8,0 + beq .Lxts_enc6x_ret + + vxor 8,17,23 + + .long 0x7D004799 + +.Lxts_enc6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + or 12,12,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_enc5x: + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + lvx 25,3,7 + 
bdnz _aesp8_xts_enc5x + + add 10,10,31 + cmpwi 31,0 + .long 0x10E7C508 + .long 0x118CC508 + .long 0x11ADC508 + .long 0x11CEC508 + .long 0x11EFC508 + + subi 10,10,16 + .long 0x10E7CD08 + .long 0x118CCD08 + .long 0x11ADCD08 + .long 0x11CECD08 + .long 0x11EFCD08 + vxor 17,17,31 + + .long 0x10E7D508 + lvsr 5,0,31 + .long 0x118CD508 + .long 0x11ADD508 + .long 0x11CED508 + .long 0x11EFD508 + vxor 1,18,31 + + .long 0x10E7DD08 + .long 0x7C005699 + .long 0x118CDD08 + .long 0x11ADDD08 + .long 0x11CEDD08 + .long 0x11EFDD08 + vxor 2,19,31 + + addi 7,1,64+15 + .long 0x10E7E508 + .long 0x118CE508 + .long 0x11ADE508 + .long 0x11CEE508 + .long 0x11EFE508 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED08 + + .long 0x118CED08 + .long 0x11ADED08 + .long 0x11CEED08 + .long 0x11EFED08 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F508 + vperm 0,0,0,5 + .long 0x118CF508 + .long 0x11ADF508 + .long 0x11CEF508 + .long 0x11EFF508 + + .long 0x10E78D09 + .long 0x118C0D09 + .long 0x11AD1509 + .long 0x11CE1D09 + .long 0x11EF2509 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + stdu 1,-448(1) + mflr 11 + li 7,207 + li 3,223 + std 11,464(1) + stvx 20,7,1 + addi 7,7,32 + stvx 21,3,1 + addi 3,3,32 + stvx 22,7,1 + addi 7,7,32 + stvx 23,3,1 + addi 3,3,32 + stvx 24,7,1 + addi 7,7,32 + stvx 25,3,1 + addi 3,3,32 + stvx 26,7,1 + addi 7,7,32 + stvx 27,3,1 + addi 3,3,32 + stvx 28,7,1 + addi 7,7,32 + stvx 29,3,1 + addi 3,3,32 + stvx 30,7,1 + stvx 31,3,1 + li 0,-1 + stw 12,396(1) + li 3,0x10 + std 26,400(1) + li 26,0x20 + std 27,408(1) + li 27,0x30 + std 28,416(1) + li 28,0x40 + std 29,424(1) + li 29,0x50 + std 30,432(1) + li 30,0x60 + std 31,440(1) + li 31,0x70 + or 0,0,0 + + + xxlor 2, 32+10, 32+10 + vsldoi 10,11,10,1 + xxlor 1, 32+10, 32+10 + + + mr 31, 6 + bl .Lconsts + lxvw4x 0, 28, 6 + mr 6, 31 + li 31,0x70 + + subi 9,9,3 + + lvx 23,0,6 + lvx 30,3,6 + addi 6,6,0x20 + lvx 31,0,6 + vperm 23,23,30,7 + addi 7,1,64+15 + mtctr 9 + +.Load_xts_dec_key: + vperm 24,30,31,7 + lvx 30,3,6 + addi 6,6,0x20 + stvx 24,0,7 + vperm 25,31,30,7 + lvx 31,0,6 + stvx 25,3,7 + addi 7,7,0x20 + bdnz .Load_xts_dec_key + + lvx 26,3,6 + vperm 24,30,31,7 + lvx 27,26,6 + stvx 24,0,7 + vperm 25,31,26,7 + lvx 28,27,6 + stvx 25,3,7 + addi 7,1,64+15 + vperm 26,26,27,7 + lvx 29,28,6 + vperm 27,27,28,7 + lvx 30,29,6 + vperm 28,28,29,7 + lvx 31,30,6 + vperm 29,29,30,7 + lvx 22,31,6 + vperm 30,30,31,7 + lvx 24,0,7 + vperm 31,31,22,7 + lvx 25,3,7 + + vperm 0,2,4,5 + subi 10,10,31 + vxor 17,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + vand 11,11,10 + vxor 7,0,17 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 + + .long 0x7C235699 + vxor 18,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 12,1,18 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 + + .long 0x7C5A5699 + andi. 
31,5,15 + vxor 19,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 13,2,19 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 + + .long 0x7C7B5699 + sub 5,5,31 + vxor 20,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 14,3,20 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 + + .long 0x7C9C5699 + subi 5,5,0x60 + vxor 21,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 15,4,21 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 + + .long 0x7CBD5699 + addi 10,10,0x60 + vxor 22,8,23 + vsrab 11,8,9 + vaddubm 8,8,8 + + vand 11,11,10 + vxor 16,5,22 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + + vxor 31,31,23 + mtctr 9 + b .Loop_xts_dec6x + +.align 5 +.Loop_xts_dec6x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + .long 0x1210CD48 + lvx 25,3,7 + bdnz .Loop_xts_dec6x + + xxlor 32+10, 1, 1 + + subic 5,5,96 + vxor 0,17,31 + .long 0x10E7C548 + .long 0x118CC548 + vsrab 11,8,9 + vxor 17,8,23 + vaddubm 8,8,8 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + .long 0x1210C548 + + subfe. 0,0,0 + vand 11,11,10 + .long 0x10E7CD48 + .long 0x118CCD48 + xxlor 32+1, 0, 0 + vpermxor 8, 8, 11, 1 + .long 0x11ADCD48 + .long 0x11CECD48 + vxor 1,18,31 + vsrab 11,8,9 + vxor 18,8,23 + .long 0x11EFCD48 + .long 0x1210CD48 + + and 0,0,5 + vaddubm 8,8,8 + .long 0x10E7D548 + .long 0x118CD548 + vand 11,11,10 + .long 0x11ADD548 + .long 0x11CED548 + xxlor 32+2, 0, 0 + vpermxor 8, 8, 11, 2 + .long 0x11EFD548 + .long 0x1210D548 + + add 10,10,0 + + + + vxor 2,19,31 + vsrab 11,8,9 + vxor 19,8,23 + vaddubm 8,8,8 + .long 0x10E7DD48 + .long 0x118CDD48 + .long 0x11ADDD48 + .long 0x11CEDD48 + vand 11,11,10 + .long 0x11EFDD48 + .long 0x1210DD48 + + addi 7,1,64+15 + xxlor 32+3, 0, 0 + vpermxor 8, 8, 11, 3 + .long 0x10E7E548 + .long 0x118CE548 + vxor 3,20,31 + vsrab 11,8,9 + vxor 20,8,23 + .long 0x11ADE548 + .long 0x11CEE548 + vaddubm 8,8,8 + .long 0x11EFE548 + .long 0x1210E548 + lvx 24,0,7 + vand 11,11,10 + + .long 0x10E7ED48 + .long 0x118CED48 + xxlor 32+4, 0, 0 + vpermxor 8, 8, 11, 4 + .long 0x11ADED48 + .long 0x11CEED48 + vxor 4,21,31 + vsrab 11,8,9 + vxor 21,8,23 + .long 0x11EFED48 + .long 0x1210ED48 + lvx 25,3,7 + vaddubm 8,8,8 + + .long 0x10E7F548 + .long 0x118CF548 + vand 11,11,10 + .long 0x11ADF548 + .long 0x11CEF548 + xxlor 32+5, 0, 0 + vpermxor 8, 8, 11, 5 + .long 0x11EFF548 + .long 0x1210F548 + vxor 5,22,31 + vsrab 11,8,9 + vxor 22,8,23 + + .long 0x10E70549 + .long 0x7C005699 + vaddubm 8,8,8 + .long 0x118C0D49 + .long 0x7C235699 + .long 0x11AD1549 + + .long 0x7C5A5699 + vand 11,11,10 + .long 0x11CE1D49 + + .long 0x7C7B5699 + .long 0x11EF2549 + + .long 0x7C9C5699 + xxlor 10, 32+0, 32+0 + xxlor 32+0, 0, 0 + vpermxor 8, 8, 11, 0 + xxlor 32+0, 10, 10 + .long 0x12102D49 + + .long 0x7CBD5699 + addi 10,10,0x60 + + + + + + .long 0x7CE02799 + vxor 7,0,17 + + .long 0x7D832799 + vxor 12,1,18 + + .long 0x7DBA2799 + vxor 13,2,19 + + .long 0x7DDB2799 + vxor 14,3,20 + + .long 0x7DFC2799 + vxor 15,4,21 + .long 0x7E1D2799 + vxor 16,5,22 + addi 4,4,0x60 + + mtctr 9 + beq .Loop_xts_dec6x + + xxlor 32+10, 2, 2 + + addic. 
5,5,0x60 + beq .Lxts_dec6x_zero + cmpwi 5,0x20 + blt .Lxts_dec6x_one + nop + beq .Lxts_dec6x_two + cmpwi 5,0x40 + blt .Lxts_dec6x_three + nop + beq .Lxts_dec6x_four + +.Lxts_dec6x_five: + vxor 7,1,17 + vxor 12,2,18 + vxor 13,3,19 + vxor 14,4,20 + vxor 15,5,21 + + bl _aesp8_xts_dec5x + + + vor 17,22,22 + vxor 18,8,23 + + .long 0x7CE02799 + vxor 7,0,18 + + .long 0x7D832799 + + .long 0x7DBA2799 + + .long 0x7DDB2799 + .long 0x7DFC2799 + addi 4,4,0x50 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_four: + vxor 7,2,17 + vxor 12,3,18 + vxor 13,4,19 + vxor 14,5,20 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,21,21 + vor 18,22,22 + + .long 0x7CE02799 + vxor 7,0,22 + + .long 0x7D832799 + + .long 0x7DBA2799 + .long 0x7DDB2799 + addi 4,4,0x40 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_three: + vxor 7,3,17 + vxor 12,4,18 + vxor 13,5,19 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,20,20 + vor 18,21,21 + + .long 0x7CE02799 + vxor 7,0,21 + + .long 0x7D832799 + .long 0x7DBA2799 + addi 4,4,0x30 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_two: + vxor 7,4,17 + vxor 12,5,18 + vxor 13,13,13 + vxor 14,14,14 + vxor 15,15,15 + + bl _aesp8_xts_dec5x + + + vor 17,19,19 + vor 18,20,20 + + .long 0x7CE02799 + vxor 7,0,20 + .long 0x7D832799 + addi 4,4,0x20 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_one: + vxor 7,5,17 + nop +.Loop_xts_dec1x: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bdnz .Loop_xts_dec1x + + subi 0,31,1 + .long 0x10E7C548 + + andi. 0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + + sub 10,10,0 + .long 0x10E7D548 + + .long 0x7C005699 + .long 0x10E7DD48 + + addi 7,1,64+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 17,17,31 + + + .long 0x10E7F548 + + mtctr 9 + .long 0x10E78D49 + + vor 17,18,18 + vor 18,19,19 + + .long 0x7CE02799 + addi 4,4,0x10 + vxor 7,0,19 + bne .Lxts_dec6x_steal + b .Lxts_dec6x_done + +.align 4 +.Lxts_dec6x_zero: + cmpwi 31,0 + beq .Lxts_dec6x_done + + .long 0x7C005699 + + vxor 7,0,18 +.Lxts_dec6x_steal: + .long 0x10E7C548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + lvx 25,3,7 + bdnz .Lxts_dec6x_steal + + add 10,10,31 + .long 0x10E7C548 + + cmpwi 31,0 + .long 0x10E7CD48 + + .long 0x7C005699 + .long 0x10E7D548 + + lvsr 5,0,31 + .long 0x10E7DD48 + + addi 7,1,64+15 + .long 0x10E7E548 + lvx 24,0,7 + + .long 0x10E7ED48 + lvx 25,3,7 + vxor 18,18,31 + + + .long 0x10E7F548 + + vperm 0,0,0,5 + .long 0x11679549 + + + + .long 0x7D602799 + + vxor 7,7,7 + vspltisb 12,-1 + vperm 7,7,12,5 + vsel 7,0,11,7 + vxor 7,7,17 + + subi 30,4,1 + mtctr 31 +.Loop_xts_dec6x_steal: + lbzu 0,1(30) + stb 0,16(30) + bdnz .Loop_xts_dec6x_steal + + li 31,0 + mtctr 9 + b .Loop_xts_dec1x + +.align 4 +.Lxts_dec6x_done: + cmpldi 8,0 + beq .Lxts_dec6x_ret + + vxor 8,17,23 + + .long 0x7D004799 + +.Lxts_dec6x_ret: + mtlr 11 + li 10,79 + li 11,95 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + stvx 9,10,1 + addi 10,10,32 + stvx 9,11,1 + addi 11,11,32 + + or 12,12,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + ld 
26,400(1) + ld 27,408(1) + ld 28,416(1) + ld 29,424(1) + ld 30,432(1) + ld 31,440(1) + addi 1,1,448 + blr +.long 0 +.byte 0,12,0x04,1,0x80,6,6,0 +.long 0 + +.align 5 +_aesp8_xts_dec5x: + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + lvx 24,26,7 + addi 7,7,0x20 + + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + lvx 25,3,7 + bdnz _aesp8_xts_dec5x + + subi 0,31,1 + .long 0x10E7C548 + .long 0x118CC548 + .long 0x11ADC548 + .long 0x11CEC548 + .long 0x11EFC548 + + andi. 0,0,16 + cmpwi 31,0 + .long 0x10E7CD48 + .long 0x118CCD48 + .long 0x11ADCD48 + .long 0x11CECD48 + .long 0x11EFCD48 + vxor 17,17,31 + + sub 10,10,0 + .long 0x10E7D548 + .long 0x118CD548 + .long 0x11ADD548 + .long 0x11CED548 + .long 0x11EFD548 + vxor 1,18,31 + + .long 0x10E7DD48 + .long 0x7C005699 + .long 0x118CDD48 + .long 0x11ADDD48 + .long 0x11CEDD48 + .long 0x11EFDD48 + vxor 2,19,31 + + addi 7,1,64+15 + .long 0x10E7E548 + .long 0x118CE548 + .long 0x11ADE548 + .long 0x11CEE548 + .long 0x11EFE548 + lvx 24,0,7 + vxor 3,20,31 + + .long 0x10E7ED48 + + .long 0x118CED48 + .long 0x11ADED48 + .long 0x11CEED48 + .long 0x11EFED48 + lvx 25,3,7 + vxor 4,21,31 + + .long 0x10E7F548 + .long 0x118CF548 + .long 0x11ADF548 + .long 0x11CEF548 + .long 0x11EFF548 + + .long 0x10E78D49 + .long 0x118C0D49 + .long 0x11AD1549 + .long 0x11CE1D49 + .long 0x11EF2549 + mtctr 9 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/aes/aesv8-armx.S b/contrib/openssl-cmake/asm/crypto/aes/aesv8-armx.S new file mode 100644 index 000000000000..a5f1ed7ac942 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/aesv8-armx.S @@ -0,0 +1,3916 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.arch armv8-a+crypto +.text +.section .rodata +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b +.previous +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq .Lenc_key_abort + cmp x2,#0 + b.eq .Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt .Lenc_key_abort + cmp w1,#256 + b.gt .Lenc_key_abort + tst w1,#0x3f + b.ne .Lenc_key_abort + + adrp x3,.Lrcon + add x3,x3,#:lo12:.Lrcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 +#ifdef __AARCH64EB__ + st1 {v4.4s},[x2],#16 + sub x2,x2,#8 +#else + st1 {v4.8b},[x2],#8 +#endif + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + mov x3,#0 + +.Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +.Ldec_key_abort: + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aese v2.16b,v1.16b + aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_enc + + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + AARCH64_VALID_CALL_TARGET + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aesd v2.16b,v1.16b + aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_dec + + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_ecb_encrypt +.type aes_v8_ecb_encrypt,%function +.align 5 +aes_v8_ecb_encrypt: + AARCH64_VALID_CALL_TARGET + subs x2,x2,#16 + // Original input data size bigger than 16, jump to big size processing. + b.ne .Lecb_big_size + ld1 {v0.16b},[x0] + cmp w4,#0 // en- or decrypting? + ldr w5,[x3,#240] + ld1 {v5.4s,v6.4s},[x3],#32 // load key schedule... + + b.eq .Lecb_small_dec + aese v0.16b,v5.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... + aese v0.16b,v6.16b + aesmc v0.16b,v0.16b + subs w5,w5,#10 // if rounds==10, jump to aes-128-ecb processing + b.eq .Lecb_128_enc +.Lecb_round_loop: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w5,w5,#2 // bias + b.gt .Lecb_round_loop +.Lecb_128_enc: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + st1 {v0.16b},[x1] + b .Lecb_Final_abort +.Lecb_small_dec: + aesd v0.16b,v5.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... 
+ aesd v0.16b,v6.16b + aesimc v0.16b,v0.16b + subs w5,w5,#10 // bias + b.eq .Lecb_128_dec +.Lecb_dec_round_loop: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w5,w5,#2 // bias + b.gt .Lecb_dec_round_loop +.Lecb_128_dec: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + st1 {v0.16b},[x1] + b .Lecb_Final_abort +.Lecb_big_size: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + mov x8,#16 + b.lo .Lecb_done + csel x8,xzr,x8,eq + + cmp w4,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lecb_dec + + ld1 {v1.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v1.16b,v1.16b + orr v24.16b,v1.16b,v1.16b + orr v1.16b,v0.16b,v0.16b + b.lo .Lecb_enc_tail + + orr v1.16b,v3.16b,v3.16b + ld1 {v24.16b},[x0],#16 + cmp x2,#32 + b.lo .Loop3x_ecb_enc + + ld1 {v25.16b},[x0],#16 + ld1 {v26.16b},[x0],#16 + sub x2,x2,#32 // bias + mov w6,w5 + +.Loop5x_ecb_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_ecb_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + cmp x2,#0x40 // because .Lecb_enc_tail4x + sub x2,x2,#0x50 + + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v1.16b,v18.16b + aesmc v1.16b,v1.16b + aese v24.16b,v18.16b + aesmc v24.16b,v24.16b + aese v25.16b,v18.16b + aesmc v25.16b,v25.16b + aese v26.16b,v18.16b + aesmc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lecb_enc_tail4x + + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + aese v1.16b,v19.16b + aesmc v1.16b,v1.16b + aese v24.16b,v19.16b + aesmc v24.16b,v24.16b + aese v25.16b,v19.16b + aesmc v25.16b,v25.16b + aese 
v26.16b,v19.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v25.16b,v20.16b + aesmc v25.16b,v25.16b + aese v26.16b,v20.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v25.16b,v21.16b + aesmc v25.16b,v25.16b + aese v26.16b,v21.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v25.16b,v22.16b + aesmc v25.16b,v25.16b + aese v26.16b,v22.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v23.16b + ld1 {v2.16b},[x0],#16 + aese v1.16b,v23.16b + ld1 {v3.16b},[x0],#16 + aese v24.16b,v23.16b + ld1 {v27.16b},[x0],#16 + aese v25.16b,v23.16b + ld1 {v28.16b},[x0],#16 + aese v26.16b,v23.16b + ld1 {v29.16b},[x0],#16 + cbz x6,.Lecb_enc_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v7.16b,v0.16b + orr v0.16b,v2.16b,v2.16b + eor v5.16b,v7.16b,v1.16b + orr v1.16b,v3.16b,v3.16b + eor v17.16b,v7.16b,v24.16b + orr v24.16b,v27.16b,v27.16b + eor v30.16b,v7.16b,v25.16b + orr v25.16b,v28.16b,v28.16b + eor v31.16b,v7.16b,v26.16b + st1 {v4.16b},[x1],#16 + orr v26.16b,v29.16b,v29.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_ecb_enc + + add x2,x2,#0x50 + cbz x2,.Lecb_done + + add w6,w5,#2 + subs x2,x2,#0x30 + orr v0.16b,v27.16b,v27.16b + orr v1.16b,v28.16b,v28.16b + orr v24.16b,v29.16b,v29.16b + b.lo .Lecb_enc_tail + + b .Loop3x_ecb_enc + +.align 4 +.Lecb_enc_tail4x: + eor v5.16b,v7.16b,v1.16b + eor v17.16b,v7.16b,v24.16b + eor v30.16b,v7.16b,v25.16b + eor v31.16b,v7.16b,v26.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + + b .Lecb_done +.align 4 +.Loop3x_ecb_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ecb_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + subs x2,x2,#0x30 + csel x6,x2,x6,lo // x6, w6, is zero at this point + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v24.16b + // are loaded with last "words" + mov x7,x3 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + ld1 {v2.16b},[x0],#16 + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + ld1 {v3.16b},[x0],#16 + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aese v0.16b,v23.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v7.16b,v0.16b + eor v5.16b,v7.16b,v1.16b + eor 
v24.16b,v24.16b,v7.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v24.16b},[x1],#16 + orr v24.16b,v27.16b,v27.16b + b.hs .Loop3x_ecb_enc + + cmn x2,#0x30 + b.eq .Lecb_done + nop + +.Lecb_enc_tail: + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lecb_enc_tail + + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + cmn x2,#0x20 + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + b.eq .Lecb_enc_one + eor v5.16b,v7.16b,v1.16b + eor v17.16b,v7.16b,v24.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lecb_done + +.Lecb_enc_one: + eor v5.16b,v7.16b,v24.16b + st1 {v5.16b},[x1],#16 + b .Lecb_done +.align 5 +.Lecb_dec: + ld1 {v1.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v1.16b,v1.16b + orr v24.16b,v1.16b,v1.16b + orr v1.16b,v0.16b,v0.16b + b.lo .Lecb_dec_tail + + orr v1.16b,v3.16b,v3.16b + ld1 {v24.16b},[x0],#16 + cmp x2,#32 + b.lo .Loop3x_ecb_dec + + ld1 {v25.16b},[x0],#16 + ld1 {v26.16b},[x0],#16 + sub x2,x2,#32 // bias + mov w6,w5 + +.Loop5x_ecb_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_ecb_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + cmp x2,#0x40 // because .Lecb_tail4x + sub x2,x2,#0x50 + + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v18.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v18.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v18.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v18.16b + aesimc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lecb_tail4x + + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v19.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v19.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v19.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v19.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + 
aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v20.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v20.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v21.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v21.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v22.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v22.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v23.16b + ld1 {v2.16b},[x0],#16 + aesd v1.16b,v23.16b + ld1 {v3.16b},[x0],#16 + aesd v24.16b,v23.16b + ld1 {v27.16b},[x0],#16 + aesd v25.16b,v23.16b + ld1 {v28.16b},[x0],#16 + aesd v26.16b,v23.16b + ld1 {v29.16b},[x0],#16 + cbz x6,.Lecb_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v7.16b,v0.16b + orr v0.16b,v2.16b,v2.16b + eor v5.16b,v7.16b,v1.16b + orr v1.16b,v3.16b,v3.16b + eor v17.16b,v7.16b,v24.16b + orr v24.16b,v27.16b,v27.16b + eor v30.16b,v7.16b,v25.16b + orr v25.16b,v28.16b,v28.16b + eor v31.16b,v7.16b,v26.16b + st1 {v4.16b},[x1],#16 + orr v26.16b,v29.16b,v29.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_ecb_dec + + add x2,x2,#0x50 + cbz x2,.Lecb_done + + add w6,w5,#2 + subs x2,x2,#0x30 + orr v0.16b,v27.16b,v27.16b + orr v1.16b,v28.16b,v28.16b + orr v24.16b,v29.16b,v29.16b + b.lo .Lecb_dec_tail + + b .Loop3x_ecb_dec + +.align 4 +.Lecb_tail4x: + eor v5.16b,v7.16b,v1.16b + eor v17.16b,v7.16b,v24.16b + eor v30.16b,v7.16b,v25.16b + eor v31.16b,v7.16b,v26.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + + b .Lecb_done +.align 4 +.Loop3x_ecb_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ecb_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + subs x2,x2,#0x30 + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v24.16b + // are loaded with last "words" + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v7.16b,v0.16b + eor v5.16b,v7.16b,v1.16b + eor v24.16b,v24.16b,v7.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + 
st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v24.16b},[x1],#16 + orr v24.16b,v27.16b,v27.16b + b.hs .Loop3x_ecb_dec + + cmn x2,#0x30 + b.eq .Lecb_done + nop + +.Lecb_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lecb_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + b.eq .Lecb_dec_one + eor v5.16b,v7.16b,v1.16b + eor v17.16b,v7.16b,v24.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lecb_done + +.Lecb_dec_one: + eor v5.16b,v7.16b,v24.16b + st1 {v5.16b},[x1],#16 + +.Lecb_done: + ldr x29,[sp],#16 +.Lecb_Final_abort: + ret +.size aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 +.Loop_cbc_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop + +.Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc + + st1 {v6.16b},[x1],#16 + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s,v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done +.align 5 +.Lcbc_dec: + ld1 {v24.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v27.16b,v24.16b,v24.16b + b.lo .Lcbc_dec_tail + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v27.16b,v24.16b,v24.16b + cmp x2,#32 + b.lo .Loop3x_cbc_dec + + ld1 {v25.16b},[x0],#16 + ld1 {v26.16b},[x0],#16 + sub x2,x2,#32 // bias + mov w6,w5 + orr v28.16b,v25.16b,v25.16b + orr v29.16b,v26.16b,v26.16b + +.Loop5x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + 
aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + cmp x2,#0x40 // because .Lcbc_tail4x + sub x2,x2,#0x50 + + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v18.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v18.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v18.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v18.16b + aesimc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lcbc_tail4x + + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v19.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v19.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v19.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v19.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v20.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v20.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v21.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v21.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v22.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v22.16b + aesimc v26.16b,v26.16b + + eor v4.16b,v6.16b,v7.16b + aesd v0.16b,v23.16b + eor v5.16b,v2.16b,v7.16b + ld1 {v2.16b},[x0],#16 + aesd v1.16b,v23.16b + eor v17.16b,v3.16b,v7.16b + ld1 {v3.16b},[x0],#16 + aesd v24.16b,v23.16b + eor v30.16b,v27.16b,v7.16b + ld1 {v27.16b},[x0],#16 + aesd v25.16b,v23.16b + eor v31.16b,v28.16b,v7.16b + ld1 {v28.16b},[x0],#16 + aesd v26.16b,v23.16b + orr v6.16b,v29.16b,v29.16b + ld1 {v29.16b},[x0],#16 + cbz x6,.Lcbc_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + orr v0.16b,v2.16b,v2.16b + eor v5.16b,v5.16b,v1.16b + orr v1.16b,v3.16b,v3.16b + eor v17.16b,v17.16b,v24.16b + orr v24.16b,v27.16b,v27.16b + eor v30.16b,v30.16b,v25.16b + orr v25.16b,v28.16b,v28.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + orr v26.16b,v29.16b,v29.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_cbc_dec + + add x2,x2,#0x50 + cbz x2,.Lcbc_done + + add w6,w5,#2 + subs x2,x2,#0x30 + orr v0.16b,v27.16b,v27.16b + orr v2.16b,v27.16b,v27.16b + orr v1.16b,v28.16b,v28.16b + orr v3.16b,v28.16b,v28.16b + orr v24.16b,v29.16b,v29.16b + orr v27.16b,v29.16b,v29.16b + b.lo .Lcbc_dec_tail + + b .Loop3x_cbc_dec + +.align 4 +.Lcbc_tail4x: + eor v5.16b,v4.16b,v1.16b + eor v17.16b,v17.16b,v24.16b + eor v30.16b,v30.16b,v25.16b + eor v31.16b,v31.16b,v26.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + + b .Lcbc_done +.align 4 +.Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd 
v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v24.16b + // are loaded with last "words" + orr v6.16b,v27.16b,v27.16b + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v24.16b},[x1],#16 + orr v24.16b,v27.16b,v27.16b + b.hs .Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + b.eq .Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v24.16b + orr v6.16b,v27.16b,v27.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lcbc_done + +.Lcbc_dec_one: + eor v5.16b,v5.16b,v24.16b + orr v6.16b,v27.16b,v27.16b + st1 {v5.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks_unroll12_eor3 +.type aes_v8_ctr32_encrypt_blocks_unroll12_eor3,%function +.align 5 +aes_v8_ctr32_encrypt_blocks_unroll12_eor3: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-80]! 
+ stp d8,d9,[sp, #16] + stp d10,d11,[sp, #32] + stp d12,d13,[sp, #48] + stp d14,d15,[sp, #64] + add x29,sp,#0 + + ldr w5,[x3,#240] + + ldr w8, [x4, #12] +#ifdef __AARCH64EB__ + ld1 {v24.16b},[x4] +#else + ld1 {v24.4s},[x4] +#endif + ld1 {v2.4s,v3.4s},[x3] // load key schedule... + sub w5,w5,#4 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last round key + sub w5,w5,#2 + add x7, x7, #64 + ld1 {v1.4s},[x7] + add x7,x3,#32 + mov w6,w5 +#ifndef __AARCH64EB__ + rev w8, w8 +#endif + + orr v25.16b,v24.16b,v24.16b + add w10, w8, #1 + orr v26.16b,v24.16b,v24.16b + add w8, w8, #2 + orr v0.16b,v24.16b,v24.16b + rev w10, w10 + mov v25.s[3],w10 + b.ls .Lctr32_tail_unroll + cmp x2,#6 + rev w12, w8 + sub x2,x2,#3 // bias + mov v26.s[3],w12 + b.lo .Loop3x_ctr32_unroll + cmp x2,#9 + orr v27.16b,v24.16b,v24.16b + add w11, w8, #1 + orr v28.16b,v24.16b,v24.16b + add w13, w8, #2 + rev w11, w11 + orr v29.16b,v24.16b,v24.16b + add w8, w8, #3 + rev w13, w13 + mov v27.s[3],w11 + rev w14, w8 + mov v28.s[3],w13 + mov v29.s[3],w14 + sub x2,x2,#3 + b.lo .Loop6x_ctr32_unroll + + // push regs to stack when 12 data chunks are interleaved + stp x19,x20,[sp,#-16]! + stp x21,x22,[sp,#-16]! + stp x23,x24,[sp,#-16]! + stp d8,d9,[sp,#-32]! + stp d10,d11,[sp,#-32]! + + add w15,w8,#1 + add w19,w8,#2 + add w20,w8,#3 + add w21,w8,#4 + add w22,w8,#5 + add w8,w8,#6 + orr v30.16b,v24.16b,v24.16b + rev w15,w15 + orr v31.16b,v24.16b,v24.16b + rev w19,w19 + orr v8.16b,v24.16b,v24.16b + rev w20,w20 + orr v9.16b,v24.16b,v24.16b + rev w21,w21 + orr v10.16b,v24.16b,v24.16b + rev w22,w22 + orr v11.16b,v24.16b,v24.16b + rev w23,w8 + + sub x2,x2,#6 // bias + mov v30.s[3],w15 + mov v31.s[3],w19 + mov v8.s[3],w20 + mov v9.s[3],w21 + mov v10.s[3],w22 + mov v11.s[3],w23 + b .Loop12x_ctr32_unroll + +.align 4 +.Loop12x_ctr32_unroll: + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + aese v30.16b,v2.16b + aesmc v30.16b,v30.16b + aese v31.16b,v2.16b + aesmc v31.16b,v31.16b + aese v8.16b,v2.16b + aesmc v8.16b,v8.16b + aese v9.16b,v2.16b + aesmc v9.16b,v9.16b + aese v10.16b,v2.16b + aesmc v10.16b,v10.16b + aese v11.16b,v2.16b + aesmc v11.16b,v11.16b + ld1 {v2.4s},[x7],#16 + subs w6,w6,#2 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + aese v27.16b,v3.16b + aesmc v27.16b,v27.16b + aese v28.16b,v3.16b + aesmc v28.16b,v28.16b + aese v29.16b,v3.16b + aesmc v29.16b,v29.16b + aese v30.16b,v3.16b + aesmc v30.16b,v30.16b + aese v31.16b,v3.16b + aesmc v31.16b,v31.16b + aese v8.16b,v3.16b + aesmc v8.16b,v8.16b + aese v9.16b,v3.16b + aesmc v9.16b,v9.16b + aese v10.16b,v3.16b + aesmc v10.16b,v10.16b + aese v11.16b,v3.16b + aesmc v11.16b,v11.16b + ld1 {v3.4s},[x7],#16 + b.gt .Loop12x_ctr32_unroll + + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + aese v30.16b,v2.16b + aesmc v30.16b,v30.16b + aese v31.16b,v2.16b + aesmc v31.16b,v31.16b + aese v8.16b,v2.16b + aesmc v8.16b,v8.16b + aese v9.16b,v2.16b + aesmc v9.16b,v9.16b + aese v10.16b,v2.16b + aesmc v10.16b,v10.16b + aese v11.16b,v2.16b + aesmc v11.16b,v11.16b + ld1 
{v2.4s},[x7],#16 + + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + aese v27.16b,v3.16b + aesmc v27.16b,v27.16b + aese v28.16b,v3.16b + aesmc v28.16b,v28.16b + aese v29.16b,v3.16b + aesmc v29.16b,v29.16b + aese v30.16b,v3.16b + aesmc v30.16b,v30.16b + aese v31.16b,v3.16b + aesmc v31.16b,v31.16b + aese v8.16b,v3.16b + aesmc v8.16b,v8.16b + aese v9.16b,v3.16b + aesmc v9.16b,v9.16b + aese v10.16b,v3.16b + aesmc v10.16b,v10.16b + aese v11.16b,v3.16b + aesmc v11.16b,v11.16b + ld1 {v3.4s},[x7],#16 + + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + add w9,w8,#1 + add w10,w8,#2 + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + add w12,w8,#3 + add w11,w8,#4 + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + add w13,w8,#5 + add w14,w8,#6 + rev w9,w9 + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + add w15,w8,#7 + add w19,w8,#8 + rev w10,w10 + rev w12,w12 + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + add w20,w8,#9 + add w21,w8,#10 + rev w11,w11 + rev w13,w13 + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + add w22,w8,#11 + add w23,w8,#12 + rev w14,w14 + rev w15,w15 + aese v30.16b,v2.16b + aesmc v30.16b,v30.16b + rev w19,w19 + rev w20,w20 + aese v31.16b,v2.16b + aesmc v31.16b,v31.16b + rev w21,w21 + rev w22,w22 + aese v8.16b,v2.16b + aesmc v8.16b,v8.16b + rev w23,w23 + aese v9.16b,v2.16b + aesmc v9.16b,v9.16b + aese v10.16b,v2.16b + aesmc v10.16b,v10.16b + aese v11.16b,v2.16b + aesmc v11.16b,v11.16b + ld1 {v2.4s},[x7],#16 + + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + aese v27.16b,v3.16b + aesmc v27.16b,v27.16b + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + aese v28.16b,v3.16b + aesmc v28.16b,v28.16b + aese v29.16b,v3.16b + aesmc v29.16b,v29.16b + aese v30.16b,v3.16b + aesmc v30.16b,v30.16b + aese v31.16b,v3.16b + aesmc v31.16b,v31.16b + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + aese v8.16b,v3.16b + aesmc v8.16b,v8.16b + aese v9.16b,v3.16b + aesmc v9.16b,v9.16b + aese v10.16b,v3.16b + aesmc v10.16b,v10.16b + aese v11.16b,v3.16b + aesmc v11.16b,v11.16b + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + ld1 {v3.4s},[x7],#16 + + mov x7, x3 + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + aese v30.16b,v2.16b + aesmc v30.16b,v30.16b + aese v31.16b,v2.16b + aesmc v31.16b,v31.16b + aese v8.16b,v2.16b + aesmc v8.16b,v8.16b + aese v9.16b,v2.16b + aesmc v9.16b,v9.16b + aese v10.16b,v2.16b + aesmc v10.16b,v10.16b + aese v11.16b,v2.16b + aesmc v11.16b,v11.16b + ld1 {v2.4s},[x7],#16 // re-pre-load rndkey[0] + + aese v24.16b,v3.16b +.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b + orr v24.16b,v0.16b,v0.16b + aese v25.16b,v3.16b +.inst 0xce0164a5 //eor3 v5.16b,v5.16b,v1.16b,v25.16b + orr v25.16b,v0.16b,v0.16b + aese v26.16b,v3.16b +.inst 0xce0168c6 //eor3 v6.16b,v6.16b,v1.16b,v26.16b + orr v26.16b,v0.16b,v0.16b + aese v27.16b,v3.16b +.inst 0xce016ce7 //eor3 v7.16b,v7.16b,v1.16b,v27.16b + orr v27.16b,v0.16b,v0.16b + aese v28.16b,v3.16b +.inst 0xce017210 //eor3 v16.16b,v16.16b,v1.16b,v28.16b + orr v28.16b,v0.16b,v0.16b + aese v29.16b,v3.16b +.inst 0xce017631 //eor3 v17.16b,v17.16b,v1.16b,v29.16b + orr v29.16b,v0.16b,v0.16b + aese v30.16b,v3.16b +.inst 0xce017a52 //eor3 v18.16b,v18.16b,v1.16b,v30.16b + 
orr v30.16b,v0.16b,v0.16b + aese v31.16b,v3.16b +.inst 0xce017e73 //eor3 v19.16b,v19.16b,v1.16b,v31.16b + orr v31.16b,v0.16b,v0.16b + aese v8.16b,v3.16b +.inst 0xce012294 //eor3 v20.16b,v20.16b,v1.16b,v8.16b + orr v8.16b,v0.16b,v0.16b + aese v9.16b,v3.16b +.inst 0xce0126b5 //eor3 v21.16b,v21.16b,v1.16b,v9.16b + orr v9.16b,v0.16b,v0.16b + aese v10.16b,v3.16b +.inst 0xce012ad6 //eor3 v22.16b,v22.16b,v1.16b,v10.16b + orr v10.16b,v0.16b,v0.16b + aese v11.16b,v3.16b +.inst 0xce012ef7 //eor3 v23.16b,v23.16b,v1.16b,v11.16b + orr v11.16b,v0.16b,v0.16b + ld1 {v3.4s},[x7],#16 // re-pre-load rndkey[1] + + mov v24.s[3],w9 + mov v25.s[3],w10 + mov v26.s[3],w12 + mov v27.s[3],w11 + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + mov v28.s[3],w13 + mov v29.s[3],w14 + mov v30.s[3],w15 + mov v31.s[3],w19 + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 + mov v8.s[3],w20 + mov v9.s[3],w21 + mov v10.s[3],w22 + mov v11.s[3],w23 + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + mov w6,w5 + + add w8,w8,#12 + subs x2,x2,#12 + b.hs .Loop12x_ctr32_unroll + + // pop regs from stack when 12 data chunks are interleaved + ldp d10,d11,[sp],#32 + ldp d8,d9,[sp],#32 + ldp x23,x24,[sp],#16 + ldp x21,x22,[sp],#16 + ldp x19,x20,[sp],#16 + + add x2,x2,#12 + cbz x2,.Lctr32_done_unroll + sub w8,w8,#12 + + cmp x2,#2 + b.ls .Lctr32_tail_unroll + + cmp x2,#6 + sub x2,x2,#3 // bias + add w8,w8,#3 + b.lo .Loop3x_ctr32_unroll + + sub x2,x2,#3 + add w8,w8,#3 + b.lo .Loop6x_ctr32_unroll + +.align 4 +.Loop6x_ctr32_unroll: + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + ld1 {v2.4s},[x7],#16 + subs w6,w6,#2 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + aese v27.16b,v3.16b + aesmc v27.16b,v27.16b + aese v28.16b,v3.16b + aesmc v28.16b,v28.16b + aese v29.16b,v3.16b + aesmc v29.16b,v29.16b + ld1 {v3.4s},[x7],#16 + b.gt .Loop6x_ctr32_unroll + + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + ld1 {v2.4s},[x7],#16 + + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + aese v27.16b,v3.16b + aesmc v27.16b,v27.16b + aese v28.16b,v3.16b + aesmc v28.16b,v28.16b + aese v29.16b,v3.16b + aesmc v29.16b,v29.16b + ld1 {v3.4s},[x7],#16 + + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + add w9,w8,#1 + add w10,w8,#2 + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + add w12,w8,#3 + add w11,w8,#4 + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + add w13,w8,#5 + add w14,w8,#6 + rev w9,w9 + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + rev w10,w10 + rev w12,w12 + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + rev w11,w11 + rev w13,w13 + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + rev w14,w14 + ld1 {v2.4s},[x7],#16 + + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + aese v27.16b,v3.16b + aesmc v27.16b,v27.16b + ld1 {v16.16b,v17.16b},[x0],#32 + aese v28.16b,v3.16b + aesmc v28.16b,v28.16b + aese v29.16b,v3.16b 
+ aesmc v29.16b,v29.16b + ld1 {v3.4s},[x7],#16 + + mov x7, x3 + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + aese v27.16b,v2.16b + aesmc v27.16b,v27.16b + aese v28.16b,v2.16b + aesmc v28.16b,v28.16b + aese v29.16b,v2.16b + aesmc v29.16b,v29.16b + ld1 {v2.4s},[x7],#16 // re-pre-load rndkey[0] + + aese v24.16b,v3.16b +.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b + aese v25.16b,v3.16b +.inst 0xce0164a5 //eor3 v5.16b,v5.16b,v1.16b,v25.16b + aese v26.16b,v3.16b +.inst 0xce0168c6 //eor3 v6.16b,v6.16b,v1.16b,v26.16b + aese v27.16b,v3.16b +.inst 0xce016ce7 //eor3 v7.16b,v7.16b,v1.16b,v27.16b + aese v28.16b,v3.16b +.inst 0xce017210 //eor3 v16.16b,v16.16b,v1.16b,v28.16b + aese v29.16b,v3.16b +.inst 0xce017631 //eor3 v17.16b,v17.16b,v1.16b,v29.16b + ld1 {v3.4s},[x7],#16 // re-pre-load rndkey[1] + + orr v24.16b,v0.16b,v0.16b + orr v25.16b,v0.16b,v0.16b + orr v26.16b,v0.16b,v0.16b + orr v27.16b,v0.16b,v0.16b + orr v28.16b,v0.16b,v0.16b + orr v29.16b,v0.16b,v0.16b + + mov v24.s[3],w9 + mov v25.s[3],w10 + st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + mov v26.s[3],w12 + mov v27.s[3],w11 + st1 {v16.16b,v17.16b},[x1],#32 + mov v28.s[3],w13 + mov v29.s[3],w14 + + cbz x2,.Lctr32_done_unroll + mov w6,w5 + + cmp x2,#2 + b.ls .Lctr32_tail_unroll + + sub x2,x2,#3 // bias + add w8,w8,#3 + b .Loop3x_ctr32_unroll + +.align 4 +.Loop3x_ctr32_unroll: + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + ld1 {v2.4s},[x7],#16 + subs w6,w6,#2 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + aese v26.16b,v3.16b + aesmc v26.16b,v26.16b + ld1 {v3.4s},[x7],#16 + b.gt .Loop3x_ctr32_unroll + + aese v24.16b,v2.16b + aesmc v9.16b,v24.16b + aese v25.16b,v2.16b + aesmc v10.16b,v25.16b + ld1 {v4.16b,v5.16b,v6.16b},[x0],#48 + orr v24.16b,v0.16b,v0.16b + aese v26.16b,v2.16b + aesmc v26.16b,v26.16b + ld1 {v2.4s},[x7],#16 + orr v25.16b,v0.16b,v0.16b + aese v9.16b,v3.16b + aesmc v9.16b,v9.16b + aese v10.16b,v3.16b + aesmc v10.16b,v10.16b + aese v26.16b,v3.16b + aesmc v11.16b,v26.16b + ld1 {v3.4s},[x7],#16 + orr v26.16b,v0.16b,v0.16b + add w9,w8,#1 + aese v9.16b,v2.16b + aesmc v9.16b,v9.16b + aese v10.16b,v2.16b + aesmc v10.16b,v10.16b + add w10,w8,#2 + aese v11.16b,v2.16b + aesmc v11.16b,v11.16b + ld1 {v2.4s},[x7],#16 + add w8,w8,#3 + aese v9.16b,v3.16b + aesmc v9.16b,v9.16b + aese v10.16b,v3.16b + aesmc v10.16b,v10.16b + + rev w9,w9 + aese v11.16b,v3.16b + aesmc v11.16b,v11.16b + ld1 {v3.4s},[x7],#16 + mov v24.s[3], w9 + mov x7,x3 + rev w10,w10 + aese v9.16b,v2.16b + aesmc v9.16b,v9.16b + + aese v10.16b,v2.16b + aesmc v10.16b,v10.16b + mov v25.s[3], w10 + rev w12,w8 + aese v11.16b,v2.16b + aesmc v11.16b,v11.16b + mov v26.s[3], w12 + + aese v9.16b,v3.16b + aese v10.16b,v3.16b + aese v11.16b,v3.16b + +.inst 0xce012484 //eor3 v4.16b,v4.16b,v1.16b,v9.16b + ld1 {v2.4s},[x7],#16 // re-pre-load rndkey[0] +.inst 0xce0128a5 //eor3 v5.16b,v5.16b,v1.16b,v10.16b + mov w6,w5 +.inst 0xce012cc6 //eor3 v6.16b,v6.16b,v1.16b,v11.16b + ld1 {v3.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b,v5.16b,v6.16b},[x1],#48 + + cbz x2,.Lctr32_done_unroll + +.Lctr32_tail_unroll: + cmp x2,#1 + b.eq .Lctr32_tail_1_unroll + +.Lctr32_tail_2_unroll: + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + ld1 {v2.4s},[x7],#16 + subs w6,w6,#2 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese 
v25.16b,v3.16b + aesmc v25.16b,v25.16b + ld1 {v3.4s},[x7],#16 + b.gt .Lctr32_tail_2_unroll + + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + ld1 {v2.4s},[x7],#16 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + ld1 {v3.4s},[x7],#16 + ld1 {v4.16b,v5.16b},[x0],#32 + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + ld1 {v2.4s},[x7],#16 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + aese v25.16b,v3.16b + aesmc v25.16b,v25.16b + ld1 {v3.4s},[x7],#16 + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v25.16b,v2.16b + aesmc v25.16b,v25.16b + aese v24.16b,v3.16b + aese v25.16b,v3.16b + +.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b +.inst 0xce0164a5 //eor3 v5.16b,v5.16b,v1.16b,v25.16b + st1 {v4.16b,v5.16b},[x1],#32 + b .Lctr32_done_unroll + +.Lctr32_tail_1_unroll: + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + ld1 {v2.4s},[x7],#16 + subs w6,w6,#2 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + ld1 {v3.4s},[x7],#16 + b.gt .Lctr32_tail_1_unroll + + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + ld1 {v2.4s},[x7],#16 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + ld1 {v3.4s},[x7],#16 + ld1 {v4.16b},[x0] + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + ld1 {v2.4s},[x7],#16 + aese v24.16b,v3.16b + aesmc v24.16b,v24.16b + ld1 {v3.4s},[x7],#16 + aese v24.16b,v2.16b + aesmc v24.16b,v24.16b + aese v24.16b,v3.16b + +.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b + st1 {v4.16b},[x1],#16 + +.Lctr32_done_unroll: + ldp d8,d9,[sp, #16] + ldp d10,d11,[sp, #32] + ldp d12,d13,[sp, #48] + ldp d14,d15,[sp, #64] + ldr x29,[sp],#80 + ret +.size aes_v8_ctr32_encrypt_blocks_unroll12_eor3,.-aes_v8_ctr32_encrypt_blocks_unroll12_eor3 +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] +#ifdef __AARCH64EB__ + ld1 {v0.16b},[x4] +#else + ld1 {v0.4s},[x4] +#endif + ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo +#ifndef __AARCH64EB__ + rev w8, w8 +#endif + orr v1.16b,v0.16b,v0.16b + add w10, w8, #1 + orr v18.16b,v0.16b,v0.16b + add w8, w8, #2 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v1.s[3],w10 + b.ls .Lctr32_tail + rev w12, w8 + sub x2,x2,#3 // bias + mov v18.s[3],w12 + cmp x2,#32 + b.lo .Loop3x_ctr32 + + add w13,w8,#1 + add w14,w8,#2 + orr v24.16b,v0.16b,v0.16b + rev w13,w13 + orr v25.16b,v0.16b,v0.16b + rev w14,w14 + mov v24.s[3],w13 + sub x2,x2,#2 // bias + mov v25.s[3],w14 + add w8,w8,#2 + b .Loop5x_ctr32 + +.align 4 +.Loop5x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_ctr32 + + mov x7,x3 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + add w9,w8,#1 + add w10,w8,#2 + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + add w12,w8,#3 + add w13,w8,#4 + aese v18.16b,v20.16b + aesmc v18.16b,v18.16b + add w14,w8,#5 + rev w9,w9 + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + rev w10,w10 + rev w12,w12 + aese v25.16b,v20.16b + aesmc v25.16b,v25.16b + rev w13,w13 + rev w14,w14 + + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v18.16b,v21.16b + aesmc v18.16b,v18.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v25.16b,v21.16b + aesmc v25.16b,v25.16b + + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + ld1 {v2.16b},[x0],#16 + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0],#16 + aese v18.16b,v22.16b + aesmc v18.16b,v18.16b + ld1 {v19.16b},[x0],#16 + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + ld1 {v26.16b},[x0],#16 + aese v25.16b,v22.16b + aesmc v25.16b,v25.16b + ld1 {v27.16b},[x0],#16 + + aese v0.16b,v23.16b + eor v2.16b,v2.16b,v7.16b + aese v1.16b,v23.16b + eor v3.16b,v3.16b,v7.16b + aese v18.16b,v23.16b + eor v19.16b,v19.16b,v7.16b + aese v24.16b,v23.16b + eor v26.16b,v26.16b,v7.16b + aese v25.16b,v23.16b + eor v27.16b,v27.16b,v7.16b + + eor v2.16b,v2.16b,v0.16b + orr v0.16b,v6.16b,v6.16b + eor v3.16b,v3.16b,v1.16b + orr v1.16b,v6.16b,v6.16b + eor v19.16b,v19.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + eor v26.16b,v26.16b,v24.16b + orr v24.16b,v6.16b,v6.16b + eor v27.16b,v27.16b,v25.16b + orr v25.16b,v6.16b,v6.16b + + st1 {v2.16b},[x1],#16 + mov v0.s[3],w9 + st1 {v3.16b},[x1],#16 + mov v1.s[3],w10 + st1 {v19.16b},[x1],#16 + mov v18.s[3],w12 + st1 
{v26.16b},[x1],#16 + mov v24.s[3],w13 + st1 {v27.16b},[x1],#16 + mov v25.s[3],w14 + + mov w6,w5 + cbz x2,.Lctr32_done + + add w8,w8,#5 + subs x2,x2,#5 + b.hs .Loop5x_ctr32 + + add x2,x2,#5 + sub w8,w8,#5 + + cmp x2,#2 + mov x12,#16 + csel x12,xzr,x12,lo + b.ls .Lctr32_tail + + sub x2,x2,#3 // bias + add w8,w8,#3 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + orr v0.16b,v6.16b,v6.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + add w9,w8,#1 + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + eor v19.16b,v19.16b,v7.16b + rev w9,w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + mov v0.s[3], w9 + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + mov v1.s[3], w10 + rev w12,w8 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + mov v18.s[3], w12 + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +.Lctr32_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +.globl aes_v8_xts_encrypt +.type aes_v8_xts_encrypt,%function +.align 5 +aes_v8_xts_encrypt: + AARCH64_VALID_CALL_TARGET + cmp x2,#16 + // Original input data size bigger than 16, jump to big size processing. 
+ b.ne .Lxts_enc_big_size + // Encrypt the iv with key2, as the first XEX iv. + ldr w6,[x4,#240] + ld1 {v0.4s},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.4s},[x4],#16 + +.Loop_enc_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_enc_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + ld1 {v0.16b},[x0] + eor v0.16b,v6.16b,v0.16b + + ldr w6,[x3,#240] + ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... + + aese v0.16b,v28.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... + aese v0.16b,v29.16b + aesmc v0.16b,v0.16b + subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing + b.eq .Lxts_128_enc +.Lxts_enc_round_loop: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 // bias + b.gt .Lxts_enc_round_loop +.Lxts_128_enc: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + eor v0.16b,v0.16b,v6.16b + st1 {v0.16b},[x1] + b .Lxts_enc_final_abort + +.align 4 +.Lxts_enc_big_size: + stp x19,x20,[sp,#-64]! + stp x21,x22,[sp,#48] + stp d8,d9,[sp,#32] + stp d10,d11,[sp,#16] + + // tailcnt store the tail value of length%16. + and x21,x2,#0xf + and x2,x2,#-16 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lxts_abort + csel x8,xzr,x8,eq + + // Firstly, encrypt the iv with key2, as the first iv of XEX. + ldr w6,[x4,#240] + ld1 {v0.4s},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.4s},[x4],#16 + +.Loop_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + // The iv for second block + // x9- iv(low), x10 - iv(high) + // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d8,x9 + fmov v8.d[1],x10 + + ldr w5,[x3,#240] // next starting point + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + + // Encryption +.Lxts_enc: + ld1 {v24.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v28.16b,v0.16b,v0.16b + orr v27.16b,v24.16b,v24.16b + orr v29.16b,v24.16b,v24.16b + b.lo .Lxts_inner_enc_tail + eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv + eor v24.16b,v24.16b,v8.16b + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d9,x9 + fmov v9.d[1],x10 + + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + eor v27.16b,v24.16b,v9.16b // the third block + eor v24.16b,v24.16b,v9.16b + cmp x2,#32 + b.lo .Lxts_outer_enc_tail + + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d10,x9 + fmov v10.d[1],x10 + + ld1 {v25.16b},[x0],#16 + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v26.16b},[x0],#16 + eor v25.16b,v25.16b,v10.16b // the fourth block + eor v26.16b,v26.16b,v11.16b + sub x2,x2,#32 // bias + mov w6,w5 + b .Loop5x_xts_enc + +.align 4 +.Loop5x_xts_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_xts_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + aese v26.16b,v16.16b + aesmc v26.16b,v26.16b + subs x2,x2,#0x50 // because .Lxts_enc_tail4x + + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + aese v26.16b,v17.16b + aesmc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v1.16b,v18.16b + aesmc v1.16b,v1.16b + aese v24.16b,v18.16b + aesmc v24.16b,v24.16b + aese v25.16b,v18.16b + aesmc v25.16b,v25.16b + aese v26.16b,v18.16b + aesmc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lxts_enc_tail4x + + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + aese v1.16b,v19.16b + aesmc v1.16b,v1.16b + aese v24.16b,v19.16b + aesmc v24.16b,v24.16b + aese v25.16b,v19.16b + aesmc v25.16b,v25.16b + aese v26.16b,v19.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v25.16b,v20.16b + aesmc v25.16b,v25.16b + aese v26.16b,v20.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + 
aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v25.16b,v21.16b + aesmc v25.16b,v25.16b + aese v26.16b,v21.16b + aesmc v26.16b,v26.16b + + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v25.16b,v22.16b + aesmc v25.16b,v25.16b + aese v26.16b,v22.16b + aesmc v26.16b,v26.16b + + eor v4.16b,v7.16b,v6.16b + aese v0.16b,v23.16b + // The iv for first block of one iteration + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v7.16b,v8.16b + ld1 {v2.16b},[x0],#16 + aese v1.16b,v23.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d8,x9 + fmov v8.d[1],x10 + eor v17.16b,v7.16b,v9.16b + ld1 {v3.16b},[x0],#16 + aese v24.16b,v23.16b + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d9,x9 + fmov v9.d[1],x10 + eor v30.16b,v7.16b,v10.16b + ld1 {v27.16b},[x0],#16 + aese v25.16b,v23.16b + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d10,x9 + fmov v10.d[1],x10 + eor v31.16b,v7.16b,v11.16b + ld1 {v28.16b},[x0],#16 + aese v26.16b,v23.16b + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v29.16b},[x0],#16 + cbz x6,.Lxts_enc_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v0.16b,v2.16b,v6.16b + eor v5.16b,v5.16b,v1.16b + eor v1.16b,v3.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v24.16b,v27.16b,v9.16b + eor v30.16b,v30.16b,v25.16b + eor v25.16b,v28.16b,v10.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + eor v26.16b,v29.16b,v11.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_xts_enc + + + // If left 4 blocks, borrow the five block's processing. 
+ cmn x2,#0x10 + b.ne .Loop5x_enc_after + orr v11.16b,v10.16b,v10.16b + orr v10.16b,v9.16b,v9.16b + orr v9.16b,v8.16b,v8.16b + orr v8.16b,v6.16b,v6.16b + fmov x9,d11 + fmov x10,v11.d[1] + eor v0.16b,v6.16b,v2.16b + eor v1.16b,v8.16b,v3.16b + eor v24.16b,v27.16b,v9.16b + eor v25.16b,v28.16b,v10.16b + eor v26.16b,v29.16b,v11.16b + b.eq .Loop5x_xts_enc + +.Loop5x_enc_after: + add x2,x2,#0x50 + cbz x2,.Lxts_enc_done + + add w6,w5,#2 + subs x2,x2,#0x30 + b.lo .Lxts_inner_enc_tail + + eor v0.16b,v6.16b,v27.16b + eor v1.16b,v8.16b,v28.16b + eor v24.16b,v29.16b,v9.16b + b .Lxts_outer_enc_tail + +.align 4 +.Lxts_enc_tail4x: + add x0,x0,#16 + eor v5.16b,v1.16b,v5.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v24.16b,v17.16b + st1 {v17.16b},[x1],#16 + eor v30.16b,v25.16b,v30.16b + eor v31.16b,v26.16b,v31.16b + st1 {v30.16b,v31.16b},[x1],#32 + + b .Lxts_enc_done +.align 4 +.Lxts_outer_enc_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_outer_enc_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + //mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr#31 + eor x9,x11,x9,lsl#1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + eor v17.16b,v9.16b,v7.16b + + add x6,x6,#0x20 + add x0,x0,x6 + mov x7,x3 + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + ld1 {v27.16b},[x0],#16 + add w6,w5,#2 + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + st1 {v5.16b},[x1],#16 + st1 {v24.16b},[x1],#16 + cmn x2,#0x30 + b.eq .Lxts_enc_done +.Lxts_encxor_one: + orr v28.16b,v3.16b,v3.16b + orr v29.16b,v27.16b,v27.16b + nop + +.Lxts_inner_enc_tail: + cmn x2,#0x10 + eor v1.16b,v28.16b,v6.16b + eor v24.16b,v29.16b,v8.16b + b.eq .Lxts_enc_tail_loop + eor v24.16b,v29.16b,v6.16b +.Lxts_enc_tail_loop: + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_enc_tail_loop + + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + cmn x2,#0x20 
+ aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + eor v5.16b,v6.16b,v7.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + eor v17.16b,v8.16b,v7.16b + aese v1.16b,v23.16b + aese v24.16b,v23.16b + b.eq .Lxts_enc_one + eor v5.16b,v5.16b,v1.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v17.16b,v24.16b + orr v6.16b,v8.16b,v8.16b + st1 {v17.16b},[x1],#16 + fmov x9,d8 + fmov x10,v8.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + b .Lxts_enc_done + +.Lxts_enc_one: + eor v5.16b,v5.16b,v24.16b + orr v6.16b,v6.16b,v6.16b + st1 {v5.16b},[x1],#16 + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + b .Lxts_enc_done +.align 5 +.Lxts_enc_done: + // Process the tail block with cipher stealing. + tst x21,#0xf + b.eq .Lxts_abort + + mov x20,x0 + mov x13,x1 + sub x1,x1,#16 +.composite_enc_loop: + subs x21,x21,#1 + ldrb w15,[x1,x21] + ldrb w14,[x20,x21] + strb w15,[x13,x21] + strb w14,[x1,x21] + b.gt .composite_enc_loop +.Lxts_enc_load_done: + ld1 {v26.16b},[x1] + eor v26.16b,v26.16b,v6.16b + + // Encrypt the composite block to get the last second encrypted text block + ldr w6,[x3,#240] // load key schedule... + ld1 {v0.4s},[x3],#16 + sub w6,w6,#2 + ld1 {v1.4s},[x3],#16 // load key schedule... +.Loop_final_enc: + aese v26.16b,v0.16b + aesmc v26.16b,v26.16b + ld1 {v0.4s},[x3],#16 + subs w6,w6,#2 + aese v26.16b,v1.16b + aesmc v26.16b,v26.16b + ld1 {v1.4s},[x3],#16 + b.gt .Loop_final_enc + + aese v26.16b,v0.16b + aesmc v26.16b,v26.16b + ld1 {v0.4s},[x3] + aese v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v6.16b + st1 {v26.16b},[x1] + +.Lxts_abort: + ldp x21,x22,[sp,#48] + ldp d8,d9,[sp,#32] + ldp d10,d11,[sp,#16] + ldp x19,x20,[sp],#64 +.Lxts_enc_final_abort: + ret +.size aes_v8_xts_encrypt,.-aes_v8_xts_encrypt +.globl aes_v8_xts_decrypt +.type aes_v8_xts_decrypt,%function +.align 5 +aes_v8_xts_decrypt: + AARCH64_VALID_CALL_TARGET + cmp x2,#16 + // Original input data size bigger than 16, jump to big size processing. + b.ne .Lxts_dec_big_size + // Encrypt the iv with key2, as the first XEX iv. + ldr w6,[x4,#240] + ld1 {v0.4s},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.4s},[x4],#16 + +.Loop_dec_small_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_dec_small_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + ld1 {v0.16b},[x0] + eor v0.16b,v6.16b,v0.16b + + ldr w6,[x3,#240] + ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule... + + aesd v0.16b,v28.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule... + aesd v0.16b,v29.16b + aesimc v0.16b,v0.16b + subs w6,w6,#10 // bias + b.eq .Lxts_128_dec +.Lxts_dec_round_loop: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + ld1 {v16.4s},[x3],#16 // load key schedule... + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v17.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 // bias + b.gt .Lxts_dec_round_loop +.Lxts_128_dec: + ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule... + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule... 
+ aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule... + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + ld1 {v7.4s},[x3] + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v0.16b,v23.16b + eor v0.16b,v0.16b,v7.16b + eor v0.16b,v6.16b,v0.16b + st1 {v0.16b},[x1] + b .Lxts_dec_final_abort +.Lxts_dec_big_size: + stp x19,x20,[sp,#-64]! + stp x21,x22,[sp,#48] + stp d8,d9,[sp,#32] + stp d10,d11,[sp,#16] + + and x21,x2,#0xf + and x2,x2,#-16 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lxts_dec_abort + + // Encrypt the iv with key2, as the first XEX iv + ldr w6,[x4,#240] + ld1 {v0.4s},[x4],#16 + ld1 {v6.16b},[x5] + sub w6,w6,#2 + ld1 {v1.4s},[x4],#16 + +.Loop_dec_iv_enc: + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4],#16 + subs w6,w6,#2 + aese v6.16b,v1.16b + aesmc v6.16b,v6.16b + ld1 {v1.4s},[x4],#16 + b.gt .Loop_dec_iv_enc + + aese v6.16b,v0.16b + aesmc v6.16b,v6.16b + ld1 {v0.4s},[x4] + aese v6.16b,v1.16b + eor v6.16b,v6.16b,v0.16b + + // The iv for second block + // x9- iv(low), x10 - iv(high) + // the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b + fmov x9,d6 + fmov x10,v6.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + + ldr w5,[x3,#240] // load rounds number + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... + sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 // load key schedule... + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d10,x9 + fmov v10.d[1],x10 + + add x7,x3,#32 + mov w6,w5 + b .Lxts_dec + + // Decryption +.align 5 +.Lxts_dec: + tst x21,#0xf + b.eq .Lxts_dec_begin + subs x2,x2,#16 + csel x8,xzr,x8,eq + ld1 {v0.16b},[x0],#16 + b.lo .Lxts_done + sub x0,x0,#16 +.Lxts_dec_begin: + ld1 {v0.16b},[x0],x8 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v28.16b,v0.16b,v0.16b + ld1 {v24.16b},[x0],#16 + orr v27.16b,v24.16b,v24.16b + orr v29.16b,v24.16b,v24.16b + b.lo .Lxts_inner_dec_tail + eor v0.16b,v0.16b,v6.16b // before decryt, xor with iv + eor v24.16b,v24.16b,v8.16b + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + eor v27.16b,v24.16b,v9.16b // third block xox with third iv + eor v24.16b,v24.16b,v9.16b + cmp x2,#32 + b.lo .Lxts_outer_dec_tail + + ld1 {v25.16b},[x0],#16 + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v26.16b},[x0],#16 + eor v25.16b,v25.16b,v10.16b // the fourth block + eor v26.16b,v26.16b,v11.16b + sub x2,x2,#32 // bias + mov w6,w5 + b .Loop5x_xts_dec + +.align 4 +.Loop5x_xts_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 // load key schedule... 
+ subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 // load key schedule... + b.gt .Loop5x_xts_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + subs x2,x2,#0x50 // because .Lxts_dec_tail4x + + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v18.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v18.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v18.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v18.16b + aesimc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lxts_dec_tail4x + + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v19.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v19.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v19.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v19.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v20.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v20.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v21.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v21.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v22.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v22.16b + aesimc v26.16b,v26.16b + + eor v4.16b,v7.16b,v6.16b + aesd v0.16b,v23.16b + // The iv for first block of next iteration. 
+ extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v7.16b,v8.16b + ld1 {v2.16b},[x0],#16 + aesd v1.16b,v23.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + eor v17.16b,v7.16b,v9.16b + ld1 {v3.16b},[x0],#16 + aesd v24.16b,v23.16b + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + eor v30.16b,v7.16b,v10.16b + ld1 {v27.16b},[x0],#16 + aesd v25.16b,v23.16b + // The iv for fourth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d10,x9 + fmov v10.d[1],x10 + eor v31.16b,v7.16b,v11.16b + ld1 {v28.16b},[x0],#16 + aesd v26.16b,v23.16b + + // The iv for fifth block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d11,x9 + fmov v11.d[1],x10 + + ld1 {v29.16b},[x0],#16 + cbz x6,.Lxts_dec_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + eor v0.16b,v2.16b,v6.16b + eor v5.16b,v5.16b,v1.16b + eor v1.16b,v3.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v24.16b,v27.16b,v9.16b + eor v30.16b,v30.16b,v25.16b + eor v25.16b,v28.16b,v10.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + eor v26.16b,v29.16b,v11.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_xts_dec + + cmn x2,#0x10 + b.ne .Loop5x_dec_after + // If x2(x2) equal to -0x10, the left blocks is 4. + // After specially processing, utilize the five blocks processing again. + // It will use the following IVs: v6.16b,v6.16b,v8.16b,v9.16b,v10.16b. 
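
Editorial note, not part of the vendored file: the next-tweak computations in this function (the repeated extr/and/eor stanzas on x9/x10 under the "The iv for ... block" comments, using the 0x87 constant in w19) implement doubling of the 128-bit XTS tweak in GF(2^128). A minimal C sketch of that step follows; the helper name xts_tweak_double is hypothetical.

#include <stdint.h>

/* Multiply the 128-bit tweak by x modulo x^128 + x^7 + x^2 + x + 1:
 * shift the value left by one bit and, if a bit fell off the top,
 * fold it back in as 0x87. This is what the extr/and/eor sequence
 * in the assembly computes on the (x9 = low, x10 = high) pair. */
static void xts_tweak_double(uint64_t t[2])   /* t[0] = low 64 bits, t[1] = high 64 bits */
{
    uint64_t carry = t[1] >> 63;              /* top bit that is shifted out */
    t[1] = (t[1] << 1) | (t[0] >> 63);        /* 128-bit left shift by one */
    t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);  /* conditional reduction */
}

Each "The iv for Nth block" stanza is one application of this step, which keeps five consecutive tweaks live in v6, v8, v9, v10 and v11.
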
+ orr v11.16b,v10.16b,v10.16b + orr v10.16b,v9.16b,v9.16b + orr v9.16b,v8.16b,v8.16b + orr v8.16b,v6.16b,v6.16b + fmov x9,d11 + fmov x10,v11.d[1] + eor v0.16b,v6.16b,v2.16b + eor v1.16b,v8.16b,v3.16b + eor v24.16b,v27.16b,v9.16b + eor v25.16b,v28.16b,v10.16b + eor v26.16b,v29.16b,v11.16b + b.eq .Loop5x_xts_dec + +.Loop5x_dec_after: + add x2,x2,#0x50 + cbz x2,.Lxts_done + + add w6,w5,#2 + subs x2,x2,#0x30 + b.lo .Lxts_inner_dec_tail + + eor v0.16b,v6.16b,v27.16b + eor v1.16b,v8.16b,v28.16b + eor v24.16b,v29.16b,v9.16b + b .Lxts_outer_dec_tail + +.align 4 +.Lxts_dec_tail4x: + add x0,x0,#16 + tst x21,#0xf + eor v5.16b,v1.16b,v4.16b + st1 {v5.16b},[x1],#16 + eor v17.16b,v24.16b,v17.16b + st1 {v17.16b},[x1],#16 + eor v30.16b,v25.16b,v30.16b + eor v31.16b,v26.16b,v31.16b + st1 {v30.16b,v31.16b},[x1],#32 + + b.eq .Lxts_dec_abort + ld1 {v0.16b},[x0],#16 + b .Lxts_done +.align 4 +.Lxts_outer_dec_tail: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_outer_dec_tail + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + // The iv for first block + fmov x9,d9 + fmov x10,v9.d[1] + mov w19,#0x87 + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d6,x9 + fmov v6.d[1],x10 + eor v5.16b,v8.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + eor v17.16b,v9.16b,v7.16b + // The iv for second block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d8,x9 + fmov v8.d[1],x10 + + add x6,x6,#0x20 + add x0,x0,x6 // x0 is adjusted to the last data + + mov x7,x3 + + // The iv for third block + extr x22,x10,x10,#32 + extr x10,x10,x9,#63 + and w11,w19,w22,asr #31 + eor x9,x11,x9,lsl #1 + fmov d9,x9 + fmov v9.d[1],x10 + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + st1 {v5.16b},[x1],#16 + st1 {v24.16b},[x1],#16 + + cmn x2,#0x30 + add x2,x2,#0x30 + b.eq .Lxts_done + sub x2,x2,#0x30 + orr v28.16b,v3.16b,v3.16b + orr v29.16b,v27.16b,v27.16b + nop + +.Lxts_inner_dec_tail: + // x2 == -0x10 means two blocks left. 
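
Editorial note, not part of the vendored file: the tail paths below feed into the cipher-stealing code at .Lxts_done / .Lxts_enc_done. When the total length is not a multiple of 16, the final partial block is produced by stealing bytes from the previously written full block and re-processing a composite block with the appropriate tweak. A rough C sketch of the byte splice performed by the .composite_enc_loop / .composite_dec_loop loops, offered only as an illustration of the encrypt-side ordering (names are hypothetical):

#include <stddef.h>
#include <stdint.h>

/* 'prev' holds the 16-byte block just written to the output; 'in_tail'
 * holds the remaining 1..15 trailing input bytes. The short final output
 * block receives the leading bytes of 'prev', while 'prev' is patched
 * with the input tail so the caller can re-process it (XORed with the
 * final tweak) in place. */
static void xts_steal_tail(uint8_t *prev, uint8_t *out_tail,
                           const uint8_t *in_tail, size_t tail)
{
    for (size_t i = 0; i < tail; i++) {
        out_tail[i] = prev[i];
        prev[i]     = in_tail[i];
    }
}
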
+ cmn x2,#0x10 + eor v1.16b,v28.16b,v6.16b + eor v24.16b,v29.16b,v8.16b + b.eq .Lxts_dec_tail_loop + eor v24.16b,v29.16b,v6.16b +.Lxts_dec_tail_loop: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lxts_dec_tail_loop + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + eor v17.16b,v8.16b,v7.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + b.eq .Lxts_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v24.16b + orr v6.16b,v9.16b,v9.16b + orr v8.16b,v10.16b,v10.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + add x2,x2,#16 + b .Lxts_done + +.Lxts_dec_one: + eor v5.16b,v5.16b,v24.16b + orr v6.16b,v8.16b,v8.16b + orr v8.16b,v9.16b,v9.16b + st1 {v5.16b},[x1],#16 + add x2,x2,#32 + +.Lxts_done: + tst x21,#0xf + b.eq .Lxts_dec_abort + // Processing the last two blocks with cipher stealing. + mov x7,x3 + cbnz x2,.Lxts_dec_1st_done + ld1 {v0.16b},[x0],#16 + + // Decrypt the last second block to get the last plain text block +.Lxts_dec_1st_done: + eor v26.16b,v0.16b,v8.16b + ldr w6,[x3,#240] + ld1 {v0.4s},[x3],#16 + sub w6,w6,#2 + ld1 {v1.4s},[x3],#16 +.Loop_final_2nd_dec: + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x3],#16 // load key schedule... + subs w6,w6,#2 + aesd v26.16b,v1.16b + aesimc v26.16b,v26.16b + ld1 {v1.4s},[x3],#16 // load key schedule... + b.gt .Loop_final_2nd_dec + + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x3] + aesd v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v8.16b + st1 {v26.16b},[x1] + + mov x20,x0 + add x13,x1,#16 + + // Composite the tailcnt "16 byte not aligned block" into the last second plain blocks + // to get the last encrypted block. +.composite_dec_loop: + subs x21,x21,#1 + ldrb w15,[x1,x21] + ldrb w14,[x20,x21] + strb w15,[x13,x21] + strb w14,[x1,x21] + b.gt .composite_dec_loop +.Lxts_dec_load_done: + ld1 {v26.16b},[x1] + eor v26.16b,v26.16b,v6.16b + + // Decrypt the composite block to get the last second plain text block + ldr w6,[x7,#240] + ld1 {v0.4s},[x7],#16 + sub w6,w6,#2 + ld1 {v1.4s},[x7],#16 +.Loop_final_dec: + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x7],#16 // load key schedule... + subs w6,w6,#2 + aesd v26.16b,v1.16b + aesimc v26.16b,v26.16b + ld1 {v1.4s},[x7],#16 // load key schedule... 
+ b.gt .Loop_final_dec + + aesd v26.16b,v0.16b + aesimc v26.16b,v26.16b + ld1 {v0.4s},[x7] + aesd v26.16b,v1.16b + eor v26.16b,v26.16b,v0.16b + eor v26.16b,v26.16b,v6.16b + st1 {v26.16b},[x1] + +.Lxts_dec_abort: + ldp x21,x22,[sp,#48] + ldp d8,d9,[sp,#32] + ldp d10,d11,[sp,#16] + ldp x19,x20,[sp],#64 + +.Lxts_dec_final_abort: + ret +.size aes_v8_xts_decrypt,.-aes_v8_xts_decrypt +#endif diff --git a/contrib/openssl-cmake/asm/crypto/aes/bsaes-armv8.S b/contrib/openssl-cmake/asm/crypto/aes/bsaes-armv8.S new file mode 100644 index 000000000000..536cbdce0463 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/bsaes-armv8.S @@ -0,0 +1,2355 @@ +// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// ==================================================================== +// Written by Ben Avison for the OpenSSL +// project. Rights for redistribution and usage in source and binary +// forms are granted according to the OpenSSL license. +// ==================================================================== +// +// This implementation is a translation of bsaes-armv7 for AArch64. +// No attempt has been made to carry across the build switches for +// kernel targets, since the Linux kernel crypto support has moved on +// from when it was based on OpenSSL. + +// A lot of hand-scheduling has been performed. Consequently, this code +// doesn't factor out neatly into macros in the same way that the +// AArch32 version did, and there is little to be gained by wrapping it +// up in Perl, and it is presented as pure assembly. 
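
Editorial note, not part of the OpenSSL sources: for readers unfamiliar with the bit-sliced approach used below, the routines re-pack eight 128-bit blocks so that each SIMD register holds one bit position of every block; SubBytes and the linear layers then become fixed sequences of boolean operations applied to whole registers, processing all eight blocks at once. A small C sketch of the packing idea, with a hypothetical function name and operating on single bytes rather than 128-bit lanes:

#include <stdint.h>

/* Gather bit 'b' of eight independent bytes into slice[b], so that one
 * bitwise operation on a slice acts on the same bit of all eight blocks. */
static void bitslice_bytes8(const uint8_t in[8], uint8_t slice[8])
{
    for (int b = 0; b < 8; b++) {
        uint8_t s = 0;
        for (int blk = 0; blk < 8; blk++)
            s |= (uint8_t)(((in[blk] >> b) & 1u) << blk);
        slice[b] = s;
    }
}

The assembly performs the analogous transform on 128-bit vectors with the ushr/shl/and/eor ladders (masks 0x55, 0x33, 0x0f) at the top of _bsaes_decrypt8 and _bsaes_encrypt8, and undoes it in .Ldec_done / .Lenc_done.
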
+ + +#include "crypto/arm_arch.h" + +.text + + + + + +.type _bsaes_decrypt8,%function +.align 4 +// On entry: +// x9 -> key (previously expanded using _bsaes_key_convert) +// x10 = number of rounds +// v0-v7 input data +// On exit: +// x9-x11 corrupted +// other general-purpose registers preserved +// v0-v7 output data +// v11-v15 preserved +// other SIMD registers corrupted +_bsaes_decrypt8: + ldr q8, [x9], #16 + adrp x11, .LM0ISR + add x11, x11, #:lo12:.LM0ISR + movi v9.16b, #0x55 + ldr q10, [x11], #16 + movi v16.16b, #0x33 + movi v17.16b, #0x0f + sub x10, x10, #1 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v8.16b + eor v2.16b, v2.16b, v8.16b + eor v4.16b, v4.16b, v8.16b + eor v3.16b, v3.16b, v8.16b + eor v5.16b, v5.16b, v8.16b + tbl v0.16b, {v0.16b}, v10.16b + tbl v1.16b, {v1.16b}, v10.16b + tbl v2.16b, {v2.16b}, v10.16b + tbl v4.16b, {v4.16b}, v10.16b + eor v6.16b, v6.16b, v8.16b + eor v7.16b, v7.16b, v8.16b + tbl v3.16b, {v3.16b}, v10.16b + tbl v5.16b, {v5.16b}, v10.16b + tbl v6.16b, {v6.16b}, v10.16b + ushr v8.2d, v0.2d, #1 + tbl v7.16b, {v7.16b}, v10.16b + ushr v10.2d, v4.2d, #1 + ushr v18.2d, v2.2d, #1 + eor v8.16b, v8.16b, v1.16b + ushr v19.2d, v6.2d, #1 + eor v10.16b, v10.16b, v5.16b + eor v18.16b, v18.16b, v3.16b + and v8.16b, v8.16b, v9.16b + eor v19.16b, v19.16b, v7.16b + and v10.16b, v10.16b, v9.16b + and v18.16b, v18.16b, v9.16b + eor v1.16b, v1.16b, v8.16b + shl v8.2d, v8.2d, #1 + and v9.16b, v19.16b, v9.16b + eor v5.16b, v5.16b, v10.16b + shl v10.2d, v10.2d, #1 + eor v3.16b, v3.16b, v18.16b + shl v18.2d, v18.2d, #1 + eor v0.16b, v0.16b, v8.16b + shl v8.2d, v9.2d, #1 + eor v7.16b, v7.16b, v9.16b + eor v4.16b, v4.16b, v10.16b + eor v2.16b, v2.16b, v18.16b + ushr v9.2d, v1.2d, #2 + eor v6.16b, v6.16b, v8.16b + ushr v8.2d, v0.2d, #2 + ushr v10.2d, v5.2d, #2 + ushr v18.2d, v4.2d, #2 + eor v9.16b, v9.16b, v3.16b + eor v8.16b, v8.16b, v2.16b + eor v10.16b, v10.16b, v7.16b + eor v18.16b, v18.16b, v6.16b + and v9.16b, v9.16b, v16.16b + and v8.16b, v8.16b, v16.16b + and v10.16b, v10.16b, v16.16b + and v16.16b, v18.16b, v16.16b + eor v3.16b, v3.16b, v9.16b + shl v9.2d, v9.2d, #2 + eor v2.16b, v2.16b, v8.16b + shl v8.2d, v8.2d, #2 + eor v7.16b, v7.16b, v10.16b + shl v10.2d, v10.2d, #2 + eor v6.16b, v6.16b, v16.16b + shl v16.2d, v16.2d, #2 + eor v1.16b, v1.16b, v9.16b + eor v0.16b, v0.16b, v8.16b + eor v5.16b, v5.16b, v10.16b + eor v4.16b, v4.16b, v16.16b + ushr v8.2d, v3.2d, #4 + ushr v9.2d, v2.2d, #4 + ushr v10.2d, v1.2d, #4 + ushr v16.2d, v0.2d, #4 + eor v8.16b, v8.16b, v7.16b + eor v9.16b, v9.16b, v6.16b + eor v10.16b, v10.16b, v5.16b + eor v16.16b, v16.16b, v4.16b + and v8.16b, v8.16b, v17.16b + and v9.16b, v9.16b, v17.16b + and v10.16b, v10.16b, v17.16b + and v16.16b, v16.16b, v17.16b + eor v7.16b, v7.16b, v8.16b + shl v8.2d, v8.2d, #4 + eor v6.16b, v6.16b, v9.16b + shl v9.2d, v9.2d, #4 + eor v5.16b, v5.16b, v10.16b + shl v10.2d, v10.2d, #4 + eor v4.16b, v4.16b, v16.16b + shl v16.2d, v16.2d, #4 + eor v3.16b, v3.16b, v8.16b + eor v2.16b, v2.16b, v9.16b + eor v1.16b, v1.16b, v10.16b + eor v0.16b, v0.16b, v16.16b + b .Ldec_sbox +.align 4 +.Ldec_loop: + ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64 + ldp q8, q9, [x9], #32 + eor v0.16b, v16.16b, v0.16b + ldr q10, [x9], #16 + eor v1.16b, v17.16b, v1.16b + ldr q16, [x9], #16 + eor v2.16b, v18.16b, v2.16b + eor v3.16b, v19.16b, v3.16b + eor v4.16b, v8.16b, v4.16b + eor v5.16b, v9.16b, v5.16b + eor v6.16b, v10.16b, v6.16b + eor v7.16b, v16.16b, v7.16b + tbl v0.16b, {v0.16b}, v28.16b + tbl v1.16b, {v1.16b}, v28.16b + tbl 
v2.16b, {v2.16b}, v28.16b + tbl v3.16b, {v3.16b}, v28.16b + tbl v4.16b, {v4.16b}, v28.16b + tbl v5.16b, {v5.16b}, v28.16b + tbl v6.16b, {v6.16b}, v28.16b + tbl v7.16b, {v7.16b}, v28.16b +.Ldec_sbox: + eor v1.16b, v1.16b, v4.16b + eor v3.16b, v3.16b, v4.16b + subs x10, x10, #1 + eor v4.16b, v4.16b, v7.16b + eor v2.16b, v2.16b, v7.16b + eor v1.16b, v1.16b, v6.16b + eor v6.16b, v6.16b, v4.16b + eor v2.16b, v2.16b, v5.16b + eor v0.16b, v0.16b, v1.16b + eor v7.16b, v7.16b, v6.16b + eor v8.16b, v6.16b, v2.16b + and v9.16b, v4.16b, v6.16b + eor v10.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v0.16b + eor v5.16b, v5.16b, v0.16b + eor v16.16b, v7.16b, v4.16b + eor v17.16b, v4.16b, v0.16b + and v18.16b, v0.16b, v2.16b + eor v19.16b, v7.16b, v4.16b + eor v1.16b, v1.16b, v3.16b + eor v20.16b, v3.16b, v0.16b + eor v21.16b, v5.16b, v2.16b + eor v22.16b, v3.16b, v7.16b + and v8.16b, v17.16b, v8.16b + orr v17.16b, v3.16b, v5.16b + eor v23.16b, v1.16b, v6.16b + eor v24.16b, v20.16b, v16.16b + eor v25.16b, v1.16b, v5.16b + orr v26.16b, v20.16b, v21.16b + and v20.16b, v20.16b, v21.16b + and v27.16b, v7.16b, v1.16b + eor v21.16b, v21.16b, v23.16b + orr v28.16b, v16.16b, v23.16b + orr v29.16b, v22.16b, v25.16b + eor v26.16b, v26.16b, v8.16b + and v16.16b, v16.16b, v23.16b + and v22.16b, v22.16b, v25.16b + and v21.16b, v24.16b, v21.16b + eor v8.16b, v28.16b, v8.16b + eor v23.16b, v5.16b, v2.16b + eor v24.16b, v1.16b, v6.16b + eor v16.16b, v16.16b, v22.16b + eor v22.16b, v3.16b, v0.16b + eor v25.16b, v29.16b, v21.16b + eor v21.16b, v26.16b, v21.16b + eor v8.16b, v8.16b, v20.16b + eor v26.16b, v23.16b, v24.16b + eor v16.16b, v16.16b, v20.16b + eor v28.16b, v22.16b, v19.16b + eor v20.16b, v25.16b, v20.16b + eor v9.16b, v21.16b, v9.16b + eor v8.16b, v8.16b, v18.16b + eor v18.16b, v5.16b, v1.16b + eor v21.16b, v16.16b, v17.16b + eor v16.16b, v16.16b, v17.16b + eor v17.16b, v20.16b, v27.16b + eor v20.16b, v3.16b, v7.16b + eor v25.16b, v9.16b, v8.16b + eor v27.16b, v0.16b, v4.16b + and v29.16b, v9.16b, v17.16b + eor v30.16b, v8.16b, v29.16b + eor v31.16b, v21.16b, v29.16b + eor v29.16b, v21.16b, v29.16b + bsl v30.16b, v17.16b, v21.16b + bsl v31.16b, v9.16b, v8.16b + bsl v16.16b, v30.16b, v29.16b + bsl v21.16b, v29.16b, v30.16b + eor v8.16b, v31.16b, v30.16b + and v1.16b, v1.16b, v31.16b + and v9.16b, v16.16b, v31.16b + and v6.16b, v6.16b, v30.16b + eor v16.16b, v17.16b, v21.16b + and v4.16b, v4.16b, v30.16b + eor v17.16b, v8.16b, v30.16b + and v21.16b, v24.16b, v8.16b + eor v9.16b, v9.16b, v25.16b + and v19.16b, v19.16b, v8.16b + eor v24.16b, v30.16b, v16.16b + eor v25.16b, v30.16b, v16.16b + and v7.16b, v7.16b, v17.16b + and v10.16b, v10.16b, v16.16b + eor v29.16b, v9.16b, v16.16b + eor v30.16b, v31.16b, v9.16b + and v0.16b, v24.16b, v0.16b + and v9.16b, v18.16b, v9.16b + and v2.16b, v25.16b, v2.16b + eor v10.16b, v10.16b, v6.16b + eor v18.16b, v29.16b, v16.16b + and v5.16b, v30.16b, v5.16b + eor v24.16b, v8.16b, v29.16b + and v25.16b, v26.16b, v29.16b + and v26.16b, v28.16b, v29.16b + eor v8.16b, v8.16b, v29.16b + eor v17.16b, v17.16b, v18.16b + eor v5.16b, v1.16b, v5.16b + and v23.16b, v24.16b, v23.16b + eor v21.16b, v21.16b, v25.16b + eor v19.16b, v19.16b, v26.16b + eor v0.16b, v4.16b, v0.16b + and v3.16b, v17.16b, v3.16b + eor v1.16b, v9.16b, v1.16b + eor v9.16b, v25.16b, v23.16b + eor v5.16b, v5.16b, v21.16b + eor v2.16b, v6.16b, v2.16b + and v6.16b, v8.16b, v22.16b + eor v3.16b, v7.16b, v3.16b + and v8.16b, v20.16b, v18.16b + eor v10.16b, v10.16b, v9.16b + eor v0.16b, v0.16b, v19.16b + eor v9.16b, v1.16b, v9.16b 
+ eor v1.16b, v2.16b, v21.16b + eor v3.16b, v3.16b, v19.16b + and v16.16b, v27.16b, v16.16b + eor v17.16b, v26.16b, v6.16b + eor v6.16b, v8.16b, v7.16b + eor v7.16b, v1.16b, v9.16b + eor v1.16b, v5.16b, v3.16b + eor v2.16b, v10.16b, v3.16b + eor v4.16b, v16.16b, v4.16b + eor v8.16b, v6.16b, v17.16b + eor v5.16b, v9.16b, v3.16b + eor v9.16b, v0.16b, v1.16b + eor v6.16b, v7.16b, v1.16b + eor v0.16b, v4.16b, v17.16b + eor v4.16b, v8.16b, v7.16b + eor v7.16b, v9.16b, v2.16b + eor v8.16b, v3.16b, v0.16b + eor v7.16b, v7.16b, v5.16b + eor v3.16b, v4.16b, v7.16b + eor v4.16b, v7.16b, v0.16b + eor v7.16b, v8.16b, v3.16b + bcc .Ldec_done + ext v8.16b, v0.16b, v0.16b, #8 + ext v9.16b, v1.16b, v1.16b, #8 + ldr q28, [x11] // load from .LISR in common case (x10 > 0) + ext v10.16b, v6.16b, v6.16b, #8 + ext v16.16b, v3.16b, v3.16b, #8 + ext v17.16b, v5.16b, v5.16b, #8 + ext v18.16b, v4.16b, v4.16b, #8 + eor v8.16b, v8.16b, v0.16b + eor v9.16b, v9.16b, v1.16b + eor v10.16b, v10.16b, v6.16b + eor v16.16b, v16.16b, v3.16b + eor v17.16b, v17.16b, v5.16b + ext v19.16b, v2.16b, v2.16b, #8 + ext v20.16b, v7.16b, v7.16b, #8 + eor v18.16b, v18.16b, v4.16b + eor v6.16b, v6.16b, v8.16b + eor v8.16b, v2.16b, v10.16b + eor v4.16b, v4.16b, v9.16b + eor v2.16b, v19.16b, v2.16b + eor v9.16b, v20.16b, v7.16b + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v16.16b + eor v6.16b, v6.16b, v17.16b + eor v8.16b, v8.16b, v16.16b + eor v7.16b, v7.16b, v18.16b + eor v4.16b, v4.16b, v16.16b + eor v2.16b, v3.16b, v2.16b + eor v1.16b, v1.16b, v17.16b + eor v3.16b, v5.16b, v9.16b + eor v5.16b, v8.16b, v17.16b + eor v7.16b, v7.16b, v17.16b + ext v8.16b, v0.16b, v0.16b, #12 + ext v9.16b, v6.16b, v6.16b, #12 + ext v10.16b, v4.16b, v4.16b, #12 + ext v16.16b, v1.16b, v1.16b, #12 + ext v17.16b, v5.16b, v5.16b, #12 + ext v18.16b, v7.16b, v7.16b, #12 + eor v0.16b, v0.16b, v8.16b + eor v6.16b, v6.16b, v9.16b + eor v4.16b, v4.16b, v10.16b + ext v19.16b, v2.16b, v2.16b, #12 + ext v20.16b, v3.16b, v3.16b, #12 + eor v1.16b, v1.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v7.16b, v7.16b, v18.16b + eor v2.16b, v2.16b, v19.16b + eor v16.16b, v16.16b, v0.16b + eor v3.16b, v3.16b, v20.16b + eor v17.16b, v17.16b, v4.16b + eor v10.16b, v10.16b, v6.16b + ext v0.16b, v0.16b, v0.16b, #8 + eor v9.16b, v9.16b, v1.16b + ext v1.16b, v1.16b, v1.16b, #8 + eor v8.16b, v8.16b, v3.16b + eor v16.16b, v16.16b, v3.16b + eor v18.16b, v18.16b, v5.16b + eor v19.16b, v19.16b, v7.16b + ext v21.16b, v5.16b, v5.16b, #8 + ext v5.16b, v7.16b, v7.16b, #8 + eor v7.16b, v20.16b, v2.16b + ext v4.16b, v4.16b, v4.16b, #8 + ext v20.16b, v3.16b, v3.16b, #8 + eor v17.16b, v17.16b, v3.16b + ext v2.16b, v2.16b, v2.16b, #8 + eor v3.16b, v10.16b, v3.16b + ext v10.16b, v6.16b, v6.16b, #8 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v16.16b + eor v5.16b, v5.16b, v18.16b + eor v3.16b, v3.16b, v4.16b + eor v7.16b, v20.16b, v7.16b + eor v6.16b, v2.16b, v19.16b + eor v4.16b, v21.16b, v17.16b + eor v2.16b, v10.16b, v9.16b + bne .Ldec_loop + ldr q28, [x11, #16]! 
// load from .LISRM0 on last round (x10 == 0) + b .Ldec_loop +.align 4 +.Ldec_done: + ushr v8.2d, v0.2d, #1 + movi v9.16b, #0x55 + ldr q10, [x9] + ushr v16.2d, v2.2d, #1 + movi v17.16b, #0x33 + ushr v18.2d, v6.2d, #1 + movi v19.16b, #0x0f + eor v8.16b, v8.16b, v1.16b + ushr v20.2d, v3.2d, #1 + eor v16.16b, v16.16b, v7.16b + eor v18.16b, v18.16b, v4.16b + and v8.16b, v8.16b, v9.16b + eor v20.16b, v20.16b, v5.16b + and v16.16b, v16.16b, v9.16b + and v18.16b, v18.16b, v9.16b + shl v21.2d, v8.2d, #1 + eor v1.16b, v1.16b, v8.16b + and v8.16b, v20.16b, v9.16b + eor v7.16b, v7.16b, v16.16b + shl v9.2d, v16.2d, #1 + eor v4.16b, v4.16b, v18.16b + shl v16.2d, v18.2d, #1 + eor v0.16b, v0.16b, v21.16b + shl v18.2d, v8.2d, #1 + eor v5.16b, v5.16b, v8.16b + eor v2.16b, v2.16b, v9.16b + eor v6.16b, v6.16b, v16.16b + ushr v8.2d, v1.2d, #2 + eor v3.16b, v3.16b, v18.16b + ushr v9.2d, v0.2d, #2 + ushr v16.2d, v7.2d, #2 + ushr v18.2d, v2.2d, #2 + eor v8.16b, v8.16b, v4.16b + eor v9.16b, v9.16b, v6.16b + eor v16.16b, v16.16b, v5.16b + eor v18.16b, v18.16b, v3.16b + and v8.16b, v8.16b, v17.16b + and v9.16b, v9.16b, v17.16b + and v16.16b, v16.16b, v17.16b + and v17.16b, v18.16b, v17.16b + eor v4.16b, v4.16b, v8.16b + shl v8.2d, v8.2d, #2 + eor v6.16b, v6.16b, v9.16b + shl v9.2d, v9.2d, #2 + eor v5.16b, v5.16b, v16.16b + shl v16.2d, v16.2d, #2 + eor v3.16b, v3.16b, v17.16b + shl v17.2d, v17.2d, #2 + eor v1.16b, v1.16b, v8.16b + eor v0.16b, v0.16b, v9.16b + eor v7.16b, v7.16b, v16.16b + eor v2.16b, v2.16b, v17.16b + ushr v8.2d, v4.2d, #4 + ushr v9.2d, v6.2d, #4 + ushr v16.2d, v1.2d, #4 + ushr v17.2d, v0.2d, #4 + eor v8.16b, v8.16b, v5.16b + eor v9.16b, v9.16b, v3.16b + eor v16.16b, v16.16b, v7.16b + eor v17.16b, v17.16b, v2.16b + and v8.16b, v8.16b, v19.16b + and v9.16b, v9.16b, v19.16b + and v16.16b, v16.16b, v19.16b + and v17.16b, v17.16b, v19.16b + eor v5.16b, v5.16b, v8.16b + shl v8.2d, v8.2d, #4 + eor v3.16b, v3.16b, v9.16b + shl v9.2d, v9.2d, #4 + eor v7.16b, v7.16b, v16.16b + shl v16.2d, v16.2d, #4 + eor v2.16b, v2.16b, v17.16b + shl v17.2d, v17.2d, #4 + eor v4.16b, v4.16b, v8.16b + eor v6.16b, v6.16b, v9.16b + eor v7.16b, v7.16b, v10.16b + eor v1.16b, v1.16b, v16.16b + eor v2.16b, v2.16b, v10.16b + eor v0.16b, v0.16b, v17.16b + eor v4.16b, v4.16b, v10.16b + eor v6.16b, v6.16b, v10.16b + eor v3.16b, v3.16b, v10.16b + eor v5.16b, v5.16b, v10.16b + eor v1.16b, v1.16b, v10.16b + eor v0.16b, v0.16b, v10.16b + ret +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.section .rodata +.type _bsaes_consts,%object +.align 6 +_bsaes_consts: +// InvShiftRows constants +// Used in _bsaes_decrypt8, which assumes contiguity +// .LM0ISR used with round 0 key +// .LISR used with middle round keys +// .LISRM0 used with final round key +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d + +// ShiftRows constants +// Used in _bsaes_encrypt8, which assumes contiguity +// .LM0SR used with round 0 key +// .LSR used with middle round keys +// .LSRM0 used with final round key +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d + +.LM0_bigendian: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LM0_littleendian: +.quad 0x0105090d0004080c, 0x03070b0f02060a0e + +// Used in ossl_bsaes_ctr32_encrypt_blocks, prior to dropping into +// _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR +.LREVM0SR: +.quad 
0x090d01050c000408, 0x03070b0f060a0e02 + +.align 6 +.size _bsaes_consts,.-_bsaes_consts + +.previous + +.type _bsaes_encrypt8,%function +.align 4 +// On entry: +// x9 -> key (previously expanded using _bsaes_key_convert) +// x10 = number of rounds +// v0-v7 input data +// On exit: +// x9-x11 corrupted +// other general-purpose registers preserved +// v0-v7 output data +// v11-v15 preserved +// other SIMD registers corrupted +_bsaes_encrypt8: + ldr q8, [x9], #16 + adrp x11, .LM0SR + add x11, x11, #:lo12:.LM0SR + ldr q9, [x11], #16 +_bsaes_encrypt8_alt: + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v8.16b + sub x10, x10, #1 + eor v2.16b, v2.16b, v8.16b + eor v4.16b, v4.16b, v8.16b + eor v3.16b, v3.16b, v8.16b + eor v5.16b, v5.16b, v8.16b + tbl v0.16b, {v0.16b}, v9.16b + tbl v1.16b, {v1.16b}, v9.16b + tbl v2.16b, {v2.16b}, v9.16b + tbl v4.16b, {v4.16b}, v9.16b + eor v6.16b, v6.16b, v8.16b + eor v7.16b, v7.16b, v8.16b + tbl v3.16b, {v3.16b}, v9.16b + tbl v5.16b, {v5.16b}, v9.16b + tbl v6.16b, {v6.16b}, v9.16b + ushr v8.2d, v0.2d, #1 + movi v10.16b, #0x55 + tbl v7.16b, {v7.16b}, v9.16b + ushr v9.2d, v4.2d, #1 + movi v16.16b, #0x33 + ushr v17.2d, v2.2d, #1 + eor v8.16b, v8.16b, v1.16b + movi v18.16b, #0x0f + ushr v19.2d, v6.2d, #1 + eor v9.16b, v9.16b, v5.16b + eor v17.16b, v17.16b, v3.16b + and v8.16b, v8.16b, v10.16b + eor v19.16b, v19.16b, v7.16b + and v9.16b, v9.16b, v10.16b + and v17.16b, v17.16b, v10.16b + eor v1.16b, v1.16b, v8.16b + shl v8.2d, v8.2d, #1 + and v10.16b, v19.16b, v10.16b + eor v5.16b, v5.16b, v9.16b + shl v9.2d, v9.2d, #1 + eor v3.16b, v3.16b, v17.16b + shl v17.2d, v17.2d, #1 + eor v0.16b, v0.16b, v8.16b + shl v8.2d, v10.2d, #1 + eor v7.16b, v7.16b, v10.16b + eor v4.16b, v4.16b, v9.16b + eor v2.16b, v2.16b, v17.16b + ushr v9.2d, v1.2d, #2 + eor v6.16b, v6.16b, v8.16b + ushr v8.2d, v0.2d, #2 + ushr v10.2d, v5.2d, #2 + ushr v17.2d, v4.2d, #2 + eor v9.16b, v9.16b, v3.16b + eor v8.16b, v8.16b, v2.16b + eor v10.16b, v10.16b, v7.16b + eor v17.16b, v17.16b, v6.16b + and v9.16b, v9.16b, v16.16b + and v8.16b, v8.16b, v16.16b + and v10.16b, v10.16b, v16.16b + and v16.16b, v17.16b, v16.16b + eor v3.16b, v3.16b, v9.16b + shl v9.2d, v9.2d, #2 + eor v2.16b, v2.16b, v8.16b + shl v8.2d, v8.2d, #2 + eor v7.16b, v7.16b, v10.16b + shl v10.2d, v10.2d, #2 + eor v6.16b, v6.16b, v16.16b + shl v16.2d, v16.2d, #2 + eor v1.16b, v1.16b, v9.16b + eor v0.16b, v0.16b, v8.16b + eor v5.16b, v5.16b, v10.16b + eor v4.16b, v4.16b, v16.16b + ushr v8.2d, v3.2d, #4 + ushr v9.2d, v2.2d, #4 + ushr v10.2d, v1.2d, #4 + ushr v16.2d, v0.2d, #4 + eor v8.16b, v8.16b, v7.16b + eor v9.16b, v9.16b, v6.16b + eor v10.16b, v10.16b, v5.16b + eor v16.16b, v16.16b, v4.16b + and v8.16b, v8.16b, v18.16b + and v9.16b, v9.16b, v18.16b + and v10.16b, v10.16b, v18.16b + and v16.16b, v16.16b, v18.16b + eor v7.16b, v7.16b, v8.16b + shl v8.2d, v8.2d, #4 + eor v6.16b, v6.16b, v9.16b + shl v9.2d, v9.2d, #4 + eor v5.16b, v5.16b, v10.16b + shl v10.2d, v10.2d, #4 + eor v4.16b, v4.16b, v16.16b + shl v16.2d, v16.2d, #4 + eor v3.16b, v3.16b, v8.16b + eor v2.16b, v2.16b, v9.16b + eor v1.16b, v1.16b, v10.16b + eor v0.16b, v0.16b, v16.16b + b .Lenc_sbox +.align 4 +.Lenc_loop: + ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64 + ldp q8, q9, [x9], #32 + eor v0.16b, v16.16b, v0.16b + ldr q10, [x9], #16 + eor v1.16b, v17.16b, v1.16b + ldr q16, [x9], #16 + eor v2.16b, v18.16b, v2.16b + eor v3.16b, v19.16b, v3.16b + eor v4.16b, v8.16b, v4.16b + eor v5.16b, v9.16b, v5.16b + eor v6.16b, v10.16b, v6.16b + eor v7.16b, v16.16b, v7.16b + tbl v0.16b, 
{v0.16b}, v28.16b + tbl v1.16b, {v1.16b}, v28.16b + tbl v2.16b, {v2.16b}, v28.16b + tbl v3.16b, {v3.16b}, v28.16b + tbl v4.16b, {v4.16b}, v28.16b + tbl v5.16b, {v5.16b}, v28.16b + tbl v6.16b, {v6.16b}, v28.16b + tbl v7.16b, {v7.16b}, v28.16b +.Lenc_sbox: + eor v5.16b, v5.16b, v6.16b + eor v3.16b, v3.16b, v0.16b + subs x10, x10, #1 + eor v2.16b, v2.16b, v1.16b + eor v5.16b, v5.16b, v0.16b + eor v8.16b, v3.16b, v7.16b + eor v6.16b, v6.16b, v2.16b + eor v7.16b, v7.16b, v5.16b + eor v8.16b, v8.16b, v4.16b + eor v3.16b, v6.16b, v3.16b + eor v4.16b, v4.16b, v5.16b + eor v6.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v7.16b + eor v1.16b, v8.16b, v1.16b + eor v8.16b, v7.16b, v4.16b + eor v9.16b, v3.16b, v0.16b + eor v10.16b, v7.16b, v6.16b + eor v16.16b, v5.16b, v3.16b + eor v17.16b, v6.16b, v2.16b + eor v18.16b, v5.16b, v1.16b + eor v19.16b, v2.16b, v4.16b + eor v20.16b, v1.16b, v0.16b + orr v21.16b, v8.16b, v9.16b + orr v22.16b, v10.16b, v16.16b + eor v23.16b, v8.16b, v17.16b + eor v24.16b, v9.16b, v18.16b + and v19.16b, v19.16b, v20.16b + orr v20.16b, v17.16b, v18.16b + and v8.16b, v8.16b, v9.16b + and v9.16b, v17.16b, v18.16b + and v17.16b, v23.16b, v24.16b + and v10.16b, v10.16b, v16.16b + eor v16.16b, v21.16b, v19.16b + eor v18.16b, v20.16b, v19.16b + and v19.16b, v2.16b, v1.16b + and v20.16b, v6.16b, v5.16b + eor v21.16b, v22.16b, v17.16b + eor v9.16b, v9.16b, v10.16b + eor v10.16b, v16.16b, v17.16b + eor v16.16b, v18.16b, v8.16b + and v17.16b, v4.16b, v0.16b + orr v18.16b, v7.16b, v3.16b + eor v21.16b, v21.16b, v8.16b + eor v8.16b, v9.16b, v8.16b + eor v9.16b, v10.16b, v19.16b + eor v10.16b, v3.16b, v0.16b + eor v16.16b, v16.16b, v17.16b + eor v17.16b, v5.16b, v1.16b + eor v19.16b, v21.16b, v20.16b + eor v20.16b, v8.16b, v18.16b + eor v8.16b, v8.16b, v18.16b + eor v18.16b, v7.16b, v4.16b + eor v21.16b, v9.16b, v16.16b + eor v22.16b, v6.16b, v2.16b + and v23.16b, v9.16b, v19.16b + eor v24.16b, v10.16b, v17.16b + eor v25.16b, v0.16b, v1.16b + eor v26.16b, v7.16b, v6.16b + eor v27.16b, v18.16b, v22.16b + eor v28.16b, v3.16b, v5.16b + eor v29.16b, v16.16b, v23.16b + eor v30.16b, v20.16b, v23.16b + eor v23.16b, v20.16b, v23.16b + eor v31.16b, v4.16b, v2.16b + bsl v29.16b, v19.16b, v20.16b + bsl v30.16b, v9.16b, v16.16b + bsl v8.16b, v29.16b, v23.16b + bsl v20.16b, v23.16b, v29.16b + eor v9.16b, v30.16b, v29.16b + and v5.16b, v5.16b, v30.16b + and v8.16b, v8.16b, v30.16b + and v1.16b, v1.16b, v29.16b + eor v16.16b, v19.16b, v20.16b + and v2.16b, v2.16b, v29.16b + eor v19.16b, v9.16b, v29.16b + and v17.16b, v17.16b, v9.16b + eor v8.16b, v8.16b, v21.16b + and v20.16b, v22.16b, v9.16b + eor v21.16b, v29.16b, v16.16b + eor v22.16b, v29.16b, v16.16b + and v23.16b, v25.16b, v16.16b + and v6.16b, v6.16b, v19.16b + eor v25.16b, v8.16b, v16.16b + eor v29.16b, v30.16b, v8.16b + and v4.16b, v21.16b, v4.16b + and v8.16b, v28.16b, v8.16b + and v0.16b, v22.16b, v0.16b + eor v21.16b, v23.16b, v1.16b + eor v22.16b, v9.16b, v25.16b + eor v9.16b, v9.16b, v25.16b + eor v23.16b, v25.16b, v16.16b + and v3.16b, v29.16b, v3.16b + and v24.16b, v24.16b, v25.16b + and v25.16b, v27.16b, v25.16b + and v10.16b, v22.16b, v10.16b + and v9.16b, v9.16b, v18.16b + eor v18.16b, v19.16b, v23.16b + and v19.16b, v26.16b, v23.16b + eor v3.16b, v5.16b, v3.16b + eor v17.16b, v17.16b, v24.16b + eor v10.16b, v24.16b, v10.16b + and v16.16b, v31.16b, v16.16b + eor v20.16b, v20.16b, v25.16b + eor v9.16b, v25.16b, v9.16b + eor v4.16b, v2.16b, v4.16b + and v7.16b, v18.16b, v7.16b + eor v18.16b, v19.16b, v6.16b + eor v5.16b, v8.16b, v5.16b + eor 
v0.16b, v1.16b, v0.16b + eor v1.16b, v21.16b, v10.16b + eor v8.16b, v3.16b, v17.16b + eor v2.16b, v16.16b, v2.16b + eor v3.16b, v6.16b, v7.16b + eor v6.16b, v18.16b, v9.16b + eor v4.16b, v4.16b, v20.16b + eor v10.16b, v5.16b, v10.16b + eor v0.16b, v0.16b, v17.16b + eor v9.16b, v2.16b, v9.16b + eor v3.16b, v3.16b, v20.16b + eor v7.16b, v6.16b, v1.16b + eor v5.16b, v8.16b, v4.16b + eor v6.16b, v10.16b, v1.16b + eor v2.16b, v4.16b, v0.16b + eor v4.16b, v3.16b, v10.16b + eor v9.16b, v9.16b, v7.16b + eor v3.16b, v0.16b, v5.16b + eor v0.16b, v1.16b, v4.16b + eor v1.16b, v4.16b, v8.16b + eor v4.16b, v9.16b, v5.16b + eor v6.16b, v6.16b, v3.16b + bcc .Lenc_done + ext v8.16b, v0.16b, v0.16b, #12 + ext v9.16b, v4.16b, v4.16b, #12 + ldr q28, [x11] + ext v10.16b, v6.16b, v6.16b, #12 + ext v16.16b, v1.16b, v1.16b, #12 + ext v17.16b, v3.16b, v3.16b, #12 + ext v18.16b, v7.16b, v7.16b, #12 + eor v0.16b, v0.16b, v8.16b + eor v4.16b, v4.16b, v9.16b + eor v6.16b, v6.16b, v10.16b + ext v19.16b, v2.16b, v2.16b, #12 + ext v20.16b, v5.16b, v5.16b, #12 + eor v1.16b, v1.16b, v16.16b + eor v3.16b, v3.16b, v17.16b + eor v7.16b, v7.16b, v18.16b + eor v2.16b, v2.16b, v19.16b + eor v16.16b, v16.16b, v0.16b + eor v5.16b, v5.16b, v20.16b + eor v17.16b, v17.16b, v6.16b + eor v10.16b, v10.16b, v4.16b + ext v0.16b, v0.16b, v0.16b, #8 + eor v9.16b, v9.16b, v1.16b + ext v1.16b, v1.16b, v1.16b, #8 + eor v8.16b, v8.16b, v5.16b + eor v16.16b, v16.16b, v5.16b + eor v18.16b, v18.16b, v3.16b + eor v19.16b, v19.16b, v7.16b + ext v3.16b, v3.16b, v3.16b, #8 + ext v7.16b, v7.16b, v7.16b, #8 + eor v20.16b, v20.16b, v2.16b + ext v6.16b, v6.16b, v6.16b, #8 + ext v21.16b, v5.16b, v5.16b, #8 + eor v17.16b, v17.16b, v5.16b + ext v2.16b, v2.16b, v2.16b, #8 + eor v10.16b, v10.16b, v5.16b + ext v22.16b, v4.16b, v4.16b, #8 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v16.16b + eor v5.16b, v7.16b, v18.16b + eor v4.16b, v3.16b, v17.16b + eor v3.16b, v6.16b, v10.16b + eor v7.16b, v21.16b, v20.16b + eor v6.16b, v2.16b, v19.16b + eor v2.16b, v22.16b, v9.16b + bne .Lenc_loop + ldr q28, [x11, #16]! 
// load from .LSRM0 on last round (x10 == 0) + b .Lenc_loop +.align 4 +.Lenc_done: + ushr v8.2d, v0.2d, #1 + movi v9.16b, #0x55 + ldr q10, [x9] + ushr v16.2d, v3.2d, #1 + movi v17.16b, #0x33 + ushr v18.2d, v4.2d, #1 + movi v19.16b, #0x0f + eor v8.16b, v8.16b, v1.16b + ushr v20.2d, v2.2d, #1 + eor v16.16b, v16.16b, v7.16b + eor v18.16b, v18.16b, v6.16b + and v8.16b, v8.16b, v9.16b + eor v20.16b, v20.16b, v5.16b + and v16.16b, v16.16b, v9.16b + and v18.16b, v18.16b, v9.16b + shl v21.2d, v8.2d, #1 + eor v1.16b, v1.16b, v8.16b + and v8.16b, v20.16b, v9.16b + eor v7.16b, v7.16b, v16.16b + shl v9.2d, v16.2d, #1 + eor v6.16b, v6.16b, v18.16b + shl v16.2d, v18.2d, #1 + eor v0.16b, v0.16b, v21.16b + shl v18.2d, v8.2d, #1 + eor v5.16b, v5.16b, v8.16b + eor v3.16b, v3.16b, v9.16b + eor v4.16b, v4.16b, v16.16b + ushr v8.2d, v1.2d, #2 + eor v2.16b, v2.16b, v18.16b + ushr v9.2d, v0.2d, #2 + ushr v16.2d, v7.2d, #2 + ushr v18.2d, v3.2d, #2 + eor v8.16b, v8.16b, v6.16b + eor v9.16b, v9.16b, v4.16b + eor v16.16b, v16.16b, v5.16b + eor v18.16b, v18.16b, v2.16b + and v8.16b, v8.16b, v17.16b + and v9.16b, v9.16b, v17.16b + and v16.16b, v16.16b, v17.16b + and v17.16b, v18.16b, v17.16b + eor v6.16b, v6.16b, v8.16b + shl v8.2d, v8.2d, #2 + eor v4.16b, v4.16b, v9.16b + shl v9.2d, v9.2d, #2 + eor v5.16b, v5.16b, v16.16b + shl v16.2d, v16.2d, #2 + eor v2.16b, v2.16b, v17.16b + shl v17.2d, v17.2d, #2 + eor v1.16b, v1.16b, v8.16b + eor v0.16b, v0.16b, v9.16b + eor v7.16b, v7.16b, v16.16b + eor v3.16b, v3.16b, v17.16b + ushr v8.2d, v6.2d, #4 + ushr v9.2d, v4.2d, #4 + ushr v16.2d, v1.2d, #4 + ushr v17.2d, v0.2d, #4 + eor v8.16b, v8.16b, v5.16b + eor v9.16b, v9.16b, v2.16b + eor v16.16b, v16.16b, v7.16b + eor v17.16b, v17.16b, v3.16b + and v8.16b, v8.16b, v19.16b + and v9.16b, v9.16b, v19.16b + and v16.16b, v16.16b, v19.16b + and v17.16b, v17.16b, v19.16b + eor v5.16b, v5.16b, v8.16b + shl v8.2d, v8.2d, #4 + eor v2.16b, v2.16b, v9.16b + shl v9.2d, v9.2d, #4 + eor v7.16b, v7.16b, v16.16b + shl v16.2d, v16.2d, #4 + eor v3.16b, v3.16b, v17.16b + shl v17.2d, v17.2d, #4 + eor v6.16b, v6.16b, v8.16b + eor v4.16b, v4.16b, v9.16b + eor v7.16b, v7.16b, v10.16b + eor v1.16b, v1.16b, v16.16b + eor v3.16b, v3.16b, v10.16b + eor v0.16b, v0.16b, v17.16b + eor v6.16b, v6.16b, v10.16b + eor v4.16b, v4.16b, v10.16b + eor v2.16b, v2.16b, v10.16b + eor v5.16b, v5.16b, v10.16b + eor v1.16b, v1.16b, v10.16b + eor v0.16b, v0.16b, v10.16b + ret +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_key_convert,%function +.align 4 +// On entry: +// x9 -> input key (big-endian) +// x10 = number of rounds +// x17 -> output key (native endianness) +// On exit: +// x9, x10 corrupted +// x11 -> .LM0_bigendian +// x17 -> last quadword of output key +// other general-purpose registers preserved +// v2-v6 preserved +// v7.16b[] = 0x63 +// v8-v14 preserved +// v15 = last round key (converted to native endianness) +// other SIMD registers corrupted +_bsaes_key_convert: +#ifdef __AARCH64EL__ + adrp x11, .LM0_littleendian + add x11, x11, #:lo12:.LM0_littleendian +#else + adrp x11, .LM0_bigendian + add x11, x11, #:lo12:.LM0_bigendian +#endif + ldr q0, [x9], #16 // load round 0 key + ldr q1, [x11] // .LM0 + ldr q15, [x9], #16 // load round 1 key + + movi v7.16b, #0x63 // compose .L63 + movi v16.16b, #0x01 // bit masks + movi v17.16b, #0x02 + movi v18.16b, #0x04 + movi v19.16b, #0x08 + movi v20.16b, #0x10 + movi v21.16b, #0x20 + movi v22.16b, #0x40 + movi v23.16b, #0x80 + +#ifdef __AARCH64EL__ + rev32 v0.16b, v0.16b +#endif + sub x10, x10, #1 + str q0, 
[x17], #16 // save round 0 key + +.align 4 +.Lkey_loop: + tbl v0.16b, {v15.16b}, v1.16b + ldr q15, [x9], #16 // load next round key + + eor v0.16b, v0.16b, v7.16b + cmtst v24.16b, v0.16b, v16.16b + cmtst v25.16b, v0.16b, v17.16b + cmtst v26.16b, v0.16b, v18.16b + cmtst v27.16b, v0.16b, v19.16b + cmtst v28.16b, v0.16b, v20.16b + cmtst v29.16b, v0.16b, v21.16b + cmtst v30.16b, v0.16b, v22.16b + cmtst v31.16b, v0.16b, v23.16b + sub x10, x10, #1 + st1 {v24.16b,v25.16b,v26.16b,v27.16b}, [x17], #64 // write bit-sliced round key + st1 {v28.16b,v29.16b,v30.16b,v31.16b}, [x17], #64 + cbnz x10, .Lkey_loop + + // don't save last round key +#ifdef __AARCH64EL__ + rev32 v15.16b, v15.16b + adrp x11, .LM0_bigendian + add x11, x11, #:lo12:.LM0_bigendian +#endif + ret +.size _bsaes_key_convert,.-_bsaes_key_convert + +.globl ossl_bsaes_cbc_encrypt +.type ossl_bsaes_cbc_encrypt,%function +.align 4 +// On entry: +// x0 -> input ciphertext +// x1 -> output plaintext +// x2 = size of ciphertext and plaintext in bytes (assumed a multiple of 16) +// x3 -> key +// x4 -> 128-bit initialisation vector (or preceding 128-bit block of ciphertext if continuing after an earlier call) +// w5 must be == 0 +// On exit: +// Output plaintext filled in +// Initialisation vector overwritten with last quadword of ciphertext +// No output registers, usual AAPCS64 register preservation +ossl_bsaes_cbc_encrypt: + AARCH64_VALID_CALL_TARGET + cmp x2, #128 + bhs .Lcbc_do_bsaes + b AES_cbc_encrypt +.Lcbc_do_bsaes: + + // it is up to the caller to make sure we are called with enc == 0 + + stp x29, x30, [sp, #-48]! + stp d8, d9, [sp, #16] + stp d10, d15, [sp, #32] + lsr x2, x2, #4 // len in 16 byte blocks + + ldr w15, [x3, #240] // get # of rounds + mov x14, sp + + // allocate the key schedule on the stack + add x17, sp, #96 + sub x17, x17, x15, lsl #7 // 128 bytes per inner round key, less 96 bytes + + // populate the key schedule + mov x9, x3 // pass key + mov x10, x15 // pass # of rounds + mov sp, x17 // sp is sp + bl _bsaes_key_convert + ldr q6, [sp] + str q15, [x17] // save last round key + eor v6.16b, v6.16b, v7.16b // fix up round 0 key (by XORing with 0x63) + str q6, [sp] + + ldr q15, [x4] // load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs x2, x2, #0x8 + bmi .Lcbc_dec_loop_finish + + ldr q0, [x0], #16 // load input + mov x9, sp // pass the key + ldr q1, [x0], #16 + mov x10, x15 + ldr q2, [x0], #16 + ldr q3, [x0], #16 + ldr q4, [x0], #16 + ldr q5, [x0], #16 + ldr q6, [x0], #16 + ldr q7, [x0], #-7*16 + + bl _bsaes_decrypt8 + + ldr q16, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + eor v1.16b, v1.16b, v16.16b + str q0, [x1], #16 // write output + ldr q0, [x0], #16 + str q1, [x1], #16 + ldr q1, [x0], #16 + eor v1.16b, v4.16b, v1.16b + ldr q4, [x0], #16 + eor v2.16b, v2.16b, v4.16b + eor v0.16b, v6.16b, v0.16b + ldr q4, [x0], #16 + str q0, [x1], #16 + str q1, [x1], #16 + eor v0.16b, v7.16b, v4.16b + ldr q1, [x0], #16 + str q2, [x1], #16 + ldr q2, [x0], #16 + ldr q15, [x0], #16 + str q0, [x1], #16 + eor v0.16b, v5.16b, v2.16b + eor v1.16b, v3.16b, v1.16b + str q1, [x1], #16 + str q0, [x1], #16 + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds x2, x2, #8 + beq .Lcbc_dec_done + + ldr q0, [x0], #16 // load input + cmp x2, #2 + blo .Lcbc_dec_one + ldr q1, [x0], #16 + mov x9, sp // pass the key + mov x10, x15 + beq .Lcbc_dec_two + ldr q2, [x0], #16 + cmp x2, #4 + blo .Lcbc_dec_three + ldr q3, [x0], #16 + beq .Lcbc_dec_four + ldr q4, [x0], #16 + cmp x2, #6 + blo .Lcbc_dec_five + ldr q5, [x0], #16 + beq 
.Lcbc_dec_six + ldr q6, [x0], #-6*16 + + bl _bsaes_decrypt8 + + ldr q5, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + ldr q8, [x0], #16 + ldr q9, [x0], #16 + ldr q10, [x0], #16 + str q0, [x1], #16 // write output + ldr q0, [x0], #16 + eor v1.16b, v1.16b, v5.16b + ldr q5, [x0], #16 + eor v6.16b, v6.16b, v8.16b + ldr q15, [x0] + eor v4.16b, v4.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + str q1, [x1], #16 + eor v0.16b, v7.16b, v0.16b + str q6, [x1], #16 + eor v1.16b, v3.16b, v5.16b + str q4, [x1], #16 + str q2, [x1], #16 + str q0, [x1], #16 + str q1, [x1] + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub x0, x0, #0x60 + bl _bsaes_decrypt8 + ldr q3, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + ldr q5, [x0], #16 + ldr q8, [x0], #16 + ldr q9, [x0], #16 + str q0, [x1], #16 // write output + ldr q0, [x0], #16 + eor v1.16b, v1.16b, v3.16b + ldr q15, [x0] + eor v3.16b, v6.16b, v5.16b + eor v4.16b, v4.16b, v8.16b + eor v2.16b, v2.16b, v9.16b + str q1, [x1], #16 + eor v0.16b, v7.16b, v0.16b + str q3, [x1], #16 + str q4, [x1], #16 + str q2, [x1], #16 + str q0, [x1] + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub x0, x0, #0x50 + bl _bsaes_decrypt8 + ldr q3, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + ldr q5, [x0], #16 + ldr q7, [x0], #16 + ldr q8, [x0], #16 + str q0, [x1], #16 // write output + ldr q15, [x0] + eor v0.16b, v1.16b, v3.16b + eor v1.16b, v6.16b, v5.16b + eor v3.16b, v4.16b, v7.16b + str q0, [x1], #16 + eor v0.16b, v2.16b, v8.16b + str q1, [x1], #16 + str q3, [x1], #16 + str q0, [x1] + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub x0, x0, #0x40 + bl _bsaes_decrypt8 + ldr q2, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + ldr q3, [x0], #16 + ldr q5, [x0], #16 + str q0, [x1], #16 // write output + ldr q15, [x0] + eor v0.16b, v1.16b, v2.16b + eor v1.16b, v6.16b, v3.16b + eor v2.16b, v4.16b, v5.16b + str q0, [x1], #16 + str q1, [x1], #16 + str q2, [x1] + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub x0, x0, #0x30 + bl _bsaes_decrypt8 + ldr q2, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + ldr q3, [x0], #16 + ldr q15, [x0] + str q0, [x1], #16 // write output + eor v0.16b, v1.16b, v2.16b + eor v1.16b, v6.16b, v3.16b + str q0, [x1], #16 + str q1, [x1] + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub x0, x0, #0x20 + bl _bsaes_decrypt8 + ldr q2, [x0], #16 // reload input + eor v0.16b, v0.16b, v15.16b // ^= IV + ldr q15, [x0] + str q0, [x1], #16 // write output + eor v0.16b, v1.16b, v2.16b + str q0, [x1] + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub x0, x0, #0x10 + stp x1, x4, [sp, #-32]! 
+ str x14, [sp, #16] + mov v8.16b, v15.16b + mov v15.16b, v0.16b + mov x2, x3 + bl AES_decrypt + ldr x14, [sp, #16] + ldp x1, x4, [sp], #32 + ldr q0, [x1] // load result + eor v0.16b, v0.16b, v8.16b // ^= IV + str q0, [x1] // write output + +.align 4 +.Lcbc_dec_done: + movi v0.16b, #0 + movi v1.16b, #0 +.Lcbc_dec_bzero: // wipe key schedule [if any] + stp q0, q1, [sp], #32 + cmp sp, x14 + bne .Lcbc_dec_bzero + str q15, [x4] // return IV + ldp d8, d9, [sp, #16] + ldp d10, d15, [sp, #32] + ldp x29, x30, [sp], #48 + ret +.size ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt + +.globl ossl_bsaes_ctr32_encrypt_blocks +.type ossl_bsaes_ctr32_encrypt_blocks,%function +.align 4 +// On entry: +// x0 -> input text (whole 16-byte blocks) +// x1 -> output text (whole 16-byte blocks) +// x2 = number of 16-byte blocks to encrypt/decrypt (> 0) +// x3 -> key +// x4 -> initial value of 128-bit counter (stored big-endian) which increments, modulo 2^32, for each block +// On exit: +// Output text filled in +// No output registers, usual AAPCS64 register preservation +ossl_bsaes_ctr32_encrypt_blocks: + AARCH64_VALID_CALL_TARGET + cmp x2, #8 // use plain AES for + blo .Lctr_enc_short // small sizes + + stp x29, x30, [sp, #-80]! + stp d8, d9, [sp, #16] + stp d10, d11, [sp, #32] + stp d12, d13, [sp, #48] + stp d14, d15, [sp, #64] + + ldr w15, [x3, #240] // get # of rounds + mov x14, sp + + // allocate the key schedule on the stack + add x17, sp, #96 + sub x17, x17, x15, lsl #7 // 128 bytes per inner round key, less 96 bytes + + // populate the key schedule + mov x9, x3 // pass key + mov x10, x15 // pass # of rounds + mov sp, x17 // sp is sp + bl _bsaes_key_convert + eor v7.16b, v7.16b, v15.16b // fix up last round key + str q7, [x17] // save last round key + + ldr q0, [x4] // load counter + add x13, x11, #.LREVM0SR-.LM0_bigendian + ldr q4, [sp] // load round0 key + + movi v8.4s, #1 // compose 1<<96 + movi v9.16b, #0 + rev32 v15.16b, v0.16b + rev32 v0.16b, v0.16b + ext v11.16b, v9.16b, v8.16b, #4 + rev32 v4.16b, v4.16b + add v12.4s, v11.4s, v11.4s // compose 2<<96 + str q4, [sp] // save adjusted round0 key + add v13.4s, v11.4s, v12.4s // compose 3<<96 + add v14.4s, v12.4s, v12.4s // compose 4<<96 + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + // Intermix prologue from _bsaes_encrypt8 to use the opportunity + // to flip byte order in 32-bit counter + + add v1.4s, v15.4s, v11.4s // +1 + add x9, sp, #0x10 // pass next round key + add v2.4s, v15.4s, v12.4s // +2 + ldr q9, [x13] // .LREVM0SR + ldr q8, [sp] // load round0 key + add v3.4s, v15.4s, v13.4s // +3 + mov x10, x15 // pass rounds + sub x11, x13, #.LREVM0SR-.LSR // pass constants + add v6.4s, v2.4s, v14.4s + add v4.4s, v15.4s, v14.4s // +4 + add v7.4s, v3.4s, v14.4s + add v15.4s, v4.4s, v14.4s // next counter + add v5.4s, v1.4s, v14.4s + + bl _bsaes_encrypt8_alt + + subs x2, x2, #8 + blo .Lctr_enc_loop_done + + ldr q16, [x0], #16 + ldr q17, [x0], #16 + eor v1.16b, v1.16b, v17.16b + ldr q17, [x0], #16 + eor v0.16b, v0.16b, v16.16b + eor v4.16b, v4.16b, v17.16b + str q0, [x1], #16 + ldr q16, [x0], #16 + str q1, [x1], #16 + mov v0.16b, v15.16b + str q4, [x1], #16 + ldr q1, [x0], #16 + eor v4.16b, v6.16b, v16.16b + eor v1.16b, v3.16b, v1.16b + ldr q3, [x0], #16 + eor v3.16b, v7.16b, v3.16b + ldr q6, [x0], #16 + eor v2.16b, v2.16b, v6.16b + ldr q6, [x0], #16 + eor v5.16b, v5.16b, v6.16b + str q4, [x1], #16 + str q1, [x1], #16 + str q3, [x1], #16 + str q2, [x1], #16 + str q5, [x1], #16 + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + 
add x2, x2, #8 + ldr q16, [x0], #16 // load input + eor v0.16b, v0.16b, v16.16b + str q0, [x1], #16 // write output + cmp x2, #2 + blo .Lctr_enc_done + ldr q17, [x0], #16 + eor v1.16b, v1.16b, v17.16b + str q1, [x1], #16 + beq .Lctr_enc_done + ldr q18, [x0], #16 + eor v4.16b, v4.16b, v18.16b + str q4, [x1], #16 + cmp x2, #4 + blo .Lctr_enc_done + ldr q19, [x0], #16 + eor v6.16b, v6.16b, v19.16b + str q6, [x1], #16 + beq .Lctr_enc_done + ldr q20, [x0], #16 + eor v3.16b, v3.16b, v20.16b + str q3, [x1], #16 + cmp x2, #6 + blo .Lctr_enc_done + ldr q21, [x0], #16 + eor v7.16b, v7.16b, v21.16b + str q7, [x1], #16 + beq .Lctr_enc_done + ldr q22, [x0] + eor v2.16b, v2.16b, v22.16b + str q2, [x1], #16 + +.Lctr_enc_done: + movi v0.16b, #0 + movi v1.16b, #0 +.Lctr_enc_bzero: // wipe key schedule [if any] + stp q0, q1, [sp], #32 + cmp sp, x14 + bne .Lctr_enc_bzero + + ldp d8, d9, [sp, #16] + ldp d10, d11, [sp, #32] + ldp d12, d13, [sp, #48] + ldp d14, d15, [sp, #64] + ldp x29, x30, [sp], #80 + ret + +.Lctr_enc_short: + stp x29, x30, [sp, #-96]! + stp x19, x20, [sp, #16] + stp x21, x22, [sp, #32] + str x23, [sp, #48] + + mov x19, x0 // copy arguments + mov x20, x1 + mov x21, x2 + mov x22, x3 + ldr w23, [x4, #12] // load counter .LSW + ldr q1, [x4] // load whole counter value +#ifdef __AARCH64EL__ + rev w23, w23 +#endif + str q1, [sp, #80] // copy counter value + +.Lctr_enc_short_loop: + add x0, sp, #80 // input counter value + add x1, sp, #64 // output on the stack + mov x2, x22 // key + + bl AES_encrypt + + ldr q0, [x19], #16 // load input + ldr q1, [sp, #64] // load encrypted counter + add x23, x23, #1 +#ifdef __AARCH64EL__ + rev w0, w23 + str w0, [sp, #80+12] // next counter value +#else + str w23, [sp, #80+12] // next counter value +#endif + eor v0.16b, v0.16b, v1.16b + str q0, [x20], #16 // store output + subs x21, x21, #1 + bne .Lctr_enc_short_loop + + movi v0.16b, #0 + movi v1.16b, #0 + stp q0, q1, [sp, #64] + + ldr x23, [sp, #48] + ldp x21, x22, [sp, #32] + ldp x19, x20, [sp, #16] + ldp x29, x30, [sp], #96 + ret +.size ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks + +.globl ossl_bsaes_xts_encrypt +.type ossl_bsaes_xts_encrypt,%function +.align 4 +// On entry: +// x0 -> input plaintext +// x1 -> output ciphertext +// x2 -> length of text in bytes (must be at least 16) +// x3 -> key1 (used to encrypt the XORed plaintext blocks) +// x4 -> key2 (used to encrypt the initial vector to yield the initial tweak) +// x5 -> 16-byte initial vector (typically, sector number) +// On exit: +// Output ciphertext filled in +// No output registers, usual AAPCS64 register preservation +ossl_bsaes_xts_encrypt: + AARCH64_VALID_CALL_TARGET + // Stack layout: + // sp -> + // nrounds*128-96 bytes: key schedule + // x19 -> + // 16 bytes: frame record + // 4*16 bytes: tweak storage across _bsaes_encrypt8 + // 6*8 bytes: storage for 5 callee-saved general-purpose registers + // 8*8 bytes: storage for 8 callee-saved SIMD registers + stp x29, x30, [sp, #-192]! 
+ stp x19, x20, [sp, #80] + stp x21, x22, [sp, #96] + str x23, [sp, #112] + stp d8, d9, [sp, #128] + stp d10, d11, [sp, #144] + stp d12, d13, [sp, #160] + stp d14, d15, [sp, #176] + + mov x19, sp + mov x20, x0 + mov x21, x1 + mov x22, x2 + mov x23, x3 + + // generate initial tweak + sub sp, sp, #16 + mov x0, x5 // iv[] + mov x1, sp + mov x2, x4 // key2 + bl AES_encrypt + ldr q11, [sp], #16 + + ldr w1, [x23, #240] // get # of rounds + // allocate the key schedule on the stack + add x17, sp, #96 + sub x17, x17, x1, lsl #7 // 128 bytes per inner round key, less 96 bytes + + // populate the key schedule + mov x9, x23 // pass key + mov x10, x1 // pass # of rounds + mov sp, x17 + bl _bsaes_key_convert + eor v15.16b, v15.16b, v7.16b // fix up last round key + str q15, [x17] // save last round key + + subs x22, x22, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + ldr q8, .Lxts_magic + mov x10, x1 // pass rounds + add x2, x19, #16 + ldr q0, [x20], #16 + sshr v1.2d, v11.2d, #63 + mov x9, sp // pass key schedule + ldr q6, .Lxts_magic+16 + add v2.2d, v11.2d, v11.2d + cmtst v3.2d, v11.2d, v6.2d + and v1.16b, v1.16b, v8.16b + ext v1.16b, v1.16b, v1.16b, #8 + and v3.16b, v3.16b, v8.16b + ldr q4, [x20], #16 + eor v12.16b, v2.16b, v1.16b + eor v1.16b, v4.16b, v12.16b + eor v0.16b, v0.16b, v11.16b + cmtst v2.2d, v12.2d, v6.2d + add v4.2d, v12.2d, v12.2d + add x0, x19, #16 + ext v3.16b, v3.16b, v3.16b, #8 + and v2.16b, v2.16b, v8.16b + eor v13.16b, v4.16b, v3.16b + ldr q3, [x20], #16 + ext v4.16b, v2.16b, v2.16b, #8 + eor v2.16b, v3.16b, v13.16b + ldr q3, [x20], #16 + add v5.2d, v13.2d, v13.2d + cmtst v7.2d, v13.2d, v6.2d + and v7.16b, v7.16b, v8.16b + ldr q9, [x20], #16 + ext v7.16b, v7.16b, v7.16b, #8 + ldr q10, [x20], #16 + eor v14.16b, v5.16b, v4.16b + ldr q16, [x20], #16 + add v4.2d, v14.2d, v14.2d + eor v3.16b, v3.16b, v14.16b + eor v15.16b, v4.16b, v7.16b + add v5.2d, v15.2d, v15.2d + ldr q7, [x20], #16 + cmtst v4.2d, v14.2d, v6.2d + and v17.16b, v4.16b, v8.16b + cmtst v18.2d, v15.2d, v6.2d + eor v4.16b, v9.16b, v15.16b + ext v9.16b, v17.16b, v17.16b, #8 + eor v9.16b, v5.16b, v9.16b + add v17.2d, v9.2d, v9.2d + and v18.16b, v18.16b, v8.16b + eor v5.16b, v10.16b, v9.16b + str q9, [x2], #16 + ext v10.16b, v18.16b, v18.16b, #8 + cmtst v9.2d, v9.2d, v6.2d + and v9.16b, v9.16b, v8.16b + eor v10.16b, v17.16b, v10.16b + cmtst v17.2d, v10.2d, v6.2d + eor v6.16b, v16.16b, v10.16b + str q10, [x2], #16 + ext v9.16b, v9.16b, v9.16b, #8 + add v10.2d, v10.2d, v10.2d + eor v9.16b, v10.16b, v9.16b + str q9, [x2], #16 + eor v7.16b, v7.16b, v9.16b + add v9.2d, v9.2d, v9.2d + and v8.16b, v17.16b, v8.16b + ext v8.16b, v8.16b, v8.16b, #8 + eor v8.16b, v9.16b, v8.16b + str q8, [x2] // next round tweak + + bl _bsaes_encrypt8 + + ldr q8, [x0], #16 + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + ldr q9, [x0], #16 + eor v4.16b, v4.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + ldr q10, [x0], #16 + eor v3.16b, v3.16b, v15.16b + subs x22, x22, #0x80 + str q0, [x21], #16 + ldr q11, [x0] // next round tweak + str q1, [x21], #16 + eor v0.16b, v7.16b, v8.16b + eor v1.16b, v2.16b, v9.16b + str q4, [x21], #16 + eor v2.16b, v5.16b, v10.16b + str q6, [x21], #16 + str q3, [x21], #16 + str q0, [x21], #16 + str q1, [x21], #16 + str q2, [x21], #16 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds x22, x22, #0x70 + bmi .Lxts_enc_done + + ldr q8, .Lxts_magic + sshr v1.2d, v11.2d, #63 + add v2.2d, v11.2d, v11.2d + ldr q9, .Lxts_magic+16 + subs x22, x22, #0x10 + ldr q0, [x20], #16 + and v1.16b, v1.16b, 
v8.16b + cmtst v3.2d, v11.2d, v9.2d + ext v1.16b, v1.16b, v1.16b, #8 + and v3.16b, v3.16b, v8.16b + eor v12.16b, v2.16b, v1.16b + ext v1.16b, v3.16b, v3.16b, #8 + add v2.2d, v12.2d, v12.2d + cmtst v3.2d, v12.2d, v9.2d + eor v13.16b, v2.16b, v1.16b + and v22.16b, v3.16b, v8.16b + bmi .Lxts_enc_1 + + ext v2.16b, v22.16b, v22.16b, #8 + add v3.2d, v13.2d, v13.2d + ldr q1, [x20], #16 + cmtst v4.2d, v13.2d, v9.2d + subs x22, x22, #0x10 + eor v14.16b, v3.16b, v2.16b + and v23.16b, v4.16b, v8.16b + bmi .Lxts_enc_2 + + ext v3.16b, v23.16b, v23.16b, #8 + add v4.2d, v14.2d, v14.2d + ldr q2, [x20], #16 + cmtst v5.2d, v14.2d, v9.2d + eor v0.16b, v0.16b, v11.16b + subs x22, x22, #0x10 + eor v15.16b, v4.16b, v3.16b + and v24.16b, v5.16b, v8.16b + bmi .Lxts_enc_3 + + ext v4.16b, v24.16b, v24.16b, #8 + add v5.2d, v15.2d, v15.2d + ldr q3, [x20], #16 + cmtst v6.2d, v15.2d, v9.2d + eor v1.16b, v1.16b, v12.16b + subs x22, x22, #0x10 + eor v16.16b, v5.16b, v4.16b + and v25.16b, v6.16b, v8.16b + bmi .Lxts_enc_4 + + ext v5.16b, v25.16b, v25.16b, #8 + add v6.2d, v16.2d, v16.2d + add x0, x19, #16 + cmtst v7.2d, v16.2d, v9.2d + ldr q4, [x20], #16 + eor v2.16b, v2.16b, v13.16b + str q16, [x0], #16 + subs x22, x22, #0x10 + eor v17.16b, v6.16b, v5.16b + and v26.16b, v7.16b, v8.16b + bmi .Lxts_enc_5 + + ext v7.16b, v26.16b, v26.16b, #8 + add v18.2d, v17.2d, v17.2d + ldr q5, [x20], #16 + eor v3.16b, v3.16b, v14.16b + str q17, [x0], #16 + subs x22, x22, #0x10 + eor v18.16b, v18.16b, v7.16b + bmi .Lxts_enc_6 + + ldr q6, [x20], #16 + eor v4.16b, v4.16b, v15.16b + eor v5.16b, v5.16b, v16.16b + str q18, [x0] // next round tweak + mov x9, sp // pass key schedule + mov x10, x1 + add x0, x19, #16 + sub x22, x22, #0x10 + eor v6.16b, v6.16b, v17.16b + + bl _bsaes_encrypt8 + + ldr q16, [x0], #16 + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + ldr q17, [x0], #16 + eor v4.16b, v4.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v3.16b, v3.16b, v15.16b + ldr q11, [x0] // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + eor v0.16b, v7.16b, v16.16b + eor v1.16b, v2.16b, v17.16b + str q4, [x21], #16 + str q6, [x21], #16 + str q3, [x21], #16 + str q0, [x21], #16 + str q1, [x21], #16 + b .Lxts_enc_done + +.align 4 +.Lxts_enc_6: + eor v4.16b, v4.16b, v15.16b + eor v5.16b, v5.16b, v16.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_encrypt8 + + ldr q16, [x0], #16 + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + ldr q11, [x0] // next round tweak + eor v3.16b, v3.16b, v15.16b + str q0, [x21], #16 + str q1, [x21], #16 + eor v0.16b, v7.16b, v16.16b + str q4, [x21], #16 + str q6, [x21], #16 + str q3, [x21], #16 + str q0, [x21], #16 + b .Lxts_enc_done + +.align 4 +.Lxts_enc_5: + eor v3.16b, v3.16b, v14.16b + eor v4.16b, v4.16b, v15.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_encrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + ldr q11, [x0] // next round tweak + eor v4.16b, v4.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v3.16b, v3.16b, v15.16b + str q0, [x21], #16 + str q1, [x21], #16 + str q4, [x21], #16 + str q6, [x21], #16 + str q3, [x21], #16 + b .Lxts_enc_done + +.align 4 +.Lxts_enc_4: + eor v2.16b, v2.16b, v13.16b + eor v3.16b, v3.16b, v14.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_encrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + eor 
v4.16b, v4.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + mov v11.16b, v15.16b // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + str q4, [x21], #16 + str q6, [x21], #16 + b .Lxts_enc_done + +.align 4 +.Lxts_enc_3: + eor v1.16b, v1.16b, v12.16b + eor v2.16b, v2.16b, v13.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_encrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v13.16b + mov v11.16b, v14.16b // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + str q4, [x21], #16 + b .Lxts_enc_done + +.align 4 +.Lxts_enc_2: + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_encrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + mov v11.16b, v13.16b // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + b .Lxts_enc_done + +.align 4 +.Lxts_enc_1: + eor v0.16b, v0.16b, v11.16b + sub x0, sp, #16 + sub x1, sp, #16 + mov x2, x23 + mov v13.d[0], v11.d[1] // just in case AES_encrypt corrupts top half of callee-saved SIMD registers + mov v14.d[0], v12.d[1] + str q0, [sp, #-16]! + + bl AES_encrypt + + ldr q0, [sp], #16 + trn1 v13.2d, v11.2d, v13.2d + trn1 v11.2d, v12.2d, v14.2d // next round tweak + eor v0.16b, v0.16b, v13.16b + str q0, [x21], #16 + +.Lxts_enc_done: + adds x22, x22, #0x10 + beq .Lxts_enc_ret + + sub x6, x21, #0x10 + // Penultimate plaintext block produces final ciphertext part-block + // plus remaining part of final plaintext block. Move ciphertext part + // to final position and reuse penultimate ciphertext block buffer to + // construct final plaintext block +.Lxts_enc_steal: + ldrb w0, [x20], #1 + ldrb w1, [x21, #-0x10] + strb w0, [x21, #-0x10] + strb w1, [x21], #1 + + subs x22, x22, #1 + bhi .Lxts_enc_steal + + // Finally encrypt the penultimate ciphertext block using the + // last tweak + ldr q0, [x6] + eor v0.16b, v0.16b, v11.16b + str q0, [sp, #-16]! 
+ mov x0, sp + mov x1, sp + mov x2, x23 + mov x21, x6 + mov v13.d[0], v11.d[1] // just in case AES_encrypt corrupts top half of callee-saved SIMD registers + + bl AES_encrypt + + trn1 v11.2d, v11.2d, v13.2d + ldr q0, [sp], #16 + eor v0.16b, v0.16b, v11.16b + str q0, [x21] + +.Lxts_enc_ret: + + movi v0.16b, #0 + movi v1.16b, #0 +.Lxts_enc_bzero: // wipe key schedule + stp q0, q1, [sp], #32 + cmp sp, x19 + bne .Lxts_enc_bzero + + ldp x19, x20, [sp, #80] + ldp x21, x22, [sp, #96] + ldr x23, [sp, #112] + ldp d8, d9, [sp, #128] + ldp d10, d11, [sp, #144] + ldp d12, d13, [sp, #160] + ldp d14, d15, [sp, #176] + ldp x29, x30, [sp], #192 + ret +.size ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt + +// The assembler doesn't seem capable of de-duplicating these when expressed +// using `ldr qd,=` syntax, so assign a symbolic address +.align 5 +.Lxts_magic: +.quad 1, 0x87, 0x4000000000000000, 0x4000000000000000 + +.globl ossl_bsaes_xts_decrypt +.type ossl_bsaes_xts_decrypt,%function +.align 4 +// On entry: +// x0 -> input ciphertext +// x1 -> output plaintext +// x2 -> length of text in bytes (must be at least 16) +// x3 -> key1 (used to decrypt the XORed ciphertext blocks) +// x4 -> key2 (used to encrypt the initial vector to yield the initial tweak) +// x5 -> 16-byte initial vector (typically, sector number) +// On exit: +// Output plaintext filled in +// No output registers, usual AAPCS64 register preservation +ossl_bsaes_xts_decrypt: + AARCH64_VALID_CALL_TARGET + // Stack layout: + // sp -> + // nrounds*128-96 bytes: key schedule + // x19 -> + // 16 bytes: frame record + // 4*16 bytes: tweak storage across _bsaes_decrypt8 + // 6*8 bytes: storage for 5 callee-saved general-purpose registers + // 8*8 bytes: storage for 8 callee-saved SIMD registers + stp x29, x30, [sp, #-192]! 
+ stp x19, x20, [sp, #80] + stp x21, x22, [sp, #96] + str x23, [sp, #112] + stp d8, d9, [sp, #128] + stp d10, d11, [sp, #144] + stp d12, d13, [sp, #160] + stp d14, d15, [sp, #176] + + mov x19, sp + mov x20, x0 + mov x21, x1 + mov x22, x2 + mov x23, x3 + + // generate initial tweak + sub sp, sp, #16 + mov x0, x5 // iv[] + mov x1, sp + mov x2, x4 // key2 + bl AES_encrypt + ldr q11, [sp], #16 + + ldr w1, [x23, #240] // get # of rounds + // allocate the key schedule on the stack + add x17, sp, #96 + sub x17, x17, x1, lsl #7 // 128 bytes per inner round key, less 96 bytes + + // populate the key schedule + mov x9, x23 // pass key + mov x10, x1 // pass # of rounds + mov sp, x17 + bl _bsaes_key_convert + ldr q6, [sp] + str q15, [x17] // save last round key + eor v6.16b, v6.16b, v7.16b // fix up round 0 key (by XORing with 0x63) + str q6, [sp] + + sub x30, x22, #0x10 + tst x22, #0xf // if not multiple of 16 + csel x22, x30, x22, ne // subtract another 16 bytes + subs x22, x22, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + ldr q8, .Lxts_magic + mov x10, x1 // pass rounds + add x2, x19, #16 + ldr q0, [x20], #16 + sshr v1.2d, v11.2d, #63 + mov x9, sp // pass key schedule + ldr q6, .Lxts_magic+16 + add v2.2d, v11.2d, v11.2d + cmtst v3.2d, v11.2d, v6.2d + and v1.16b, v1.16b, v8.16b + ext v1.16b, v1.16b, v1.16b, #8 + and v3.16b, v3.16b, v8.16b + ldr q4, [x20], #16 + eor v12.16b, v2.16b, v1.16b + eor v1.16b, v4.16b, v12.16b + eor v0.16b, v0.16b, v11.16b + cmtst v2.2d, v12.2d, v6.2d + add v4.2d, v12.2d, v12.2d + add x0, x19, #16 + ext v3.16b, v3.16b, v3.16b, #8 + and v2.16b, v2.16b, v8.16b + eor v13.16b, v4.16b, v3.16b + ldr q3, [x20], #16 + ext v4.16b, v2.16b, v2.16b, #8 + eor v2.16b, v3.16b, v13.16b + ldr q3, [x20], #16 + add v5.2d, v13.2d, v13.2d + cmtst v7.2d, v13.2d, v6.2d + and v7.16b, v7.16b, v8.16b + ldr q9, [x20], #16 + ext v7.16b, v7.16b, v7.16b, #8 + ldr q10, [x20], #16 + eor v14.16b, v5.16b, v4.16b + ldr q16, [x20], #16 + add v4.2d, v14.2d, v14.2d + eor v3.16b, v3.16b, v14.16b + eor v15.16b, v4.16b, v7.16b + add v5.2d, v15.2d, v15.2d + ldr q7, [x20], #16 + cmtst v4.2d, v14.2d, v6.2d + and v17.16b, v4.16b, v8.16b + cmtst v18.2d, v15.2d, v6.2d + eor v4.16b, v9.16b, v15.16b + ext v9.16b, v17.16b, v17.16b, #8 + eor v9.16b, v5.16b, v9.16b + add v17.2d, v9.2d, v9.2d + and v18.16b, v18.16b, v8.16b + eor v5.16b, v10.16b, v9.16b + str q9, [x2], #16 + ext v10.16b, v18.16b, v18.16b, #8 + cmtst v9.2d, v9.2d, v6.2d + and v9.16b, v9.16b, v8.16b + eor v10.16b, v17.16b, v10.16b + cmtst v17.2d, v10.2d, v6.2d + eor v6.16b, v16.16b, v10.16b + str q10, [x2], #16 + ext v9.16b, v9.16b, v9.16b, #8 + add v10.2d, v10.2d, v10.2d + eor v9.16b, v10.16b, v9.16b + str q9, [x2], #16 + eor v7.16b, v7.16b, v9.16b + add v9.2d, v9.2d, v9.2d + and v8.16b, v17.16b, v8.16b + ext v8.16b, v8.16b, v8.16b, #8 + eor v8.16b, v9.16b, v8.16b + str q8, [x2] // next round tweak + + bl _bsaes_decrypt8 + + eor v6.16b, v6.16b, v13.16b + eor v0.16b, v0.16b, v11.16b + ldr q8, [x0], #16 + eor v7.16b, v7.16b, v8.16b + str q0, [x21], #16 + eor v0.16b, v1.16b, v12.16b + ldr q1, [x0], #16 + eor v1.16b, v3.16b, v1.16b + subs x22, x22, #0x80 + eor v2.16b, v2.16b, v15.16b + eor v3.16b, v4.16b, v14.16b + ldr q4, [x0], #16 + str q0, [x21], #16 + ldr q11, [x0] // next round tweak + eor v0.16b, v5.16b, v4.16b + str q6, [x21], #16 + str q3, [x21], #16 + str q2, [x21], #16 + str q7, [x21], #16 + str q1, [x21], #16 + str q0, [x21], #16 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds x22, x22, #0x70 + bmi .Lxts_dec_done + + ldr 
q8, .Lxts_magic + sshr v1.2d, v11.2d, #63 + add v2.2d, v11.2d, v11.2d + ldr q9, .Lxts_magic+16 + subs x22, x22, #0x10 + ldr q0, [x20], #16 + and v1.16b, v1.16b, v8.16b + cmtst v3.2d, v11.2d, v9.2d + ext v1.16b, v1.16b, v1.16b, #8 + and v3.16b, v3.16b, v8.16b + eor v12.16b, v2.16b, v1.16b + ext v1.16b, v3.16b, v3.16b, #8 + add v2.2d, v12.2d, v12.2d + cmtst v3.2d, v12.2d, v9.2d + eor v13.16b, v2.16b, v1.16b + and v22.16b, v3.16b, v8.16b + bmi .Lxts_dec_1 + + ext v2.16b, v22.16b, v22.16b, #8 + add v3.2d, v13.2d, v13.2d + ldr q1, [x20], #16 + cmtst v4.2d, v13.2d, v9.2d + subs x22, x22, #0x10 + eor v14.16b, v3.16b, v2.16b + and v23.16b, v4.16b, v8.16b + bmi .Lxts_dec_2 + + ext v3.16b, v23.16b, v23.16b, #8 + add v4.2d, v14.2d, v14.2d + ldr q2, [x20], #16 + cmtst v5.2d, v14.2d, v9.2d + eor v0.16b, v0.16b, v11.16b + subs x22, x22, #0x10 + eor v15.16b, v4.16b, v3.16b + and v24.16b, v5.16b, v8.16b + bmi .Lxts_dec_3 + + ext v4.16b, v24.16b, v24.16b, #8 + add v5.2d, v15.2d, v15.2d + ldr q3, [x20], #16 + cmtst v6.2d, v15.2d, v9.2d + eor v1.16b, v1.16b, v12.16b + subs x22, x22, #0x10 + eor v16.16b, v5.16b, v4.16b + and v25.16b, v6.16b, v8.16b + bmi .Lxts_dec_4 + + ext v5.16b, v25.16b, v25.16b, #8 + add v6.2d, v16.2d, v16.2d + add x0, x19, #16 + cmtst v7.2d, v16.2d, v9.2d + ldr q4, [x20], #16 + eor v2.16b, v2.16b, v13.16b + str q16, [x0], #16 + subs x22, x22, #0x10 + eor v17.16b, v6.16b, v5.16b + and v26.16b, v7.16b, v8.16b + bmi .Lxts_dec_5 + + ext v7.16b, v26.16b, v26.16b, #8 + add v18.2d, v17.2d, v17.2d + ldr q5, [x20], #16 + eor v3.16b, v3.16b, v14.16b + str q17, [x0], #16 + subs x22, x22, #0x10 + eor v18.16b, v18.16b, v7.16b + bmi .Lxts_dec_6 + + ldr q6, [x20], #16 + eor v4.16b, v4.16b, v15.16b + eor v5.16b, v5.16b, v16.16b + str q18, [x0] // next round tweak + mov x9, sp // pass key schedule + mov x10, x1 + add x0, x19, #16 + sub x22, x22, #0x10 + eor v6.16b, v6.16b, v17.16b + + bl _bsaes_decrypt8 + + ldr q16, [x0], #16 + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + ldr q17, [x0], #16 + eor v6.16b, v6.16b, v13.16b + eor v4.16b, v4.16b, v14.16b + eor v2.16b, v2.16b, v15.16b + ldr q11, [x0] // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + eor v0.16b, v7.16b, v16.16b + eor v1.16b, v3.16b, v17.16b + str q6, [x21], #16 + str q4, [x21], #16 + str q2, [x21], #16 + str q0, [x21], #16 + str q1, [x21], #16 + b .Lxts_dec_done + +.align 4 +.Lxts_dec_6: + eor v4.16b, v4.16b, v15.16b + eor v5.16b, v5.16b, v16.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_decrypt8 + + ldr q16, [x0], #16 + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v4.16b, v4.16b, v14.16b + ldr q11, [x0] // next round tweak + eor v2.16b, v2.16b, v15.16b + str q0, [x21], #16 + str q1, [x21], #16 + eor v0.16b, v7.16b, v16.16b + str q6, [x21], #16 + str q4, [x21], #16 + str q2, [x21], #16 + str q0, [x21], #16 + b .Lxts_dec_done + +.align 4 +.Lxts_dec_5: + eor v3.16b, v3.16b, v14.16b + eor v4.16b, v4.16b, v15.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_decrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + ldr q11, [x0] // next round tweak + eor v6.16b, v6.16b, v13.16b + eor v4.16b, v4.16b, v14.16b + eor v2.16b, v2.16b, v15.16b + str q0, [x21], #16 + str q1, [x21], #16 + str q6, [x21], #16 + str q4, [x21], #16 + str q2, [x21], #16 + b .Lxts_dec_done + +.align 4 +.Lxts_dec_4: + eor v2.16b, v2.16b, v13.16b + eor v3.16b, v3.16b, v14.16b + mov x9, sp 
// pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_decrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + eor v4.16b, v4.16b, v14.16b + mov v11.16b, v15.16b // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + str q6, [x21], #16 + str q4, [x21], #16 + b .Lxts_dec_done + +.align 4 +.Lxts_dec_3: + eor v1.16b, v1.16b, v12.16b + eor v2.16b, v2.16b, v13.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_decrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + eor v6.16b, v6.16b, v13.16b + mov v11.16b, v14.16b // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + str q6, [x21], #16 + b .Lxts_dec_done + +.align 4 +.Lxts_dec_2: + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + mov x9, sp // pass key schedule + mov x10, x1 // pass rounds + add x0, x19, #16 + + bl _bsaes_decrypt8 + + eor v0.16b, v0.16b, v11.16b + eor v1.16b, v1.16b, v12.16b + mov v11.16b, v13.16b // next round tweak + str q0, [x21], #16 + str q1, [x21], #16 + b .Lxts_dec_done + +.align 4 +.Lxts_dec_1: + eor v0.16b, v0.16b, v11.16b + sub x0, sp, #16 + sub x1, sp, #16 + mov x2, x23 + mov v13.d[0], v11.d[1] // just in case AES_decrypt corrupts top half of callee-saved SIMD registers + mov v14.d[0], v12.d[1] + str q0, [sp, #-16]! + + bl AES_decrypt + + ldr q0, [sp], #16 + trn1 v13.2d, v11.2d, v13.2d + trn1 v11.2d, v12.2d, v14.2d // next round tweak + eor v0.16b, v0.16b, v13.16b + str q0, [x21], #16 + +.Lxts_dec_done: + adds x22, x22, #0x10 + beq .Lxts_dec_ret + + // calculate one round of extra tweak for the stolen ciphertext + ldr q8, .Lxts_magic + sshr v6.2d, v11.2d, #63 + and v6.16b, v6.16b, v8.16b + add v12.2d, v11.2d, v11.2d + ext v6.16b, v6.16b, v6.16b, #8 + eor v12.16b, v12.16b, v6.16b + + // perform the final decryption with the last tweak value + ldr q0, [x20], #16 + eor v0.16b, v0.16b, v12.16b + str q0, [sp, #-16]! + mov x0, sp + mov x1, sp + mov x2, x23 + mov v13.d[0], v11.d[1] // just in case AES_decrypt corrupts top half of callee-saved SIMD registers + mov v14.d[0], v12.d[1] + + bl AES_decrypt + + trn1 v12.2d, v12.2d, v14.2d + trn1 v11.2d, v11.2d, v13.2d + ldr q0, [sp], #16 + eor v0.16b, v0.16b, v12.16b + str q0, [x21] + + mov x6, x21 + // Penultimate ciphertext block produces final plaintext part-block + // plus remaining part of final ciphertext block. Move plaintext part + // to final position and reuse penultimate plaintext block buffer to + // construct final ciphertext block +.Lxts_dec_steal: + ldrb w1, [x21] + ldrb w0, [x20], #1 + strb w1, [x21, #0x10] + strb w0, [x21], #1 + + subs x22, x22, #1 + bhi .Lxts_dec_steal + + // Finally decrypt the penultimate plaintext block using the + // penultimate tweak + ldr q0, [x6] + eor v0.16b, v0.16b, v11.16b + str q0, [sp, #-16]! 
+ mov x0, sp + mov x1, sp + mov x2, x23 + mov x21, x6 + + bl AES_decrypt + + trn1 v11.2d, v11.2d, v13.2d + ldr q0, [sp], #16 + eor v0.16b, v0.16b, v11.16b + str q0, [x21] + +.Lxts_dec_ret: + + movi v0.16b, #0 + movi v1.16b, #0 +.Lxts_dec_bzero: // wipe key schedule + stp q0, q1, [sp], #32 + cmp sp, x19 + bne .Lxts_dec_bzero + + ldp x19, x20, [sp, #80] + ldp x21, x22, [sp, #96] + ldr x23, [sp, #112] + ldp d8, d9, [sp, #128] + ldp d10, d11, [sp, #144] + ldp d12, d13, [sp, #160] + ldp d14, d15, [sp, #176] + ldp x29, x30, [sp], #192 + ret +.size ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt diff --git a/contrib/openssl-cmake/asm/crypto/aes/bsaes-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/bsaes-x86_64.s new file mode 100644 index 000000000000..7754c0df656e --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/bsaes-x86_64.s @@ -0,0 +1,2600 @@ +.text + + + + +.type _bsaes_encrypt8,@function +.align 64 +_bsaes_encrypt8: +.cfi_startproc + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Lenc_sbox +.align 16 +.Lenc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +.Lenc_sbox: + pxor 
%xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl .Lenc_done + pshufd $0x93,%xmm15,%xmm7 + pshufd $0x93,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $0x93,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $0x93,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd 
$0x93,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $0x93,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $0x93,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $0x93,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $0x4E,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $0x4E,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $0x4E,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $0x4E,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $0x4E,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $0x4E,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $0x4E,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $0x4E,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz .Lenc_loop + movdqa 64(%r11),%xmm7 + jmp .Lenc_loop +.align 16 +.Lenc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.cfi_endproc +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_decrypt8,@function +.align 64 +_bsaes_decrypt8: +.cfi_startproc + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand 
%xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Ldec_sbox +.align 16 +.Ldec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +.Ldec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + 
pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl .Ldec_done + + pshufd $0x4E,%xmm15,%xmm7 + pshufd $0x4E,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $0x4E,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $0x4E,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $0x4E,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $0x4E,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $0x4E,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $0x4E,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $0x93,%xmm15,%xmm7 + pshufd $0x93,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $0x93,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $0x93,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $0x93,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $0x93,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $0x93,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $0x93,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $0x4E,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $0x4E,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $0x4E,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $0x4E,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $0x4E,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $0x4E,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $0x4E,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $0x4E,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + 
jnz .Ldec_loop + movdqa -32(%r11),%xmm7 + jmp .Ldec_loop +.align 16 +.Ldec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.cfi_endproc +.size _bsaes_decrypt8,.-_bsaes_decrypt8 +.type _bsaes_key_convert,@function +.align 16 +_bsaes_key_convert: +.cfi_startproc + leaq .Lmasks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp .Lkey_loop +.align 16 +.Lkey_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz .Lkey_loop + + movdqa 80(%r11),%xmm7 + + .byte 0xf3,0xc3 +.cfi_endproc +.size 
_bsaes_key_convert,.-_bsaes_key_convert + +.globl ossl_bsaes_cbc_encrypt +.type ossl_bsaes_cbc_encrypt,@function +.align 16 +ossl_bsaes_cbc_encrypt: +.cfi_startproc +.byte 243,15,30,250 + cmpl $0,%r9d + jne asm_AES_cbc_encrypt + cmpq $128,%rdx + jb asm_AES_cbc_encrypt + + movq %rsp,%rax +.Lcbc_dec_prologue: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -72(%rsp),%rsp +.cfi_adjust_cfa_offset 0x48 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +.Lcbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc .Lcbc_dec_loop + + addq $8,%r14 + jz .Lcbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb .Lcbc_dec_one + movdqu 16(%r12),%xmm0 + je .Lcbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb .Lcbc_dec_three + movdqu 48(%r12),%xmm2 + je .Lcbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb .Lcbc_dec_five + movdqu 80(%r12),%xmm4 + je .Lcbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp .Lcbc_dec_done +.align 16 
+.Lcbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +.Lcbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lcbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lcbc_dec_bzero + + leaq 120(%rbp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbx +.cfi_restore %rbx + movq -8(%rax),%rbp +.cfi_restore %rbp + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lcbc_dec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_bsaes_cbc_encrypt,.-ossl_bsaes_cbc_encrypt + +.globl ossl_bsaes_ctr32_encrypt_blocks +.type ossl_bsaes_ctr32_encrypt_blocks,@function +.align 16 +ossl_bsaes_ctr32_encrypt_blocks: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.Lctr_enc_prologue: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -72(%rsp),%rsp +.cfi_adjust_cfa_offset 0x48 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb .Lctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq .LADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp .Lctr_enc_loop +.align 16 +.Lctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + 
movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq .LBS0(%rip),%r11 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc .Lctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq .LADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.align 16 +.Lctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb .Lctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je .Lctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb .Lctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je .Lctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb .Lctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je .Lctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp .Lctr_enc_done + +.align 16 +.Lctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz .Lctr_enc_short + +.Lctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lctr_enc_bzero + + leaq 120(%rbp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbx +.cfi_restore %rbx + movq -8(%rax),%rbp +.cfi_restore %rbp + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lctr_enc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_bsaes_ctr32_encrypt_blocks,.-ossl_bsaes_ctr32_encrypt_blocks +.globl ossl_bsaes_xts_encrypt +.type ossl_bsaes_xts_encrypt,@function +.align 16 +ossl_bsaes_xts_encrypt: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.Lxts_enc_prologue: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 
+.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -72(%rsp),%rsp +.cfi_adjust_cfa_offset 0x48 + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $0x80,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $0x80,%r14 + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.align 16 +.Lxts_enc_loop: + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $0x80,%r14 + jnc .Lxts_enc_loop + +.Lxts_enc_short: + addq $0x80,%r14 + jz .Lxts_enc_done + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 
+ movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_enc_1 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_enc_2 + pxor %xmm7,%xmm15 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_enc_3 + pxor %xmm8,%xmm0 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_enc_4 + pxor %xmm9,%xmm1 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_enc_5 + pxor %xmm10,%xmm2 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 
16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_enc_done: + andl $15,%ebx + jz .Lxts_enc_ret + movq %r13,%rdx + +.Lxts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +.Lxts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_enc_bzero + + leaq 120(%rbp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbx +.cfi_restore %rbx + movq -8(%rax),%rbp +.cfi_restore %rbp + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_bsaes_xts_encrypt,.-ossl_bsaes_xts_encrypt + +.globl ossl_bsaes_xts_decrypt +.type ossl_bsaes_xts_decrypt,@function +.align 16 +ossl_bsaes_xts_decrypt: +.cfi_startproc +.byte 243,15,30,250 + movq %rsp,%rax +.Lxts_dec_prologue: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -72(%rsp),%rsp +.cfi_adjust_cfa_offset 0x48 + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $0x80,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $0x80,%r14 + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.align 16 +.Lxts_dec_loop: + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 
0(%r12),%xmm7 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $0x80,%r14 + jnc .Lxts_dec_loop + +.Lxts_dec_short: + addq $0x80,%r14 + jz .Lxts_dec_done + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_dec_1 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_dec_2 + pxor %xmm7,%xmm15 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_dec_3 + pxor %xmm8,%xmm0 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_dec_4 + pxor %xmm9,%xmm1 + pshufd $0x13,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_dec_5 + pxor %xmm10,%xmm2 + pshufd $0x13,%xmm14,%xmm13 + 
pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_dec_done: + andl $15,%ebx + jz .Lxts_dec_ret + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $0x13,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu 
%xmm6,(%r13) + +.Lxts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +.Lxts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_dec_bzero + + leaq 120(%rbp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbx +.cfi_restore %rbx + movq -8(%rax),%rbp +.cfi_restore %rbp + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_bsaes_xts_decrypt,.-ossl_bsaes_xts_decrypt +.type _bsaes_const,@object +.section .rodata +.align 64 +_bsaes_const: +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: +.quad 0x5555555555555555, 0x5555555555555555 +.LBS1: +.quad 0x3333333333333333, 0x3333333333333333 +.LBS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: +.quad 0x0000000000000000, 0x0000000100000000 +.LADD2: +.quad 0x0000000000000000, 0x0000000200000000 +.LADD3: +.quad 0x0000000000000000, 0x0000000300000000 +.LADD4: +.quad 0x0000000000000000, 0x0000000400000000 +.LADD5: +.quad 0x0000000000000000, 0x0000000500000000 +.LADD6: +.quad 0x0000000000000000, 0x0000000600000000 +.LADD7: +.quad 0x0000000000000000, 0x0000000700000000 +.LADD8: +.quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: +.long 0x87,0,1,0 +.Lmasks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: +.quad 0x6363636363636363, 0x6363636363636363 +.align 64 +.size _bsaes_const,.-_bsaes_const +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 diff --git a/contrib/openssl-cmake/asm/crypto/aes/vpaes-armv8.S b/contrib/openssl-cmake/asm/crypto/aes/vpaes-armv8.S new file mode 100644 index 000000000000..9d9be004162d --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/vpaes-armv8.S @@ -0,0 +1,1218 @@ +#include "arm_arch.h" + +.section .rodata + +.type _vpaes_consts,%object +.align 7 // totally strategic alignment +_vpaes_consts: +.Lk_mc_forward: // mc_forward +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 +.Lk_mc_backward: // mc_backward +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 
0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F +.Lk_sr: // sr +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +// +// "Hot" constants +// +.Lk_inv: // inv, inva +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 +.Lk_ipt: // input transform (lo, hi) +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 +.Lk_sbo: // sbou, sbot +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA +.Lk_sb1: // sb1u, sb1t +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.Lk_sb2: // sb2u, sb2t +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD + +// +// Decryption stuff +// +.Lk_dipt: // decryption input transform +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 +.Lk_dsbo: // decryption sbox final output +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.Lk_dsb9: // decryption sbox output *9*u, *9*t +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: // decryption sbox output *D*u, *D*t +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: // decryption sbox output *B*u, *B*t +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: // decryption sbox output *E*u, *E*t +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 + +// +// Key schedule constants +// +.Lk_dksd: // decryption key schedule: invskew x*D +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: // decryption key schedule: invskew x*B +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: // decryption key schedule: invskew x*E + 0x63 +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: // decryption key schedule: invskew x*9 +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + +.Lk_rcon: // rcon +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_opt: // output transform +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 +.Lk_deskew: // deskew tables: inverts the sbox's "skew" +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 2 +.size _vpaes_consts,.-_vpaes_consts +.align 6 + +.text + +// +// _aes_preheat +// +// Fills register %r10 -> .aes_consts (so you can -fPIC) +// and %xmm9-%xmm15 as specified below. 
+// +.type _vpaes_encrypt_preheat,%function +.align 4 +_vpaes_encrypt_preheat: + adrp x10, .Lk_inv + add x10, x10, #:lo12:.Lk_inv + movi v17.16b, #0x0f + ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10] // .Lk_sb1, .Lk_sb2 + ret +.size _vpaes_encrypt_preheat,.-_vpaes_encrypt_preheat + +// +// _aes_encrypt_core +// +// AES-encrypt %xmm0. +// +// Inputs: +// %xmm0 = input +// %xmm9-%xmm15 as in _vpaes_preheat +// (%rdx) = scheduled keys +// +// Output in %xmm0 +// Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax +// Preserves %xmm6 - %xmm8 so you get some local vectors +// +// +.type _vpaes_encrypt_core,%function +.align 4 +_vpaes_encrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, .Lk_mc_forward+16 + add x11, x11, #:lo12:.Lk_mc_forward+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b .Lenc_entry + +.align 4 +.Lenc_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... 
mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + sub w8, w8, #1 // nr-- + +.Lenc_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v5.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, .Lenc_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + +.globl vpaes_encrypt +.type vpaes_encrypt,%function +.align 4 +vpaes_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_encrypt_preheat + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_encrypt,.-vpaes_encrypt + +.type _vpaes_encrypt_2x,%function +.align 4 +_vpaes_encrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + adrp x11, .Lk_mc_forward+16 + add x11, x11, #:lo12:.Lk_mc_forward+16 + // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo + ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 + tbl v9.16b, {v20.16b}, v9.16b + // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi + tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 + tbl v10.16b, {v21.16b}, v8.16b + eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 + eor v8.16b, v9.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b .Lenc_2x_entry + +.align 4 +.Lenc_2x_loop: + // middle of middle round + add x10, x11, #0x40 + tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + tbl v12.16b, {v25.16b}, v10.16b + ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + tbl v8.16b, {v24.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + tbl v13.16b, {v27.16b}, v10.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + tbl v10.16b, {v26.16b}, v11.16b + ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + tbl v11.16b, {v8.16b}, v1.16b + eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + eor v10.16b, v10.16b, v13.16b + tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + tbl v8.16b, {v8.16b}, v4.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + eor v11.16b, v11.16b, v10.16b + tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + tbl v12.16b, {v11.16b},v1.16b + eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + eor v8.16b, v8.16b, v11.16b + and x11, x11, #~(1<<6) // and $0x30, %r11 # ... 
mod 4 + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + eor v8.16b, v8.16b, v12.16b + sub w8, w8, #1 // nr-- + +.Lenc_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v5.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + tbl v13.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v13.16b + eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v13.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 + cbnz w8, .Lenc_2x_loop + + // middle of last round + add x10, x11, #0x80 + // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + tbl v8.16b, {v23.16b}, v11.16b + eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A + eor v8.16b, v8.16b, v12.16b + tbl v0.16b, {v0.16b},v1.16b // vpshufb %xmm1, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v1.16b + ret +.size _vpaes_encrypt_2x,.-_vpaes_encrypt_2x + +.type _vpaes_decrypt_preheat,%function +.align 4 +_vpaes_decrypt_preheat: + adrp x10, .Lk_inv + add x10, x10, #:lo12:.Lk_inv + movi v17.16b, #0x0f + adrp x11, .Lk_dipt + add x11, x11, #:lo12:.Lk_dipt + ld1 {v18.2d,v19.2d}, [x10],#32 // .Lk_inv + ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x11] // .Lk_dsbb, .Lk_dsbe + ret +.size _vpaes_decrypt_preheat,.-_vpaes_decrypt_preheat + +// +// Decryption core +// +// Same API as encryption core. 
+// +.type _vpaes_decrypt_core,%function +.align 4 +_vpaes_decrypt_core: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, .Lk_sr + add x10, x10, #:lo12:.Lk_sr + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, .Lk_mc_forward+48 + add x10, x10, #:lo12:.Lk_mc_forward+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + ld1 {v5.2d}, [x10] // vmovdqa .Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + b .Ldec_entry + +.align 4 +.Ldec_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + sub w8, w8, #1 // sub $1,%rax # nr-- + +.Ldec_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, 
%xmm10, %xmm3 # 3 = 1/jak + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, .Ldec_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + tbl v0.16b, {v0.16b}, v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + +.globl vpaes_decrypt +.type vpaes_decrypt,%function +.align 4 +vpaes_decrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v7.16b}, [x0] + bl _vpaes_decrypt_preheat + bl _vpaes_decrypt_core + st1 {v0.16b}, [x1] + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_decrypt,.-vpaes_decrypt + +// v14-v15 input, v0-v1 output +.type _vpaes_decrypt_2x,%function +.align 4 +_vpaes_decrypt_2x: + mov x9, x2 + ldr w8, [x2,#240] // pull rounds + + // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo + lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 + eor x11, x11, #0x30 // xor $0x30, %r11 + adrp x10, .Lk_sr + add x10, x10, #:lo12:.Lk_sr + and x11, x11, #0x30 // and $0x30, %r11 + add x11, x11, x10 + adrp x10, .Lk_mc_forward+48 + add x10, x10, #:lo12:.Lk_mc_forward+48 + + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key + and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + and v9.16b, v15.16b, v17.16b + ushr v8.16b, v15.16b, #4 + tbl v2.16b, {v20.16b},v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + tbl v10.16b, {v20.16b},v9.16b + ld1 {v5.2d}, [x10] // vmovdqa .Lk_mc_forward+48(%rip), %xmm5 + // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi + tbl v0.16b, {v21.16b},v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + tbl v8.16b, {v21.16b},v8.16b + eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 + eor v10.16b, v10.16b, v16.16b + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + eor v8.16b, v8.16b, v10.16b + b .Ldec_2x_entry + +.align 4 +.Ldec_2x_loop: +// +// Inverse mix columns +// + // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + tbl v12.16b, {v24.16b}, v10.16b + tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + tbl v9.16b, {v25.16b}, v11.16b + eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 + eor v8.16b, v12.16b, v16.16b + // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + tbl v12.16b, {v26.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + tbl v9.16b, {v27.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch 
+ eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + tbl v12.16b, {v28.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + tbl v9.16b, {v29.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + tbl v12.16b, {v30.16b}, v10.16b + tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch + tbl v8.16b, {v8.16b},v5.16b + tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + tbl v9.16b, {v31.16b}, v11.16b + eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + eor v8.16b, v8.16b, v12.16b + ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 + eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + eor v8.16b, v8.16b, v9.16b + sub w8, w8, #1 // sub $1,%rax # nr-- + +.Ldec_2x_entry: + // top of round + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + and v9.16b, v8.16b, v17.16b + ushr v8.16b, v8.16b, #4 + tbl v2.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + tbl v10.16b, {v19.16b},v9.16b + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + eor v9.16b, v9.16b, v8.16b + tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + tbl v11.16b, {v18.16b},v8.16b + tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + tbl v12.16b, {v18.16b},v9.16b + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + eor v11.16b, v11.16b, v10.16b + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + eor v12.16b, v12.16b, v10.16b + tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + tbl v10.16b, {v18.16b},v11.16b + tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + tbl v11.16b, {v18.16b},v12.16b + eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io + eor v10.16b, v10.16b, v9.16b + eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + eor v11.16b, v11.16b, v8.16b + ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 + cbnz w8, .Ldec_2x_loop + + // middle of last round + // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + tbl v12.16b, {v22.16b}, v10.16b + // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + tbl v9.16b, {v23.16b}, v11.16b + ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + eor v12.16b, v12.16b, v16.16b + eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A + eor v8.16b, v9.16b, v12.16b + tbl v0.16b, {v0.16b},v2.16b // vpshufb %xmm2, %xmm0, %xmm0 + tbl v1.16b, {v8.16b},v2.16b + ret +.size _vpaes_decrypt_2x,.-_vpaes_decrypt_2x +//////////////////////////////////////////////////////// +// // +// AES key schedule // +// // +//////////////////////////////////////////////////////// +.type 
_vpaes_key_preheat,%function +.align 4 +_vpaes_key_preheat: + adrp x10, .Lk_inv + add x10, x10, #:lo12:.Lk_inv + movi v16.16b, #0x5b // .Lk_s63 + adrp x11, .Lk_sb1 + add x11, x11, #:lo12:.Lk_sb1 + movi v17.16b, #0x0f // .Lk_s0F + ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // .Lk_inv, .Lk_ipt + adrp x10, .Lk_dksd + add x10, x10, #:lo12:.Lk_dksd + ld1 {v22.2d,v23.2d}, [x11] // .Lk_sb1 + adrp x11, .Lk_mc_forward + add x11, x11, #:lo12:.Lk_mc_forward + ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb + ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9 + ld1 {v8.2d}, [x10] // .Lk_rcon + ld1 {v9.2d}, [x11] // .Lk_mc_forward[0] + ret +.size _vpaes_key_preheat,.-_vpaes_key_preheat + +.type _vpaes_schedule_core,%function +.align 4 +_vpaes_schedule_core: + AARCH64_SIGN_LINK_REGISTER + stp x29, x30, [sp,#-16]! + add x29,sp,#0 + + bl _vpaes_key_preheat // load the tables + + ld1 {v0.16b}, [x0],#16 // vmovdqu (%rdi), %xmm0 # load key (unaligned) + + // input transform + mov v3.16b, v0.16b // vmovdqa %xmm0, %xmm3 + bl _vpaes_schedule_transform + mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 + + adrp x10, .Lk_sr + add x10, x10, #:lo12:.Lk_sr + add x8, x8, x10 + cbnz w3, .Lschedule_am_decrypting + + // encrypting, output zeroth round key after transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) + b .Lschedule_go + +.Lschedule_am_decrypting: + // decrypting, output zeroth round key after shiftrows + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + eor x8, x8, #0x30 // xor $0x30, %r8 + +.Lschedule_go: + cmp w1, #192 // cmp $192, %esi + b.hi .Lschedule_256 + b.eq .Lschedule_192 + // 128: fall though + +// +// .schedule_128 +// +// 128-bit specific part of key schedule. +// +// This schedule is really simple, because all its parts +// are accomplished by the subroutines. +// +.Lschedule_128: + mov x0, #10 // mov $10, %esi + +.Loop_schedule_128: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + cbz x0, .Lschedule_mangle_last + bl _vpaes_schedule_mangle // write output + b .Loop_schedule_128 + +// +// .aes_schedule_192 +// +// 192-bit specific part of key schedule. +// +// The main body of this schedule is the same as the 128-bit +// schedule, but with more smearing. The long, high side is +// stored in %xmm7 as before, and the short, low side is in +// the high bits of %xmm6. +// +// This schedule is somewhat nastier, however, because each +// round produces 192 bits of key material, or 1.5 round keys. +// Therefore, on each cycle we do 2 rounds and produce 3 round +// keys. 
+// +.align 4 +.Lschedule_192: + sub x0, x0, #8 + ld1 {v0.16b}, [x0] // vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + bl _vpaes_schedule_transform // input transform + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save short part + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 # clear 4 + ins v6.d[0], v4.d[0] // vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros + mov x0, #4 // mov $4, %esi + +.Loop_schedule_192: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_round + ext v0.16b, v6.16b, v0.16b, #8 // vpalignr $8,%xmm6,%xmm0,%xmm0 + bl _vpaes_schedule_mangle // save key n + bl _vpaes_schedule_192_smear + bl _vpaes_schedule_mangle // save key n+1 + bl _vpaes_schedule_round + cbz x0, .Lschedule_mangle_last + bl _vpaes_schedule_mangle // save key n+2 + bl _vpaes_schedule_192_smear + b .Loop_schedule_192 + +// +// .aes_schedule_256 +// +// 256-bit specific part of key schedule. +// +// The structure here is very similar to the 128-bit +// schedule, but with an additional "low side" in +// %xmm6. The low side's rounds are the same as the +// high side's, except no rcon and no rotation. +// +.align 4 +.Lschedule_256: + ld1 {v0.16b}, [x0] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + bl _vpaes_schedule_transform // input transform + mov x0, #7 // mov $7, %esi + +.Loop_schedule_256: + sub x0, x0, #1 // dec %esi + bl _vpaes_schedule_mangle // output low result + mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + // high round + bl _vpaes_schedule_round + cbz x0, .Lschedule_mangle_last + bl _vpaes_schedule_mangle + + // low round. swap xmm7 and xmm6 + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + movi v4.16b, #0 + mov v5.16b, v7.16b // vmovdqa %xmm7, %xmm5 + mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 + bl _vpaes_schedule_low_round + mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 + + b .Loop_schedule_256 + +// +// .aes_schedule_mangle_last +// +// Mangler for last round of key schedule +// Mangles %xmm0 +// when encrypting, outputs out(%xmm0) ^ 63 +// when decrypting, outputs unskew(%xmm0) +// +// Always called right before return... jumps to cleanup and exits +// +.align 4 +.Lschedule_mangle_last: + // schedule last round key from xmm0 + adrp x11, .Lk_deskew + add x11, x11, #:lo12:.Lk_deskew + cbnz w3, .Lschedule_mangle_last_dec + + // encrypting + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 + adrp x11, .Lk_opt + add x11, x11, #:lo12:.Lk_opt + add x2, x2, #32 // add $32, %rdx + tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute + +.Lschedule_mangle_last_dec: + ld1 {v20.2d,v21.2d}, [x11] // reload constants + sub x2, x2, #16 // add $-16, %rdx + eor v0.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform // output transform + st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) # save last key + + // cleanup + eor v0.16b, v0.16b, v0.16b // vpxor %xmm0, %xmm0, %xmm0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v2.16b, v2.16b, v2.16b // vpxor %xmm2, %xmm2, %xmm2 + eor v3.16b, v3.16b, v3.16b // vpxor %xmm3, %xmm3, %xmm3 + eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 + eor v5.16b, v5.16b, v5.16b // vpxor %xmm5, %xmm5, %xmm5 + eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 + eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 + ldp x29, x30, [sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core + +// +// .aes_schedule_192_smear +// +// Smear the short, low side in the 192-bit key schedule. 
+// +// Inputs: +// %xmm7: high side, b a x y +// %xmm6: low side, d c 0 0 +// %xmm13: 0 +// +// Outputs: +// %xmm6: b+c+d b+c 0 0 +// %xmm0: b+c+d b+c b a +// +.type _vpaes_schedule_192_smear,%function +.align 4 +_vpaes_schedule_192_smear: + movi v1.16b, #0 + dup v0.4s, v7.s[3] + ins v1.s[3], v6.s[2] // vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 + ins v0.s[0], v7.s[2] // vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + eor v6.16b, v6.16b, v1.16b // vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 + eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 + eor v6.16b, v6.16b, v0.16b // vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a + mov v0.16b, v6.16b // vmovdqa %xmm6, %xmm0 + ins v6.d[0], v1.d[0] // vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + +// +// .aes_schedule_round +// +// Runs one main round of the key schedule on %xmm0, %xmm7 +// +// Specifically, runs subbytes on the high dword of %xmm0 +// then rotates it by one byte and xors into the low dword of +// %xmm7. +// +// Adds rcon from low byte of %xmm8, then rotates %xmm8 for +// next rcon. +// +// Smears the dwords of %xmm7 by xoring the low into the +// second low, result into third, result into highest. +// +// Returns results in %xmm7 = %xmm0. +// Clobbers %xmm1-%xmm4, %r11. +// +.type _vpaes_schedule_round,%function +.align 4 +_vpaes_schedule_round: + // extract rcon from xmm8 + movi v4.16b, #0 // vpxor %xmm4, %xmm4, %xmm4 + ext v1.16b, v8.16b, v4.16b, #15 // vpalignr $15, %xmm8, %xmm4, %xmm1 + ext v8.16b, v8.16b, v8.16b, #15 // vpalignr $15, %xmm8, %xmm8, %xmm8 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + + // rotate + dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 + ext v0.16b, v0.16b, v0.16b, #1 // vpalignr $1, %xmm0, %xmm0, %xmm0 + + // fall through... + + // low round: same as high round, but no rotation and no rcon. 
+_vpaes_schedule_low_round: + // smear xmm7 + ext v1.16b, v4.16b, v7.16b, #12 // vpslldq $4, %xmm7, %xmm1 + eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 + ext v4.16b, v4.16b, v7.16b, #8 // vpslldq $8, %xmm7, %xmm4 + + // subbytes + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i + eor v7.16b, v7.16b, v4.16b // vpxor %xmm4, %xmm7, %xmm7 + tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j + tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + eor v7.16b, v7.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm7, %xmm7 + tbl v3.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak + eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + tbl v2.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak + eor v3.16b, v3.16b, v1.16b // vpxor %xmm1, %xmm3, %xmm3 # 2 = io + eor v2.16b, v2.16b, v0.16b // vpxor %xmm0, %xmm2, %xmm2 # 3 = jo + tbl v4.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou + tbl v1.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t + eor v1.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output + + // add in smeared stuff + eor v0.16b, v1.16b, v7.16b // vpxor %xmm7, %xmm1, %xmm0 + eor v7.16b, v1.16b, v7.16b // vmovdqa %xmm0, %xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round + +// +// .aes_schedule_transform +// +// Linear-transform %xmm0 according to tables at (%r11) +// +// Requires that %xmm9 = 0x0F0F... as in preheat +// Output in %xmm0 +// Clobbers %xmm1, %xmm2 +// +.type _vpaes_schedule_transform,%function +.align 4 +_vpaes_schedule_transform: + and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 + ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 + // vmovdqa (%r11), %xmm2 # lo + tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 + // vmovdqa 16(%r11), %xmm1 # hi + tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 + eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + +// +// .aes_schedule_mangle +// +// Mangle xmm0 from (basis-transformed) standard version +// to our version. 
+// +// On encrypt, +// xor with 0x63 +// multiply by circulant 0,1,1,1 +// apply shiftrows transform +// +// On decrypt, +// xor with 0x63 +// multiply by "inverse mixcolumns" circulant E,B,D,9 +// deskew +// apply shiftrows transform +// +// +// Writes out to (%rdx), and increments or decrements it +// Keeps track of round number mod 4 in %r8 +// Preserves xmm0 +// Clobbers xmm1-xmm5 +// +.type _vpaes_schedule_mangle,%function +.align 4 +_vpaes_schedule_mangle: + mov v4.16b, v0.16b // vmovdqa %xmm0, %xmm4 # save xmm0 for later + // vmovdqa .Lk_mc_forward(%rip),%xmm5 + cbnz w3, .Lschedule_mangle_dec + + // encrypting + eor v4.16b, v0.16b, v16.16b // vpxor .Lk_s63(%rip), %xmm0, %xmm4 + add x2, x2, #16 // add $16, %rdx + tbl v4.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm4 + tbl v1.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm1 + tbl v3.16b, {v1.16b}, v9.16b // vpshufb %xmm5, %xmm1, %xmm3 + eor v4.16b, v4.16b, v1.16b // vpxor %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v3.16b, v3.16b, v4.16b // vpxor %xmm4, %xmm3, %xmm3 + + b .Lschedule_mangle_both +.align 4 +.Lschedule_mangle_dec: + // inverse mix columns + // lea .Lk_dksd(%rip),%r11 + ushr v1.16b, v4.16b, #4 // vpsrlb $4, %xmm4, %xmm1 # 1 = hi + and v4.16b, v4.16b, v17.16b // vpand %xmm9, %xmm4, %xmm4 # 4 = lo + + // vmovdqa 0x00(%r11), %xmm2 + tbl v2.16b, {v24.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + // vmovdqa 0x10(%r11), %xmm3 + tbl v3.16b, {v25.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x20(%r11), %xmm2 + tbl v2.16b, {v26.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x30(%r11), %xmm3 + tbl v3.16b, {v27.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + + // vmovdqa 0x40(%r11), %xmm2 + tbl v2.16b, {v28.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + // vmovdqa 0x50(%r11), %xmm3 + tbl v3.16b, {v29.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 + + // vmovdqa 0x60(%r11), %xmm2 + tbl v2.16b, {v30.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 + tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 + // vmovdqa 0x70(%r11), %xmm4 + tbl v4.16b, {v31.16b}, v1.16b // vpshufb %xmm1, %xmm4, %xmm4 + ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 + eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 + eor v3.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm3 + + sub x2, x2, #16 // add $-16, %rdx + +.Lschedule_mangle_both: + tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 + add x8, x8, #64-16 // add $-16, %r8 + and x8, x8, #~(1<<6) // and $0x30, %r8 + st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,%function +.align 4 +vpaes_set_encrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! 
// ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + mov w3, #0 // mov $0,%ecx + mov x8, #0x30 // mov $0x30,%r8d + bl _vpaes_schedule_core + eor x0, x0, x0 + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,%function +.align 4 +vpaes_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + + lsr w9, w1, #5 // shr $5,%eax + add w9, w9, #5 // $5,%eax + str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + lsl w9, w9, #4 // shl $4,%eax + add x2, x2, #16 // lea 16(%rdx,%rax),%rdx + add x2, x2, x9 + + mov w3, #1 // mov $1,%ecx + lsr w8, w1, #1 // shr $1,%r8d + and x8, x8, #32 // and $32,%r8d + eor x8, x8, #32 // xor $32,%r8d # nbits==192?0:32 + bl _vpaes_schedule_core + + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,%function +.align 4 +vpaes_cbc_encrypt: + AARCH64_SIGN_LINK_REGISTER + cbz x2, .Lcbc_abort + cmp w5, #0 // check direction + b.eq vpaes_cbc_decrypt + + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + mov x17, x2 // reassign + mov x2, x3 // reassign + + ld1 {v0.16b}, [x4] // load ivec + bl _vpaes_encrypt_preheat + b .Lcbc_enc_loop + +.align 4 +.Lcbc_enc_loop: + ld1 {v7.16b}, [x0],#16 // load input + eor v7.16b, v7.16b, v0.16b // xor with ivec + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1],#16 // save output + subs x17, x17, #16 + b.hi .Lcbc_enc_loop + + st1 {v0.16b}, [x4] // write ivec + + ldp x29,x30,[sp],#16 +.Lcbc_abort: + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + +.type vpaes_cbc_decrypt,%function +.align 4 +vpaes_cbc_decrypt: + // Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to + // only from vpaes_cbc_encrypt which has already signed the return address. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + mov x17, x2 // reassign + mov x2, x3 // reassign + ld1 {v6.16b}, [x4] // load ivec + bl _vpaes_decrypt_preheat + tst x17, #16 + b.eq .Lcbc_dec_loop2x + + ld1 {v7.16b}, [x0], #16 // load input + bl _vpaes_decrypt_core + eor v0.16b, v0.16b, v6.16b // xor with ivec + orr v6.16b, v7.16b, v7.16b // next ivec value + st1 {v0.16b}, [x1], #16 + subs x17, x17, #16 + b.ls .Lcbc_dec_done + +.align 4 +.Lcbc_dec_loop2x: + ld1 {v14.16b,v15.16b}, [x0], #32 + bl _vpaes_decrypt_2x + eor v0.16b, v0.16b, v6.16b // xor with ivec + eor v1.16b, v1.16b, v14.16b + orr v6.16b, v15.16b, v15.16b + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #32 + b.hi .Lcbc_dec_loop2x + +.Lcbc_dec_done: + st1 {v6.16b}, [x4] + + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt +.globl vpaes_ecb_encrypt +.type vpaes_ecb_encrypt,%function +.align 4 +vpaes_ecb_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! 
+ + mov x17, x2 + mov x2, x3 + bl _vpaes_encrypt_preheat + tst x17, #16 + b.eq .Lecb_enc_loop + + ld1 {v7.16b}, [x0],#16 + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1],#16 + subs x17, x17, #16 + b.ls .Lecb_enc_done + +.align 4 +.Lecb_enc_loop: + ld1 {v14.16b,v15.16b}, [x0], #32 + bl _vpaes_encrypt_2x + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #32 + b.hi .Lecb_enc_loop + +.Lecb_enc_done: + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_ecb_encrypt,.-vpaes_ecb_encrypt + +.globl vpaes_ecb_decrypt +.type vpaes_ecb_decrypt,%function +.align 4 +vpaes_ecb_decrypt: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + stp d8,d9,[sp,#-16]! // ABI spec says so + stp d10,d11,[sp,#-16]! + stp d12,d13,[sp,#-16]! + stp d14,d15,[sp,#-16]! + + mov x17, x2 + mov x2, x3 + bl _vpaes_decrypt_preheat + tst x17, #16 + b.eq .Lecb_dec_loop + + ld1 {v7.16b}, [x0],#16 + bl _vpaes_encrypt_core + st1 {v0.16b}, [x1],#16 + subs x17, x17, #16 + b.ls .Lecb_dec_done + +.align 4 +.Lecb_dec_loop: + ld1 {v14.16b,v15.16b}, [x0], #32 + bl _vpaes_decrypt_2x + st1 {v0.16b,v1.16b}, [x1], #32 + subs x17, x17, #32 + b.hi .Lecb_dec_loop + +.Lecb_dec_done: + ldp d14,d15,[sp],#16 + ldp d12,d13,[sp],#16 + ldp d10,d11,[sp],#16 + ldp d8,d9,[sp],#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpaes_ecb_decrypt,.-vpaes_ecb_decrypt diff --git a/contrib/openssl-cmake/asm/crypto/aes/vpaes-x86_64.s b/contrib/openssl-cmake/asm/crypto/aes/vpaes-x86_64.s new file mode 100644 index 000000000000..9a2084d782f1 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/aes/vpaes-x86_64.s @@ -0,0 +1,859 @@ +.text + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: +.cfi_startproc + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $0x30,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + + + + + + +.type _vpaes_decrypt_core,@function +.align 16 
+_vpaes_decrypt_core: +.cfi_startproc + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $0x30,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $0x30,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + + + + + + +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: +.cfi_startproc + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $0x30,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + 
pshufd $0xFF,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.align 16 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_schedule_core,.-_vpaes_schedule_core + + + + + + + + + + + + + + + +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: +.cfi_startproc + pshufd $0x80,%xmm6,%xmm1 + pshufd $0xFE,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: +.cfi_startproc + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $0xFF,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_schedule_round,.-_vpaes_schedule_round + + + + + + + + + + +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: +.cfi_startproc + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: +.cfi_startproc + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 
102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $0x30,%r8 + movdqu %xmm3,(%rdx) + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + + + + +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: +.cfi_startproc +.byte 243,15,30,250 + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $0x30,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: +.cfi_startproc +.byte 243,15,30,250 + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +.globl vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: +.cfi_startproc +.byte 243,15,30,250 + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vpaes_encrypt,.-vpaes_encrypt + +.globl vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: +.cfi_startproc +.byte 243,15,30,250 + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.cfi_endproc +.size vpaes_decrypt,.-vpaes_decrypt +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: +.cfi_startproc +.byte 243,15,30,250 + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) +.Lcbc_abort: + .byte 0xf3,0xc3 +.cfi_endproc +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + + + + + + +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: +.cfi_startproc + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + .byte 0xf3,0xc3 +.cfi_endproc +.size _vpaes_preheat,.-_vpaes_preheat + + + + + +.type _vpaes_consts,@object +.section .rodata +.align 64 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 
0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.align 64 +.size _vpaes_consts,.-_vpaes_consts +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 diff --git a/contrib/openssl-cmake/asm/crypto/arm64cpuid.S b/contrib/openssl-cmake/asm/crypto/arm64cpuid.S new file mode 100644 index 000000000000..5cc56673f3d0 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/arm64cpuid.S @@ -0,0 +1,274 @@ +#include "arm_arch.h" + +.text +.arch armv8-a+crypto + +.align 5 +.globl _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + AARCH64_VALID_CALL_TARGET + orr v15.16b, v15.16b, v15.16b + ret +.size _armv7_neon_probe,.-_armv7_neon_probe + +.globl _armv7_tick +.type _armv7_tick,%function +_armv7_tick: + AARCH64_VALID_CALL_TARGET +#ifdef __APPLE__ + mrs x0, CNTPCT_EL0 +#else + mrs x0, CNTVCT_EL0 +#endif + ret +.size _armv7_tick,.-_armv7_tick + +.globl _armv8_aes_probe +.type _armv8_aes_probe,%function +_armv8_aes_probe: + AARCH64_VALID_CALL_TARGET + aese v0.16b, v0.16b + ret +.size _armv8_aes_probe,.-_armv8_aes_probe + +.globl _armv8_sha1_probe +.type _armv8_sha1_probe,%function +_armv8_sha1_probe: + AARCH64_VALID_CALL_TARGET + sha1h 
s0, s0 + ret +.size _armv8_sha1_probe,.-_armv8_sha1_probe + +.globl _armv8_sha256_probe +.type _armv8_sha256_probe,%function +_armv8_sha256_probe: + AARCH64_VALID_CALL_TARGET + sha256su0 v0.4s, v0.4s + ret +.size _armv8_sha256_probe,.-_armv8_sha256_probe + +.globl _armv8_pmull_probe +.type _armv8_pmull_probe,%function +_armv8_pmull_probe: + AARCH64_VALID_CALL_TARGET + pmull v0.1q, v0.1d, v0.1d + ret +.size _armv8_pmull_probe,.-_armv8_pmull_probe + +.globl _armv8_sm4_probe +.type _armv8_sm4_probe,%function +_armv8_sm4_probe: + AARCH64_VALID_CALL_TARGET +.inst 0xcec08400 // sm4e v0.4s, v0.4s + ret +.size _armv8_sm4_probe,.-_armv8_sm4_probe + +.globl _armv8_sha512_probe +.type _armv8_sha512_probe,%function +_armv8_sha512_probe: + AARCH64_VALID_CALL_TARGET +.inst 0xcec08000 // sha512su0 v0.2d,v0.2d + ret +.size _armv8_sha512_probe,.-_armv8_sha512_probe + +.globl _armv8_eor3_probe +.type _armv8_eor3_probe,%function +_armv8_eor3_probe: + AARCH64_VALID_CALL_TARGET +.inst 0xce010800 // eor3 v0.16b, v0.16b, v1.16b, v2.16b + ret +.size _armv8_eor3_probe,.-_armv8_eor3_probe + +.globl _armv8_sve_probe +.type _armv8_sve_probe,%function +_armv8_sve_probe: + AARCH64_VALID_CALL_TARGET +.inst 0x04a03000 // eor z0.d,z0.d,z0.d + ret +.size _armv8_sve_probe,.-_armv8_sve_probe + +.globl _armv8_sve2_probe +.type _armv8_sve2_probe,%function +_armv8_sve2_probe: + AARCH64_VALID_CALL_TARGET +.inst 0x04e03400 // xar z0.d,z0.d,z0.d + ret +.size _armv8_sve2_probe,.-_armv8_sve2_probe + +.globl _armv8_cpuid_probe +.type _armv8_cpuid_probe,%function +_armv8_cpuid_probe: + AARCH64_VALID_CALL_TARGET + mrs x0, midr_el1 + ret +.size _armv8_cpuid_probe,.-_armv8_cpuid_probe + +.globl _armv8_sm3_probe +.type _armv8_sm3_probe,%function +_armv8_sm3_probe: + AARCH64_VALID_CALL_TARGET +.inst 0xce63c004 // sm3partw1 v4.4s, v0.4s, v3.4s + ret +.size _armv8_sm3_probe,.-_armv8_sm3_probe + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,%function +.align 5 +OPENSSL_cleanse: + AARCH64_VALID_CALL_TARGET + cbz x1,.Lret // len==0? + cmp x1,#15 + b.hi .Lot // len>15 + nop +.Little: + strb wzr,[x0],#1 // store byte-by-byte + subs x1,x1,#1 + b.ne .Little +.Lret: ret + +.align 4 +.Lot: tst x0,#7 + b.eq .Laligned // inp is aligned + strb wzr,[x0],#1 // store byte-by-byte + sub x1,x1,#1 + b .Lot + +.align 4 +.Laligned: + str xzr,[x0],#8 // store word-by-word + sub x1,x1,#8 + tst x1,#-8 + b.ne .Laligned // len>=8 + cbnz x1,.Little // len!=0? + ret +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,%function +.align 4 +CRYPTO_memcmp: + AARCH64_VALID_CALL_TARGET + eor w3,w3,w3 + cbz x2,.Lno_data // len==0? 
+ cmp x2,#16 + b.ne .Loop_cmp + ldp x8,x9,[x0] + ldp x10,x11,[x1] + eor x8,x8,x10 + eor x9,x9,x11 + orr x8,x8,x9 + mov x0,#1 + cmp x8,#0 + csel x0,xzr,x0,eq + ret + +.align 4 +.Loop_cmp: + ldrb w4,[x0],#1 + ldrb w5,[x1],#1 + eor w4,w4,w5 + orr w3,w3,w4 + subs x2,x2,#1 + b.ne .Loop_cmp + +.Lno_data: + neg w0,w3 + lsr w0,w0,#31 + ret +.size CRYPTO_memcmp,.-CRYPTO_memcmp + +.globl _armv8_rng_probe +.type _armv8_rng_probe,%function +_armv8_rng_probe: + AARCH64_VALID_CALL_TARGET + mrs x0, s3_3_c2_c4_0 // rndr + mrs x0, s3_3_c2_c4_1 // rndrrs + ret +.size _armv8_rng_probe,.-_armv8_rng_probe +// Fill buffer with Randomly Generated Bytes +// inputs: char * in x0 - Pointer to buffer +// size_t in x1 - Number of bytes to write to buffer +// outputs: size_t in x0 - Number of bytes successfully written to buffer +.globl OPENSSL_rndr_asm +.type OPENSSL_rndr_asm,%function +.align 4 +OPENSSL_rndr_asm: + AARCH64_VALID_CALL_TARGET + mov x2,xzr + mov x3,xzr + +.align 4 +.Loop_rndr: + cmp x1,#0 + b.eq .rndr_done + mov x3,xzr + mrs x3,s3_3_c2_c4_0 + b.eq .rndr_done + + cmp x1,#8 + b.lt .Loop_single_byte_rndr + + str x3,[x0] + add x0,x0,#8 + add x2,x2,#8 + subs x1,x1,#8 + b.ge .Loop_rndr + +.align 4 +.Loop_single_byte_rndr: + strb w3,[x0] + lsr x3,x3,#8 + add x2,x2,#1 + add x0,x0,#1 + subs x1,x1,#1 + b.gt .Loop_single_byte_rndr + +.align 4 +.rndr_done: + mov x0,x2 + ret +.size OPENSSL_rndr_asm,.-OPENSSL_rndr_asm +// Fill buffer with Randomly Generated Bytes +// inputs: char * in x0 - Pointer to buffer +// size_t in x1 - Number of bytes to write to buffer +// outputs: size_t in x0 - Number of bytes successfully written to buffer +.globl OPENSSL_rndrrs_asm +.type OPENSSL_rndrrs_asm,%function +.align 4 +OPENSSL_rndrrs_asm: + AARCH64_VALID_CALL_TARGET + mov x2,xzr + mov x3,xzr + +.align 4 +.Loop_rndrrs: + cmp x1,#0 + b.eq .rndrrs_done + mov x3,xzr + mrs x3,s3_3_c2_c4_1 + b.eq .rndrrs_done + + cmp x1,#8 + b.lt .Loop_single_byte_rndrrs + + str x3,[x0] + add x0,x0,#8 + add x2,x2,#8 + subs x1,x1,#8 + b.ge .Loop_rndrrs + +.align 4 +.Loop_single_byte_rndrrs: + strb w3,[x0] + lsr x3,x3,#8 + add x2,x2,#1 + add x0,x0,#1 + subs x1,x1,#1 + b.gt .Loop_single_byte_rndrrs + +.align 4 +.rndrrs_done: + mov x0,x2 + ret +.size OPENSSL_rndrrs_asm,.-OPENSSL_rndrrs_asm diff --git a/contrib/openssl-cmake/asm/crypto/bn/armv8-mont.S b/contrib/openssl-cmake/asm/crypto/bn/armv8-mont.S new file mode 100644 index 000000000000..c9fe7621f422 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/armv8-mont.S @@ -0,0 +1,2136 @@ +#include "arm_arch.h" +#ifndef __KERNEL__ + +.hidden OPENSSL_armv8_rsa_neonized +#endif +.text + +.globl bn_mul_mont +.type bn_mul_mont,%function +.align 5 +bn_mul_mont: + AARCH64_SIGN_LINK_REGISTER +.Lbn_mul_mont: + tst x5,#3 + b.ne .Lmul_mont + cmp x5,#32 + b.le .Lscalar_impl +#ifndef __KERNEL__ +#ifndef __AARCH64EB__ + adrp x17,OPENSSL_armv8_rsa_neonized + ldr w17,[x17,#:lo12:OPENSSL_armv8_rsa_neonized] + cbnz w17, bn_mul8x_mont_neon +#endif +#endif + +.Lscalar_impl: + tst x5,#7 + b.eq __bn_sqr8x_mont + tst x5,#3 + b.eq __bn_mul4x_mont + +.Lmul_mont: + stp x29,x30,[sp,#-64]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + ldr x9,[x2],#8 // bp[0] + sub x22,sp,x5,lsl#3 + ldp x7,x8,[x1],#16 // ap[0..1] + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + and x22,x22,#-16 // ABI says so + ldp x13,x14,[x3],#16 // np[0..1] + + mul x6,x7,x9 // ap[0]*bp[0] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + mul x10,x8,x9 // ap[1]*bp[0] + umulh x11,x8,x9 + + mul x15,x6,x4 // "tp[0]"*n0 + mov sp,x22 // alloca + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 // discarded + // (*) As for removal of first multiplication and addition + // instructions. The outcome of first addition is + // guaranteed to be zero, which leaves two computationally + // significant outcomes: it either carries or not. Then + // question is when does it carry? Is there alternative + // way to deduce it? If you follow operations, you can + // observe that condition for carry is quite simple: + // x6 being non-zero. So that carry can be calculated + // by adding -1 to x6. That's what next instruction does. + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + adc x13,x13,xzr + cbz x21,.L1st_skip + +.L1st: + ldr x8,[x1],#8 + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + ldr x14,[x3],#8 + adds x12,x16,x13 + mul x10,x8,x9 // ap[j]*bp[0] + adc x13,x17,xzr + umulh x11,x8,x9 + + adds x12,x12,x6 + mul x16,x14,x15 // np[j]*m1 + adc x13,x13,xzr + umulh x17,x14,x15 + str x12,[x22],#8 // tp[j-1] + cbnz x21,.L1st + +.L1st_skip: + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adc x13,x17,xzr + + adds x12,x12,x6 + sub x20,x5,#8 // i=num-1 + adcs x13,x13,x7 + + adc x19,xzr,xzr // upmost overflow bit + stp x12,x13,[x22] + +.Louter: + ldr x9,[x2],#8 // bp[i] + ldp x7,x8,[x1],#16 + ldr x23,[sp] // tp[0] + add x22,sp,#8 + + mul x6,x7,x9 // ap[0]*bp[i] + sub x21,x5,#16 // j=num-2 + umulh x7,x7,x9 + ldp x13,x14,[x3],#16 + mul x10,x8,x9 // ap[1]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x15,x6,x4 + sub x20,x20,#8 // i-- + + // (*) mul x12,x13,x15 // np[0]*m1 + umulh x13,x13,x15 + mul x16,x14,x15 // np[1]*m1 + // (*) adds x12,x12,x6 + subs xzr,x6,#1 // (*) + umulh x17,x14,x15 + cbz x21,.Linner_skip + +.Linner: + ldr x8,[x1],#8 + adc x13,x13,xzr + ldr x23,[x22],#8 // tp[j] + adds x6,x10,x7 + sub x21,x21,#8 // j-- + adc x7,x11,xzr + + adds x12,x16,x13 + ldr x14,[x3],#8 + adc x13,x17,xzr + + mul x10,x8,x9 // ap[j]*bp[i] + adds x6,x6,x23 + umulh x11,x8,x9 + adc x7,x7,xzr + + mul x16,x14,x15 // np[j]*m1 + adds x12,x12,x6 + umulh x17,x14,x15 + stur x12,[x22,#-16] // tp[j-1] + cbnz x21,.Linner + +.Linner_skip: + ldr x23,[x22],#8 // tp[j] + adc x13,x13,xzr + adds x6,x10,x7 + sub x1,x1,x5 // rewind x1 + adc x7,x11,xzr + + adds x12,x16,x13 + sub x3,x3,x5 // rewind x3 + adcs x13,x17,x19 + adc x19,xzr,xzr + + adds x6,x6,x23 + adc x7,x7,xzr + + adds x12,x12,x6 + adcs x13,x13,x7 + adc x19,x19,xzr // upmost overflow bit + stp x12,x13,[x22,#-16] + + cbnz x20,.Louter + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. 
+ ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x14,[x3],#8 // np[0] + subs x21,x5,#8 // j=num-1 and clear borrow + mov x1,x0 +.Lsub: + sbcs x8,x23,x14 // tp[j]-np[j] + ldr x23,[x22],#8 + sub x21,x21,#8 // j-- + ldr x14,[x3],#8 + str x8,[x1],#8 // rp[j]=tp[j]-np[j] + cbnz x21,.Lsub + + sbcs x8,x23,x14 + sbcs x19,x19,xzr // did it borrow? + str x8,[x1],#8 // rp[num-1] + + ldr x23,[sp] // tp[0] + add x22,sp,#8 + ldr x8,[x0],#8 // rp[0] + sub x5,x5,#8 // num-- + nop +.Lcond_copy: + sub x5,x5,#8 // num-- + csel x14,x23,x8,lo // did it borrow? + ldr x23,[x22],#8 + ldr x8,[x0],#8 + stur xzr,[x22,#-16] // wipe tp + stur x14,[x0,#-16] + cbnz x5,.Lcond_copy + + csel x14,x23,x8,lo + stur xzr,[x22,#-8] // wipe tp + stur x14,[x0,#-8] + + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldr x29,[sp],#64 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size bn_mul_mont,.-bn_mul_mont +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + // Not adding AARCH64_SIGN_LINK_REGISTER here because bn_mul8x_mont_neon is jumped to + // only from bn_mul_mont which has already signed the return address. + stp x29,x30,[sp,#-80]! + mov x16,sp + stp d8,d9,[sp,#16] + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + lsl x5,x5,#1 + eor v14.16b,v14.16b,v14.16b + +.align 4 +.LNEON_8n: + eor v6.16b,v6.16b,v6.16b + sub x7,sp,#128 + eor v7.16b,v7.16b,v7.16b + sub x7,x7,x5,lsl#4 + eor v8.16b,v8.16b,v8.16b + and x7,x7,#-64 + eor v9.16b,v9.16b,v9.16b + mov sp,x7 // alloca + eor v10.16b,v10.16b,v10.16b + add x7,x7,#256 + eor v11.16b,v11.16b,v11.16b + sub x8,x5,#8 + eor v12.16b,v12.16b,v12.16b + eor v13.16b,v13.16b,v13.16b + +.LNEON_8n_init: + st1 {v6.2d,v7.2d},[x7],#32 + subs x8,x8,#8 + st1 {v8.2d,v9.2d},[x7],#32 + st1 {v10.2d,v11.2d},[x7],#32 + st1 {v12.2d,v13.2d},[x7],#32 + bne .LNEON_8n_init + + add x6,sp,#256 + ld1 {v0.4s,v1.4s},[x1],#32 + add x10,sp,#8 + ldr s30,[x4],#4 + mov x9,x5 + b .LNEON_8n_outer + +.align 4 +.LNEON_8n_outer: + ldr s28,[x2],#4 // *b++ + uxtl v28.4s,v28.4h + add x7,sp,#128 + ld1 {v2.4s,v3.4s},[x3],#32 + + umlal v6.2d,v28.2s,v0.s[0] + umlal v7.2d,v28.2s,v0.s[1] + umlal v8.2d,v28.2s,v0.s[2] + shl v29.2d,v6.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v9.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v6.2d + umlal v10.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v11.2d,v28.2s,v1.s[1] + st1 {v28.2s},[sp] // put aside smashed b[8*i+0] + umlal v12.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v13.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v6.2d,v29.2s,v2.s[0] + umlal v7.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal v8.2d,v29.2s,v2.s[2] + ushr v15.2d,v6.2d,#16 + umlal v9.2d,v29.2s,v2.s[3] + umlal v10.2d,v29.2s,v3.s[0] + ext v6.16b,v6.16b,v6.16b,#8 + add v6.2d,v6.2d,v15.2d + umlal v11.2d,v29.2s,v3.s[1] + ushr v6.2d,v6.2d,#16 + umlal v12.2d,v29.2s,v3.s[2] + umlal v13.2d,v29.2s,v3.s[3] + add v16.2d,v7.2d,v6.2d + ins v7.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+0] + umlal v7.2d,v28.2s,v0.s[0] + ld1 {v6.2d},[x6],#16 + umlal v8.2d,v28.2s,v0.s[1] + umlal v9.2d,v28.2s,v0.s[2] + shl v29.2d,v7.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v10.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v7.2d + umlal v11.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v12.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+1] + umlal v13.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v6.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v7.2d,v29.2s,v2.s[0] + umlal v8.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal 
v9.2d,v29.2s,v2.s[2] + ushr v15.2d,v7.2d,#16 + umlal v10.2d,v29.2s,v2.s[3] + umlal v11.2d,v29.2s,v3.s[0] + ext v7.16b,v7.16b,v7.16b,#8 + add v7.2d,v7.2d,v15.2d + umlal v12.2d,v29.2s,v3.s[1] + ushr v7.2d,v7.2d,#16 + umlal v13.2d,v29.2s,v3.s[2] + umlal v6.2d,v29.2s,v3.s[3] + add v16.2d,v8.2d,v7.2d + ins v8.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+1] + umlal v8.2d,v28.2s,v0.s[0] + ld1 {v7.2d},[x6],#16 + umlal v9.2d,v28.2s,v0.s[1] + umlal v10.2d,v28.2s,v0.s[2] + shl v29.2d,v8.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v11.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v8.2d + umlal v12.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v13.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+2] + umlal v6.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v7.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v8.2d,v29.2s,v2.s[0] + umlal v9.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal v10.2d,v29.2s,v2.s[2] + ushr v15.2d,v8.2d,#16 + umlal v11.2d,v29.2s,v2.s[3] + umlal v12.2d,v29.2s,v3.s[0] + ext v8.16b,v8.16b,v8.16b,#8 + add v8.2d,v8.2d,v15.2d + umlal v13.2d,v29.2s,v3.s[1] + ushr v8.2d,v8.2d,#16 + umlal v6.2d,v29.2s,v3.s[2] + umlal v7.2d,v29.2s,v3.s[3] + add v16.2d,v9.2d,v8.2d + ins v9.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+2] + umlal v9.2d,v28.2s,v0.s[0] + ld1 {v8.2d},[x6],#16 + umlal v10.2d,v28.2s,v0.s[1] + umlal v11.2d,v28.2s,v0.s[2] + shl v29.2d,v9.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v12.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v9.2d + umlal v13.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v6.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+3] + umlal v7.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v8.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v9.2d,v29.2s,v2.s[0] + umlal v10.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal v11.2d,v29.2s,v2.s[2] + ushr v15.2d,v9.2d,#16 + umlal v12.2d,v29.2s,v2.s[3] + umlal v13.2d,v29.2s,v3.s[0] + ext v9.16b,v9.16b,v9.16b,#8 + add v9.2d,v9.2d,v15.2d + umlal v6.2d,v29.2s,v3.s[1] + ushr v9.2d,v9.2d,#16 + umlal v7.2d,v29.2s,v3.s[2] + umlal v8.2d,v29.2s,v3.s[3] + add v16.2d,v10.2d,v9.2d + ins v10.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+3] + umlal v10.2d,v28.2s,v0.s[0] + ld1 {v9.2d},[x6],#16 + umlal v11.2d,v28.2s,v0.s[1] + umlal v12.2d,v28.2s,v0.s[2] + shl v29.2d,v10.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v13.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v10.2d + umlal v6.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v7.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+4] + umlal v8.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v9.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v10.2d,v29.2s,v2.s[0] + umlal v11.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal v12.2d,v29.2s,v2.s[2] + ushr v15.2d,v10.2d,#16 + umlal v13.2d,v29.2s,v2.s[3] + umlal v6.2d,v29.2s,v3.s[0] + ext v10.16b,v10.16b,v10.16b,#8 + add v10.2d,v10.2d,v15.2d + umlal v7.2d,v29.2s,v3.s[1] + ushr v10.2d,v10.2d,#16 + umlal v8.2d,v29.2s,v3.s[2] + umlal v9.2d,v29.2s,v3.s[3] + add v16.2d,v11.2d,v10.2d + ins v11.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+4] + umlal v11.2d,v28.2s,v0.s[0] + ld1 {v10.2d},[x6],#16 + umlal v12.2d,v28.2s,v0.s[1] + umlal v13.2d,v28.2s,v0.s[2] + shl v29.2d,v11.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v6.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v11.2d + umlal v7.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v8.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+5] + umlal 
v9.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v10.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v11.2d,v29.2s,v2.s[0] + umlal v12.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal v13.2d,v29.2s,v2.s[2] + ushr v15.2d,v11.2d,#16 + umlal v6.2d,v29.2s,v2.s[3] + umlal v7.2d,v29.2s,v3.s[0] + ext v11.16b,v11.16b,v11.16b,#8 + add v11.2d,v11.2d,v15.2d + umlal v8.2d,v29.2s,v3.s[1] + ushr v11.2d,v11.2d,#16 + umlal v9.2d,v29.2s,v3.s[2] + umlal v10.2d,v29.2s,v3.s[3] + add v16.2d,v12.2d,v11.2d + ins v12.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+5] + umlal v12.2d,v28.2s,v0.s[0] + ld1 {v11.2d},[x6],#16 + umlal v13.2d,v28.2s,v0.s[1] + umlal v6.2d,v28.2s,v0.s[2] + shl v29.2d,v12.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v7.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v12.2d + umlal v8.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v9.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+6] + umlal v10.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v11.2d,v28.2s,v1.s[3] + ldr s28,[x2],#4 // *b++ + umlal v12.2d,v29.2s,v2.s[0] + umlal v13.2d,v29.2s,v2.s[1] + uxtl v28.4s,v28.4h + umlal v6.2d,v29.2s,v2.s[2] + ushr v15.2d,v12.2d,#16 + umlal v7.2d,v29.2s,v2.s[3] + umlal v8.2d,v29.2s,v3.s[0] + ext v12.16b,v12.16b,v12.16b,#8 + add v12.2d,v12.2d,v15.2d + umlal v9.2d,v29.2s,v3.s[1] + ushr v12.2d,v12.2d,#16 + umlal v10.2d,v29.2s,v3.s[2] + umlal v11.2d,v29.2s,v3.s[3] + add v16.2d,v13.2d,v12.2d + ins v13.d[0],v16.d[0] + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+6] + umlal v13.2d,v28.2s,v0.s[0] + ld1 {v12.2d},[x6],#16 + umlal v6.2d,v28.2s,v0.s[1] + umlal v7.2d,v28.2s,v0.s[2] + shl v29.2d,v13.2d,#16 + ext v29.16b,v29.16b,v29.16b,#8 + umlal v8.2d,v28.2s,v0.s[3] + add v29.2d,v29.2d,v13.2d + umlal v9.2d,v28.2s,v1.s[0] + mul v29.2s,v29.2s,v30.2s + umlal v10.2d,v28.2s,v1.s[1] + st1 {v28.2s},[x10],#8 // put aside smashed b[8*i+7] + umlal v11.2d,v28.2s,v1.s[2] + uxtl v29.4s,v29.4h + umlal v12.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[sp] // pull smashed b[8*i+0] + umlal v13.2d,v29.2s,v2.s[0] + ld1 {v0.4s,v1.4s},[x1],#32 + umlal v6.2d,v29.2s,v2.s[1] + umlal v7.2d,v29.2s,v2.s[2] + mov v5.16b,v13.16b + ushr v5.2d,v5.2d,#16 + ext v13.16b,v13.16b,v13.16b,#8 + umlal v8.2d,v29.2s,v2.s[3] + umlal v9.2d,v29.2s,v3.s[0] + add v13.2d,v13.2d,v5.2d + umlal v10.2d,v29.2s,v3.s[1] + ushr v13.2d,v13.2d,#16 + eor v15.16b,v15.16b,v15.16b + ins v13.d[1],v15.d[0] + umlal v11.2d,v29.2s,v3.s[2] + umlal v12.2d,v29.2s,v3.s[3] + add v6.2d,v6.2d,v13.2d + st1 {v29.2s},[x10],#8 // put aside smashed m[8*i+7] + add x10,sp,#8 // rewind + sub x8,x5,#8 + b .LNEON_8n_inner + +.align 4 +.LNEON_8n_inner: + subs x8,x8,#8 + umlal v6.2d,v28.2s,v0.s[0] + ld1 {v13.2d},[x6] + umlal v7.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+0] + umlal v8.2d,v28.2s,v0.s[2] + ld1 {v2.4s,v3.4s},[x3],#32 + umlal v9.2d,v28.2s,v0.s[3] + b.eq .LInner_jump + add x6,x6,#16 // don't advance in last iteration +.LInner_jump: + umlal v10.2d,v28.2s,v1.s[0] + umlal v11.2d,v28.2s,v1.s[1] + umlal v12.2d,v28.2s,v1.s[2] + umlal v13.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+1] + umlal v6.2d,v29.2s,v2.s[0] + umlal v7.2d,v29.2s,v2.s[1] + umlal v8.2d,v29.2s,v2.s[2] + umlal v9.2d,v29.2s,v2.s[3] + umlal v10.2d,v29.2s,v3.s[0] + umlal v11.2d,v29.2s,v3.s[1] + umlal v12.2d,v29.2s,v3.s[2] + umlal v13.2d,v29.2s,v3.s[3] + st1 {v6.2d},[x7],#16 + umlal v7.2d,v28.2s,v0.s[0] + ld1 {v6.2d},[x6] + umlal v8.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+1] + umlal v9.2d,v28.2s,v0.s[2] + b.eq .LInner_jump1 + add 
x6,x6,#16 // don't advance in last iteration +.LInner_jump1: + umlal v10.2d,v28.2s,v0.s[3] + umlal v11.2d,v28.2s,v1.s[0] + umlal v12.2d,v28.2s,v1.s[1] + umlal v13.2d,v28.2s,v1.s[2] + umlal v6.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+2] + umlal v7.2d,v29.2s,v2.s[0] + umlal v8.2d,v29.2s,v2.s[1] + umlal v9.2d,v29.2s,v2.s[2] + umlal v10.2d,v29.2s,v2.s[3] + umlal v11.2d,v29.2s,v3.s[0] + umlal v12.2d,v29.2s,v3.s[1] + umlal v13.2d,v29.2s,v3.s[2] + umlal v6.2d,v29.2s,v3.s[3] + st1 {v7.2d},[x7],#16 + umlal v8.2d,v28.2s,v0.s[0] + ld1 {v7.2d},[x6] + umlal v9.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+2] + umlal v10.2d,v28.2s,v0.s[2] + b.eq .LInner_jump2 + add x6,x6,#16 // don't advance in last iteration +.LInner_jump2: + umlal v11.2d,v28.2s,v0.s[3] + umlal v12.2d,v28.2s,v1.s[0] + umlal v13.2d,v28.2s,v1.s[1] + umlal v6.2d,v28.2s,v1.s[2] + umlal v7.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+3] + umlal v8.2d,v29.2s,v2.s[0] + umlal v9.2d,v29.2s,v2.s[1] + umlal v10.2d,v29.2s,v2.s[2] + umlal v11.2d,v29.2s,v2.s[3] + umlal v12.2d,v29.2s,v3.s[0] + umlal v13.2d,v29.2s,v3.s[1] + umlal v6.2d,v29.2s,v3.s[2] + umlal v7.2d,v29.2s,v3.s[3] + st1 {v8.2d},[x7],#16 + umlal v9.2d,v28.2s,v0.s[0] + ld1 {v8.2d},[x6] + umlal v10.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+3] + umlal v11.2d,v28.2s,v0.s[2] + b.eq .LInner_jump3 + add x6,x6,#16 // don't advance in last iteration +.LInner_jump3: + umlal v12.2d,v28.2s,v0.s[3] + umlal v13.2d,v28.2s,v1.s[0] + umlal v6.2d,v28.2s,v1.s[1] + umlal v7.2d,v28.2s,v1.s[2] + umlal v8.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+4] + umlal v9.2d,v29.2s,v2.s[0] + umlal v10.2d,v29.2s,v2.s[1] + umlal v11.2d,v29.2s,v2.s[2] + umlal v12.2d,v29.2s,v2.s[3] + umlal v13.2d,v29.2s,v3.s[0] + umlal v6.2d,v29.2s,v3.s[1] + umlal v7.2d,v29.2s,v3.s[2] + umlal v8.2d,v29.2s,v3.s[3] + st1 {v9.2d},[x7],#16 + umlal v10.2d,v28.2s,v0.s[0] + ld1 {v9.2d},[x6] + umlal v11.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+4] + umlal v12.2d,v28.2s,v0.s[2] + b.eq .LInner_jump4 + add x6,x6,#16 // don't advance in last iteration +.LInner_jump4: + umlal v13.2d,v28.2s,v0.s[3] + umlal v6.2d,v28.2s,v1.s[0] + umlal v7.2d,v28.2s,v1.s[1] + umlal v8.2d,v28.2s,v1.s[2] + umlal v9.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+5] + umlal v10.2d,v29.2s,v2.s[0] + umlal v11.2d,v29.2s,v2.s[1] + umlal v12.2d,v29.2s,v2.s[2] + umlal v13.2d,v29.2s,v2.s[3] + umlal v6.2d,v29.2s,v3.s[0] + umlal v7.2d,v29.2s,v3.s[1] + umlal v8.2d,v29.2s,v3.s[2] + umlal v9.2d,v29.2s,v3.s[3] + st1 {v10.2d},[x7],#16 + umlal v11.2d,v28.2s,v0.s[0] + ld1 {v10.2d},[x6] + umlal v12.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+5] + umlal v13.2d,v28.2s,v0.s[2] + b.eq .LInner_jump5 + add x6,x6,#16 // don't advance in last iteration +.LInner_jump5: + umlal v6.2d,v28.2s,v0.s[3] + umlal v7.2d,v28.2s,v1.s[0] + umlal v8.2d,v28.2s,v1.s[1] + umlal v9.2d,v28.2s,v1.s[2] + umlal v10.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+6] + umlal v11.2d,v29.2s,v2.s[0] + umlal v12.2d,v29.2s,v2.s[1] + umlal v13.2d,v29.2s,v2.s[2] + umlal v6.2d,v29.2s,v2.s[3] + umlal v7.2d,v29.2s,v3.s[0] + umlal v8.2d,v29.2s,v3.s[1] + umlal v9.2d,v29.2s,v3.s[2] + umlal v10.2d,v29.2s,v3.s[3] + st1 {v11.2d},[x7],#16 + umlal v12.2d,v28.2s,v0.s[0] + ld1 {v11.2d},[x6] + umlal v13.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+6] + umlal v6.2d,v28.2s,v0.s[2] + b.eq .LInner_jump6 + add x6,x6,#16 // don't advance in last iteration 
+.LInner_jump6: + umlal v7.2d,v28.2s,v0.s[3] + umlal v8.2d,v28.2s,v1.s[0] + umlal v9.2d,v28.2s,v1.s[1] + umlal v10.2d,v28.2s,v1.s[2] + umlal v11.2d,v28.2s,v1.s[3] + ld1 {v28.2s},[x10],#8 // pull smashed b[8*i+7] + umlal v12.2d,v29.2s,v2.s[0] + umlal v13.2d,v29.2s,v2.s[1] + umlal v6.2d,v29.2s,v2.s[2] + umlal v7.2d,v29.2s,v2.s[3] + umlal v8.2d,v29.2s,v3.s[0] + umlal v9.2d,v29.2s,v3.s[1] + umlal v10.2d,v29.2s,v3.s[2] + umlal v11.2d,v29.2s,v3.s[3] + st1 {v12.2d},[x7],#16 + umlal v13.2d,v28.2s,v0.s[0] + ld1 {v12.2d},[x6] + umlal v6.2d,v28.2s,v0.s[1] + ld1 {v29.2s},[x10],#8 // pull smashed m[8*i+7] + umlal v7.2d,v28.2s,v0.s[2] + b.eq .LInner_jump7 + add x6,x6,#16 // don't advance in last iteration +.LInner_jump7: + umlal v8.2d,v28.2s,v0.s[3] + umlal v9.2d,v28.2s,v1.s[0] + umlal v10.2d,v28.2s,v1.s[1] + umlal v11.2d,v28.2s,v1.s[2] + umlal v12.2d,v28.2s,v1.s[3] + b.ne .LInner_after_rewind8 + sub x1,x1,x5,lsl#2 // rewind +.LInner_after_rewind8: + umlal v13.2d,v29.2s,v2.s[0] + ld1 {v28.2s},[sp] // pull smashed b[8*i+0] + umlal v6.2d,v29.2s,v2.s[1] + ld1 {v0.4s,v1.4s},[x1],#32 + umlal v7.2d,v29.2s,v2.s[2] + add x10,sp,#8 // rewind + umlal v8.2d,v29.2s,v2.s[3] + umlal v9.2d,v29.2s,v3.s[0] + umlal v10.2d,v29.2s,v3.s[1] + umlal v11.2d,v29.2s,v3.s[2] + st1 {v13.2d},[x7],#16 + umlal v12.2d,v29.2s,v3.s[3] + + bne .LNEON_8n_inner + add x6,sp,#128 + st1 {v6.2d,v7.2d},[x7],#32 + eor v2.16b,v2.16b,v2.16b // v2 + st1 {v8.2d,v9.2d},[x7],#32 + eor v3.16b,v3.16b,v3.16b // v3 + st1 {v10.2d,v11.2d},[x7],#32 + st1 {v12.2d},[x7] + + subs x9,x9,#8 + ld1 {v6.2d,v7.2d},[x6],#32 + ld1 {v8.2d,v9.2d},[x6],#32 + ld1 {v10.2d,v11.2d},[x6],#32 + ld1 {v12.2d,v13.2d},[x6],#32 + + b.eq .LInner_8n_jump_2steps + sub x3,x3,x5,lsl#2 // rewind + b .LNEON_8n_outer + +.LInner_8n_jump_2steps: + add x7,sp,#128 + st1 {v2.2d,v3.2d}, [sp],#32 // start wiping stack frame + mov v5.16b,v6.16b + ushr v15.2d,v6.2d,#16 + ext v6.16b,v6.16b,v6.16b,#8 + st1 {v2.2d,v3.2d}, [sp],#32 + add v6.2d,v6.2d,v15.2d + st1 {v2.2d,v3.2d}, [sp],#32 + ushr v15.2d,v6.2d,#16 + st1 {v2.2d,v3.2d}, [sp],#32 + zip1 v6.4h,v5.4h,v6.4h + ins v15.d[1],v14.d[0] + + mov x8,x5 + b .LNEON_tail_entry + +.align 4 +.LNEON_tail: + add v6.2d,v6.2d,v15.2d + mov v5.16b,v6.16b + ushr v15.2d,v6.2d,#16 + ext v6.16b,v6.16b,v6.16b,#8 + ld1 {v8.2d,v9.2d}, [x6],#32 + add v6.2d,v6.2d,v15.2d + ld1 {v10.2d,v11.2d}, [x6],#32 + ushr v15.2d,v6.2d,#16 + ld1 {v12.2d,v13.2d}, [x6],#32 + zip1 v6.4h,v5.4h,v6.4h + ins v15.d[1],v14.d[0] + +.LNEON_tail_entry: + add v7.2d,v7.2d,v15.2d + st1 {v6.s}[0], [x7],#4 + ushr v15.2d,v7.2d,#16 + mov v5.16b,v7.16b + ext v7.16b,v7.16b,v7.16b,#8 + add v7.2d,v7.2d,v15.2d + ushr v15.2d,v7.2d,#16 + zip1 v7.4h,v5.4h,v7.4h + ins v15.d[1],v14.d[0] + add v8.2d,v8.2d,v15.2d + st1 {v7.s}[0], [x7],#4 + ushr v15.2d,v8.2d,#16 + mov v5.16b,v8.16b + ext v8.16b,v8.16b,v8.16b,#8 + add v8.2d,v8.2d,v15.2d + ushr v15.2d,v8.2d,#16 + zip1 v8.4h,v5.4h,v8.4h + ins v15.d[1],v14.d[0] + add v9.2d,v9.2d,v15.2d + st1 {v8.s}[0], [x7],#4 + ushr v15.2d,v9.2d,#16 + mov v5.16b,v9.16b + ext v9.16b,v9.16b,v9.16b,#8 + add v9.2d,v9.2d,v15.2d + ushr v15.2d,v9.2d,#16 + zip1 v9.4h,v5.4h,v9.4h + ins v15.d[1],v14.d[0] + add v10.2d,v10.2d,v15.2d + st1 {v9.s}[0], [x7],#4 + ushr v15.2d,v10.2d,#16 + mov v5.16b,v10.16b + ext v10.16b,v10.16b,v10.16b,#8 + add v10.2d,v10.2d,v15.2d + ushr v15.2d,v10.2d,#16 + zip1 v10.4h,v5.4h,v10.4h + ins v15.d[1],v14.d[0] + add v11.2d,v11.2d,v15.2d + st1 {v10.s}[0], [x7],#4 + ushr v15.2d,v11.2d,#16 + mov v5.16b,v11.16b + ext v11.16b,v11.16b,v11.16b,#8 + add v11.2d,v11.2d,v15.2d + ushr 
v15.2d,v11.2d,#16 + zip1 v11.4h,v5.4h,v11.4h + ins v15.d[1],v14.d[0] + add v12.2d,v12.2d,v15.2d + st1 {v11.s}[0], [x7],#4 + ushr v15.2d,v12.2d,#16 + mov v5.16b,v12.16b + ext v12.16b,v12.16b,v12.16b,#8 + add v12.2d,v12.2d,v15.2d + ushr v15.2d,v12.2d,#16 + zip1 v12.4h,v5.4h,v12.4h + ins v15.d[1],v14.d[0] + add v13.2d,v13.2d,v15.2d + st1 {v12.s}[0], [x7],#4 + ushr v15.2d,v13.2d,#16 + mov v5.16b,v13.16b + ext v13.16b,v13.16b,v13.16b,#8 + add v13.2d,v13.2d,v15.2d + ushr v15.2d,v13.2d,#16 + zip1 v13.4h,v5.4h,v13.4h + ins v15.d[1],v14.d[0] + ld1 {v6.2d,v7.2d}, [x6],#32 + subs x8,x8,#8 + st1 {v13.s}[0], [x7],#4 + bne .LNEON_tail + + st1 {v15.s}[0], [x7],#4 // top-most bit + sub x3,x3,x5,lsl#2 // rewind x3 + subs x1,sp,#0 // clear carry flag + add x2,sp,x5,lsl#2 + +.LNEON_sub: + ldp w4,w5,[x1],#8 + ldp w6,w7,[x1],#8 + ldp w8,w9,[x3],#8 + ldp w10,w11,[x3],#8 + sbcs w8,w4,w8 + sbcs w9,w5,w9 + sbcs w10,w6,w10 + sbcs w11,w7,w11 + sub x17,x2,x1 + stp w8,w9,[x0],#8 + stp w10,w11,[x0],#8 + cbnz x17,.LNEON_sub + + ldr w10, [x1] // load top-most bit + mov x11,sp + eor v0.16b,v0.16b,v0.16b + sub x11,x2,x11 // this is num*4 + eor v1.16b,v1.16b,v1.16b + mov x1,sp + sub x0,x0,x11 // rewind x0 + mov x3,x2 // second 3/4th of frame + sbcs w10,w10,wzr // result is carry flag + +.LNEON_copy_n_zap: + ldp w4,w5,[x1],#8 + ldp w6,w7,[x1],#8 + ldp w8,w9,[x0],#8 + ldp w10,w11,[x0] + sub x0,x0,#8 + b.cs .LCopy_1 + mov w8,w4 + mov w9,w5 + mov w10,w6 + mov w11,w7 +.LCopy_1: + st1 {v0.2d,v1.2d}, [x3],#32 // wipe + st1 {v0.2d,v1.2d}, [x3],#32 // wipe + ldp w4,w5,[x1],#8 + ldp w6,w7,[x1],#8 + stp w8,w9,[x0],#8 + stp w10,w11,[x0],#8 + sub x1,x1,#32 + ldp w8,w9,[x0],#8 + ldp w10,w11,[x0] + sub x0,x0,#8 + b.cs .LCopy_2 + mov w8, w4 + mov w9, w5 + mov w10, w6 + mov w11, w7 +.LCopy_2: + st1 {v0.2d,v1.2d}, [x1],#32 // wipe + st1 {v0.2d,v1.2d}, [x3],#32 // wipe + sub x17,x2,x1 // preserves carry + stp w8,w9,[x0],#8 + stp w10,w11,[x0],#8 + cbnz x17,.LNEON_copy_n_zap + + mov sp,x16 + ldp d14,d15,[sp,#64] + ldp d12,d13,[sp,#48] + ldp d10,d11,[sp,#32] + ldp d8,d9,[sp,#16] + ldr x29,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret // bx lr + +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +.type __bn_sqr8x_mont,%function +.align 5 +__bn_sqr8x_mont: + cmp x1,x2 + b.ne __bn_mul4x_mont +.Lsqr8x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to + // only from bn_mul_mont which has already signed the return address. + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + stp x0,x3,[sp,#96] // offload rp and np + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + ldp x12,x13,[x1,#8*6] + + sub x2,sp,x5,lsl#4 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + mov sp,x2 // alloca + sub x27,x5,#8*8 + b .Lsqr8x_zero_start + +.Lsqr8x_zero: + sub x27,x27,#8*8 + stp xzr,xzr,[x2,#8*0] + stp xzr,xzr,[x2,#8*2] + stp xzr,xzr,[x2,#8*4] + stp xzr,xzr,[x2,#8*6] +.Lsqr8x_zero_start: + stp xzr,xzr,[x2,#8*8] + stp xzr,xzr,[x2,#8*10] + stp xzr,xzr,[x2,#8*12] + stp xzr,xzr,[x2,#8*14] + add x2,x2,#8*16 + cbnz x27,.Lsqr8x_zero + + add x3,x1,x5 + add x1,x1,#8*8 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + mov x23,xzr + mov x24,xzr + mov x25,xzr + mov x26,xzr + mov x2,sp + str x4,[x29,#112] // offload n0 + + // Multiply everything but a[i]*a[i] +.align 4 +.Lsqr8x_outer_loop: + // a[1]a[0] (i) + // a[2]a[0] + // a[3]a[0] + // a[4]a[0] + // a[5]a[0] + // a[6]a[0] + // a[7]a[0] + // a[2]a[1] (ii) + // a[3]a[1] + // a[4]a[1] + // a[5]a[1] + // a[6]a[1] + // a[7]a[1] + // a[3]a[2] (iii) + // a[4]a[2] + // a[5]a[2] + // a[6]a[2] + // a[7]a[2] + // a[4]a[3] (iv) + // a[5]a[3] + // a[6]a[3] + // a[7]a[3] + // a[5]a[4] (v) + // a[6]a[4] + // a[7]a[4] + // a[6]a[5] (vi) + // a[7]a[5] + // a[7]a[6] (vii) + + mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) + mul x15,x8,x6 + mul x16,x9,x6 + mul x17,x10,x6 + adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) + mul x14,x11,x6 + adcs x21,x21,x15 + mul x15,x12,x6 + adcs x22,x22,x16 + mul x16,x13,x6 + adcs x23,x23,x17 + umulh x17,x7,x6 // hi(a[1..7]*a[0]) + adcs x24,x24,x14 + umulh x14,x8,x6 + adcs x25,x25,x15 + umulh x15,x9,x6 + adcs x26,x26,x16 + umulh x16,x10,x6 + stp x19,x20,[x2],#8*2 // t[0..1] + adc x19,xzr,xzr // t[8] + adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) + umulh x17,x11,x6 + adcs x22,x22,x14 + umulh x14,x12,x6 + adcs x23,x23,x15 + umulh x15,x13,x6 + adcs x24,x24,x16 + mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) + adcs x25,x25,x17 + mul x17,x9,x7 + adcs x26,x26,x14 + mul x14,x10,x7 + adc x19,x19,x15 + + mul x15,x11,x7 + adds x22,x22,x16 + mul x16,x12,x7 + adcs x23,x23,x17 + mul x17,x13,x7 + adcs x24,x24,x14 + umulh x14,x8,x7 // hi(a[2..7]*a[1]) + adcs x25,x25,x15 + umulh x15,x9,x7 + adcs x26,x26,x16 + umulh x16,x10,x7 + adcs x19,x19,x17 + umulh x17,x11,x7 + stp x21,x22,[x2],#8*2 // t[2..3] + adc x20,xzr,xzr // t[9] + adds x23,x23,x14 + umulh x14,x12,x7 + adcs x24,x24,x15 + umulh x15,x13,x7 + adcs x25,x25,x16 + mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) + adcs x26,x26,x17 + mul x17,x10,x8 + adcs x19,x19,x14 + mul x14,x11,x8 + adc x20,x20,x15 + + mul x15,x12,x8 + adds x24,x24,x16 + mul x16,x13,x8 + adcs x25,x25,x17 + umulh x17,x9,x8 // hi(a[3..7]*a[2]) + adcs x26,x26,x14 + umulh x14,x10,x8 + adcs x19,x19,x15 + umulh x15,x11,x8 + adcs x20,x20,x16 + umulh x16,x12,x8 + stp x23,x24,[x2],#8*2 // t[4..5] + adc x21,xzr,xzr // t[10] + adds x25,x25,x17 + umulh x17,x13,x8 + adcs x26,x26,x14 + mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) + adcs x19,x19,x15 + mul x15,x11,x9 + adcs x20,x20,x16 + mul x16,x12,x9 + adc x21,x21,x17 + + mul x17,x13,x9 + adds x26,x26,x14 + umulh x14,x10,x9 // hi(a[4..7]*a[3]) + adcs x19,x19,x15 + umulh x15,x11,x9 + adcs x20,x20,x16 + umulh x16,x12,x9 + adcs x21,x21,x17 + umulh x17,x13,x9 + stp x25,x26,[x2],#8*2 // t[6..7] + adc x22,xzr,xzr // t[11] + adds x19,x19,x14 + mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) + adcs x20,x20,x15 + mul x15,x12,x10 + adcs x21,x21,x16 + mul x16,x13,x10 + adc x22,x22,x17 + + umulh 
x17,x11,x10 // hi(a[5..7]*a[4]) + adds x20,x20,x14 + umulh x14,x12,x10 + adcs x21,x21,x15 + umulh x15,x13,x10 + adcs x22,x22,x16 + mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) + adc x23,xzr,xzr // t[12] + adds x21,x21,x17 + mul x17,x13,x11 + adcs x22,x22,x14 + umulh x14,x12,x11 // hi(a[6..7]*a[5]) + adc x23,x23,x15 + + umulh x15,x13,x11 + adds x22,x22,x16 + mul x16,x13,x12 // lo(a[7]*a[6]) (vii) + adcs x23,x23,x17 + umulh x17,x13,x12 // hi(a[7]*a[6]) + adc x24,xzr,xzr // t[13] + adds x23,x23,x14 + sub x27,x3,x1 // done yet? + adc x24,x24,x15 + + adds x24,x24,x16 + sub x14,x3,x5 // rewinded ap + adc x25,xzr,xzr // t[14] + add x25,x25,x17 + + cbz x27,.Lsqr8x_outer_break + + mov x4,x6 + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x0,x1 + adcs x26,xzr,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved below + mov x27,#-8*8 + + // a[8]a[0] + // a[9]a[0] + // a[a]a[0] + // a[b]a[0] + // a[c]a[0] + // a[d]a[0] + // a[e]a[0] + // a[f]a[0] + // a[8]a[1] + // a[f]a[1]........................ + // a[8]a[2] + // a[f]a[2]........................ + // a[8]a[3] + // a[f]a[3]........................ + // a[8]a[4] + // a[f]a[4]........................ + // a[8]a[5] + // a[f]a[5]........................ + // a[8]a[6] + // a[f]a[6]........................ + // a[8]a[7] + // a[f]a[7]........................ +.Lsqr8x_mul: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,.Lsqr8x_mul + // note that carry flag is guaranteed + // to be zero at this point + cmp x1,x3 // done yet? + b.eq .Lsqr8x_break + + ldp x6,x7,[x2,#8*0] + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + adds x19,x19,x6 + ldur x4,[x0,#-8*8] + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b .Lsqr8x_mul + +.align 4 +.Lsqr8x_break: + ldp x6,x7,[x0,#8*0] + add x1,x0,#8*8 + ldp x8,x9,[x0,#8*2] + sub x14,x3,x1 // is it last iteration? 
+ ldp x10,x11,[x0,#8*4] + sub x15,x2,x14 + ldp x12,x13,[x0,#8*6] + cbz x14,.Lsqr8x_outer_loop + + stp x19,x20,[x2,#8*0] + ldp x19,x20,[x15,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x15,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x15,#8*4] + stp x25,x26,[x2,#8*6] + mov x2,x15 + ldp x25,x26,[x15,#8*6] + b .Lsqr8x_outer_loop + +.align 4 +.Lsqr8x_outer_break: + // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] + ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] + ldp x15,x16,[sp,#8*1] + ldp x11,x13,[x14,#8*2] + add x1,x14,#8*4 + ldp x17,x14,[sp,#8*3] + + stp x19,x20,[x2,#8*0] + mul x19,x7,x7 + stp x21,x22,[x2,#8*2] + umulh x7,x7,x7 + stp x23,x24,[x2,#8*4] + mul x8,x9,x9 + stp x25,x26,[x2,#8*6] + mov x2,sp + umulh x9,x9,x9 + adds x20,x7,x15,lsl#1 + extr x15,x16,x15,#63 + sub x27,x5,#8*4 + +.Lsqr4x_shift_n_add: + adcs x21,x8,x15 + extr x16,x17,x16,#63 + sub x27,x27,#8*4 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + ldp x7,x9,[x1],#8*2 + umulh x11,x11,x11 + mul x12,x13,x13 + umulh x13,x13,x13 + extr x17,x14,x17,#63 + stp x19,x20,[x2,#8*0] + adcs x23,x10,x17 + extr x14,x15,x14,#63 + stp x21,x22,[x2,#8*2] + adcs x24,x11,x14 + ldp x17,x14,[x2,#8*7] + extr x15,x16,x15,#63 + adcs x25,x12,x15 + extr x16,x17,x16,#63 + adcs x26,x13,x16 + ldp x15,x16,[x2,#8*9] + mul x6,x7,x7 + ldp x11,x13,[x1],#8*2 + umulh x7,x7,x7 + mul x8,x9,x9 + umulh x9,x9,x9 + stp x23,x24,[x2,#8*4] + extr x17,x14,x17,#63 + stp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + adcs x19,x6,x17 + extr x14,x15,x14,#63 + adcs x20,x7,x14 + ldp x17,x14,[x2,#8*3] + extr x15,x16,x15,#63 + cbnz x27,.Lsqr4x_shift_n_add + ldp x1,x4,[x29,#104] // pull np and n0 + + adcs x21,x8,x15 + extr x16,x17,x16,#63 + adcs x22,x9,x16 + ldp x15,x16,[x2,#8*5] + mul x10,x11,x11 + umulh x11,x11,x11 + stp x19,x20,[x2,#8*0] + mul x12,x13,x13 + umulh x13,x13,x13 + stp x21,x22,[x2,#8*2] + extr x17,x14,x17,#63 + adcs x23,x10,x17 + extr x14,x15,x14,#63 + ldp x19,x20,[sp,#8*0] + adcs x24,x11,x14 + extr x15,x16,x15,#63 + ldp x6,x7,[x1,#8*0] + adcs x25,x12,x15 + extr x16,xzr,x16,#63 + ldp x8,x9,[x1,#8*2] + adc x26,x13,x16 + ldp x10,x11,[x1,#8*4] + + // Reduce by 512 bits per iteration + mul x28,x4,x19 // t[0]*n0 + ldp x12,x13,[x1,#8*6] + add x3,x1,x5 + ldp x21,x22,[sp,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[sp,#8*4] + stp x25,x26,[x2,#8*6] + ldp x25,x26,[sp,#8*6] + add x1,x1,#8*8 + mov x30,xzr // initial top-most carry + mov x2,sp + mov x27,#8 + +.Lsqr8x_reduction: + // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) + mul x15,x7,x28 + sub x27,x27,#1 + mul x16,x8,x28 + str x28,[x2],#8 // put aside t[0]*n0 for tail processing + mul x17,x9,x28 + // (*) adds xzr,x19,x14 + subs xzr,x19,#1 // (*) + mul x14,x10,x28 + adcs x19,x20,x15 + mul x15,x11,x28 + adcs x20,x21,x16 + mul x16,x12,x28 + adcs x21,x22,x17 + mul x17,x13,x28 + adcs x22,x23,x14 + umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) + adcs x23,x24,x15 + umulh x15,x7,x28 + adcs x24,x25,x16 + umulh x16,x8,x28 + adcs x25,x26,x17 + umulh x17,x9,x28 + adc x26,xzr,xzr + adds x19,x19,x14 + umulh x14,x10,x28 + adcs x20,x20,x15 + umulh x15,x11,x28 + adcs x21,x21,x16 + umulh x16,x12,x28 + adcs x22,x22,x17 + umulh x17,x13,x28 + mul x28,x4,x19 // next t[0]*n0 + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adc x26,x26,x17 + cbnz x27,.Lsqr8x_reduction + + ldp x14,x15,[x2,#8*0] + ldp x16,x17,[x2,#8*2] + mov x0,x2 + sub x27,x3,x1 // done yet? 
+ adds x19,x19,x14 + adcs x20,x20,x15 + ldp x14,x15,[x2,#8*4] + adcs x21,x21,x16 + adcs x22,x22,x17 + ldp x16,x17,[x2,#8*6] + adcs x23,x23,x14 + adcs x24,x24,x15 + adcs x25,x25,x16 + adcs x26,x26,x17 + //adc x28,xzr,xzr // moved below + cbz x27,.Lsqr8x8_post_condition + + ldur x4,[x2,#-8*8] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + ldp x10,x11,[x1,#8*4] + mov x27,#-8*8 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + +.Lsqr8x_tail: + mul x14,x6,x4 + adc x28,xzr,xzr // carry bit, modulo-scheduled + mul x15,x7,x4 + add x27,x27,#8 + mul x16,x8,x4 + mul x17,x9,x4 + adds x19,x19,x14 + mul x14,x10,x4 + adcs x20,x20,x15 + mul x15,x11,x4 + adcs x21,x21,x16 + mul x16,x12,x4 + adcs x22,x22,x17 + mul x17,x13,x4 + adcs x23,x23,x14 + umulh x14,x6,x4 + adcs x24,x24,x15 + umulh x15,x7,x4 + adcs x25,x25,x16 + umulh x16,x8,x4 + adcs x26,x26,x17 + umulh x17,x9,x4 + adc x28,x28,xzr + str x19,[x2],#8 + adds x19,x20,x14 + umulh x14,x10,x4 + adcs x20,x21,x15 + umulh x15,x11,x4 + adcs x21,x22,x16 + umulh x16,x12,x4 + adcs x22,x23,x17 + umulh x17,x13,x4 + ldr x4,[x0,x27] + adcs x23,x24,x14 + adcs x24,x25,x15 + adcs x25,x26,x16 + adcs x26,x28,x17 + //adc x28,xzr,xzr // moved above + cbnz x27,.Lsqr8x_tail + // note that carry flag is guaranteed + // to be zero at this point + ldp x6,x7,[x2,#8*0] + sub x27,x3,x1 // done yet? + sub x16,x3,x5 // rewinded np + ldp x8,x9,[x2,#8*2] + ldp x10,x11,[x2,#8*4] + ldp x12,x13,[x2,#8*6] + cbz x27,.Lsqr8x_tail_break + + ldur x4,[x0,#-8*8] + adds x19,x19,x6 + adcs x20,x20,x7 + ldp x6,x7,[x1,#8*0] + adcs x21,x21,x8 + adcs x22,x22,x9 + ldp x8,x9,[x1,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x1,#8*4] + adcs x25,x25,x12 + mov x27,#-8*8 + adcs x26,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + //adc x28,xzr,xzr // moved above + b .Lsqr8x_tail + +.align 4 +.Lsqr8x_tail_break: + ldr x4,[x29,#112] // pull n0 + add x27,x2,#8*8 // end of current t[num] window + + subs xzr,x30,#1 // "move" top-most carry to carry bit + adcs x14,x19,x6 + adcs x15,x20,x7 + ldp x19,x20,[x0,#8*0] + adcs x21,x21,x8 + ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] + adcs x22,x22,x9 + ldp x8,x9,[x16,#8*2] + adcs x23,x23,x10 + adcs x24,x24,x11 + ldp x10,x11,[x16,#8*4] + adcs x25,x25,x12 + adcs x26,x26,x13 + ldp x12,x13,[x16,#8*6] + add x1,x16,#8*8 + adc x30,xzr,xzr // top-most carry + mul x28,x4,x19 + stp x14,x15,[x2,#8*0] + stp x21,x22,[x2,#8*2] + ldp x21,x22,[x0,#8*2] + stp x23,x24,[x2,#8*4] + ldp x23,x24,[x0,#8*4] + cmp x27,x29 // did we hit the bottom? + stp x25,x26,[x2,#8*6] + mov x2,x0 // slide the window + ldp x25,x26,[x0,#8*6] + mov x27,#8 + b.ne .Lsqr8x_reduction + + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. 
+ ldr x0,[x29,#96] // pull rp + add x2,x2,#8*8 + subs x14,x19,x6 + sbcs x15,x20,x7 + sub x27,x5,#8*8 + mov x3,x0 // x0 copy + +.Lsqr8x_sub: + sbcs x16,x21,x8 + ldp x6,x7,[x1,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x1,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x10,x11,[x1,#8*4] + sbcs x17,x26,x13 + ldp x12,x13,[x1,#8*6] + add x1,x1,#8*8 + ldp x19,x20,[x2,#8*0] + sub x27,x27,#8*8 + ldp x21,x22,[x2,#8*2] + ldp x23,x24,[x2,#8*4] + ldp x25,x26,[x2,#8*6] + add x2,x2,#8*8 + stp x14,x15,[x0,#8*4] + sbcs x14,x19,x6 + stp x16,x17,[x0,#8*6] + add x0,x0,#8*8 + sbcs x15,x20,x7 + cbnz x27,.Lsqr8x_sub + + sbcs x16,x21,x8 + mov x2,sp + add x1,sp,x5 + ldp x6,x7,[x3,#8*0] + sbcs x17,x22,x9 + stp x14,x15,[x0,#8*0] + sbcs x14,x23,x10 + ldp x8,x9,[x3,#8*2] + sbcs x15,x24,x11 + stp x16,x17,[x0,#8*2] + sbcs x16,x25,x12 + ldp x19,x20,[x1,#8*0] + sbcs x17,x26,x13 + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + stp x14,x15,[x0,#8*4] + stp x16,x17,[x0,#8*6] + + sub x27,x5,#8*4 +.Lsqr4x_cond_copy: + sub x27,x27,#8*4 + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + ldp x6,x7,[x3,#8*4] + ldp x19,x20,[x1,#8*4] + csel x16,x21,x8,lo + stp xzr,xzr,[x2,#8*2] + add x2,x2,#8*4 + csel x17,x22,x9,lo + ldp x8,x9,[x3,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + stp xzr,xzr,[x1,#8*0] + stp xzr,xzr,[x1,#8*2] + cbnz x27,.Lsqr4x_cond_copy + + csel x14,x19,x6,lo + stp xzr,xzr,[x2,#8*0] + csel x15,x20,x7,lo + stp xzr,xzr,[x2,#8*2] + csel x16,x21,x8,lo + csel x17,x22,x9,lo + stp x14,x15,[x3,#8*0] + stp x16,x17,[x3,#8*2] + + b .Lsqr8x_done + +.align 4 +.Lsqr8x8_post_condition: + adc x28,xzr,xzr + ldr x30,[x29,#8] // pull return address + // x19-7,x28 hold result, x6-7 hold modulus + subs x6,x19,x6 + ldr x1,[x29,#96] // pull rp + sbcs x7,x20,x7 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x8 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x9 + stp xzr,xzr,[sp,#8*4] + sbcs x10,x23,x10 + stp xzr,xzr,[sp,#8*6] + sbcs x11,x24,x11 + stp xzr,xzr,[sp,#8*8] + sbcs x12,x25,x12 + stp xzr,xzr,[sp,#8*10] + sbcs x13,x26,x13 + stp xzr,xzr,[sp,#8*12] + sbcs x28,x28,xzr // did it borrow? + stp xzr,xzr,[sp,#8*14] + + // x6-7 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + csel x10,x23,x10,lo + csel x11,x24,x11,lo + stp x8,x9,[x1,#8*2] + csel x12,x25,x12,lo + csel x13,x26,x13,lo + stp x10,x11,[x1,#8*4] + stp x12,x13,[x1,#8*6] + +.Lsqr8x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 is loaded earlier + AARCH64_VALIDATE_LINK_REGISTER + ret +.size __bn_sqr8x_mont,.-__bn_sqr8x_mont +.type __bn_mul4x_mont,%function +.align 5 +__bn_mul4x_mont: + // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to + // only from bn_mul_mont (or __bn_sqr8x_mont from bn_mul_mont) which has already signed the return address. + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + sub x26,sp,x5,lsl#3 + lsl x5,x5,#3 + ldr x4,[x4] // *n0 + sub sp,x26,#8*4 // alloca + + add x10,x2,x5 + add x27,x1,x5 + stp x0,x10,[x29,#96] // offload rp and &b[num] + + ldr x24,[x2,#8*0] // b[0] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + mov x19,xzr + mov x20,xzr + mov x21,xzr + mov x22,xzr + ldp x14,x15,[x3,#8*0] // n[0..3] + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + mov x28,#0 + mov x26,sp + +.Loop_mul4x_1st_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[0]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[0]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0) + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0) + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + sub x10,x27,x1 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_1st_reduction + + cbz x10,.Lmul4x4_post_condition + + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldr x25,[sp] // a[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +.Loop_mul4x_1st_tail: + mul x10,x6,x24 // lo(a[4..7]*b[i]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[i]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] (or b[0]) + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*a[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + adcs x23,x23,x0 + umulh x13,x17,x25 + adc x0,xzr,xzr + ldr x25,[sp,x28] // next t[0]*n0 + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_1st_tail + + sub x11,x27,x5 // rewinded x1 + cbz x10,.Lmul4x_proceed + + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b .Loop_mul4x_1st_tail + +.align 5 +.Lmul4x_proceed: + ldr x24,[x2,#8*4]! // *++b + adc x30,x0,xzr + ldp x6,x7,[x11,#8*0] // a[0..3] + sub x3,x3,x5 // rewind np + ldp x8,x9,[x11,#8*2] + add x1,x11,#8*4 + + stp x19,x20,[x26,#8*0] // result!!! + ldp x19,x20,[sp,#8*4] // t[0..3] + stp x21,x22,[x26,#8*2] // result!!! 
+ ldp x21,x22,[sp,#8*6] + + ldp x14,x15,[x3,#8*0] // n[0..3] + mov x26,sp + ldp x16,x17,[x3,#8*2] + adds x3,x3,#8*4 // clear carry bit + mov x0,xzr + +.align 4 +.Loop_mul4x_reduction: + mul x10,x6,x24 // lo(a[0..3]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[0..3]*b[4]) + adcs x20,x20,x11 + mul x25,x19,x4 // t[0]*n0 + adcs x21,x21,x12 + umulh x11,x7,x24 + adcs x22,x22,x13 + umulh x12,x8,x24 + adc x23,xzr,xzr + umulh x13,x9,x24 + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + // (*) mul x10,x14,x25 + str x25,[x26],#8 // put aside t[0]*n0 for tail processing + adcs x21,x21,x11 + mul x11,x15,x25 // lo(n[0..3]*t[0]*n0 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + // (*) adds xzr,x19,x10 + subs xzr,x19,#1 // (*) + umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0 + adcs x19,x20,x11 + umulh x11,x15,x25 + adcs x20,x21,x12 + umulh x12,x16,x25 + adcs x21,x22,x13 + umulh x13,x17,x25 + adcs x22,x23,x0 + adc x0,xzr,xzr + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_reduction + + adc x0,x0,xzr + ldp x10,x11,[x26,#8*4] // t[4..7] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] // a[4..7] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + + ldr x25,[sp] // t[0]*n0 + ldp x14,x15,[x3,#8*0] // n[4..7] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + +.align 4 +.Loop_mul4x_tail: + mul x10,x6,x24 // lo(a[4..7]*b[4]) + adc x0,x0,xzr // modulo-scheduled + mul x11,x7,x24 + add x28,x28,#8 + mul x12,x8,x24 + and x28,x28,#31 + mul x13,x9,x24 + adds x19,x19,x10 + umulh x10,x6,x24 // hi(a[4..7]*b[4]) + adcs x20,x20,x11 + umulh x11,x7,x24 + adcs x21,x21,x12 + umulh x12,x8,x24 + adcs x22,x22,x13 + umulh x13,x9,x24 + adc x23,xzr,xzr + ldr x24,[x2,x28] // next b[i] + adds x20,x20,x10 + mul x10,x14,x25 // lo(n[4..7]*t[0]*n0) + adcs x21,x21,x11 + mul x11,x15,x25 + adcs x22,x22,x12 + mul x12,x16,x25 + adc x23,x23,x13 // can't overflow + mul x13,x17,x25 + adds x19,x19,x10 + umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0) + adcs x20,x20,x11 + umulh x11,x15,x25 + adcs x21,x21,x12 + umulh x12,x16,x25 + adcs x22,x22,x13 + umulh x13,x17,x25 + adcs x23,x23,x0 + ldr x25,[sp,x28] // next a[0]*n0 + adc x0,xzr,xzr + str x19,[x26],#8 // result!!! + adds x19,x20,x10 + sub x10,x27,x1 // done yet? + adcs x20,x21,x11 + adcs x21,x22,x12 + adcs x22,x23,x13 + //adc x0,x0,xzr + cbnz x28,.Loop_mul4x_tail + + sub x11,x3,x5 // rewinded np? + adc x0,x0,xzr + cbz x10,.Loop_mul4x_break + + ldp x10,x11,[x26,#8*4] + ldp x12,x13,[x26,#8*6] + ldp x6,x7,[x1,#8*0] + ldp x8,x9,[x1,#8*2] + add x1,x1,#8*4 + adds x19,x19,x10 + adcs x20,x20,x11 + adcs x21,x21,x12 + adcs x22,x22,x13 + //adc x0,x0,xzr + ldp x14,x15,[x3,#8*0] + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + b .Loop_mul4x_tail + +.align 4 +.Loop_mul4x_break: + ldp x12,x13,[x29,#96] // pull rp and &b[num] + adds x19,x19,x30 + add x2,x2,#8*4 // bp++ + adcs x20,x20,xzr + sub x1,x1,x5 // rewind ap + adcs x21,x21,xzr + stp x19,x20,[x26,#8*0] // result!!! + adcs x22,x22,xzr + ldp x19,x20,[sp,#8*4] // t[0..3] + adc x30,x0,xzr + stp x21,x22,[x26,#8*2] // result!!! + cmp x2,x13 // done yet? 
+ ldp x21,x22,[sp,#8*6] + ldp x14,x15,[x11,#8*0] // n[0..3] + ldp x16,x17,[x11,#8*2] + add x3,x11,#8*4 + b.eq .Lmul4x_post + + ldr x24,[x2] + ldp x6,x7,[x1,#8*0] // a[0..3] + ldp x8,x9,[x1,#8*2] + adds x1,x1,#8*4 // clear carry bit + mov x0,xzr + mov x26,sp + b .Loop_mul4x_reduction + +.align 4 +.Lmul4x_post: + // Final step. We see if result is larger than modulus, and + // if it is, subtract the modulus. But comparison implies + // subtraction. So we subtract modulus, see if it borrowed, + // and conditionally copy original value. + mov x0,x12 + mov x27,x12 // x0 copy + subs x10,x19,x14 + add x26,sp,#8*8 + sbcs x11,x20,x15 + sub x28,x5,#8*4 + +.Lmul4x_sub: + sbcs x12,x21,x16 + ldp x14,x15,[x3,#8*0] + sub x28,x28,#8*4 + ldp x19,x20,[x26,#8*0] + sbcs x13,x22,x17 + ldp x16,x17,[x3,#8*2] + add x3,x3,#8*4 + ldp x21,x22,[x26,#8*2] + add x26,x26,#8*4 + stp x10,x11,[x0,#8*0] + sbcs x10,x19,x14 + stp x12,x13,[x0,#8*2] + add x0,x0,#8*4 + sbcs x11,x20,x15 + cbnz x28,.Lmul4x_sub + + sbcs x12,x21,x16 + mov x26,sp + add x1,sp,#8*4 + ldp x6,x7,[x27,#8*0] + sbcs x13,x22,x17 + stp x10,x11,[x0,#8*0] + ldp x8,x9,[x27,#8*2] + stp x12,x13,[x0,#8*2] + ldp x19,x20,[x1,#8*0] + ldp x21,x22,[x1,#8*2] + sbcs xzr,x30,xzr // did it borrow? + ldr x30,[x29,#8] // pull return address + + sub x28,x5,#8*4 +.Lmul4x_cond_copy: + sub x28,x28,#8*4 + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + ldp x6,x7,[x27,#8*4] + ldp x19,x20,[x1,#8*4] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*2] + add x26,x26,#8*4 + csel x13,x22,x9,lo + ldp x8,x9,[x27,#8*6] + ldp x21,x22,[x1,#8*6] + add x1,x1,#8*4 + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + add x27,x27,#8*4 + cbnz x28,.Lmul4x_cond_copy + + csel x10,x19,x6,lo + stp xzr,xzr,[x26,#8*0] + csel x11,x20,x7,lo + stp xzr,xzr,[x26,#8*2] + csel x12,x21,x8,lo + stp xzr,xzr,[x26,#8*3] + csel x13,x22,x9,lo + stp xzr,xzr,[x26,#8*4] + stp x10,x11,[x27,#8*0] + stp x12,x13,[x27,#8*2] + + b .Lmul4x_done + +.align 4 +.Lmul4x4_post_condition: + adc x0,x0,xzr + ldr x1,[x29,#96] // pull rp + // x19-3,x0 hold result, x14-7 hold modulus + subs x6,x19,x14 + ldr x30,[x29,#8] // pull return address + sbcs x7,x20,x15 + stp xzr,xzr,[sp,#8*0] + sbcs x8,x21,x16 + stp xzr,xzr,[sp,#8*2] + sbcs x9,x22,x17 + stp xzr,xzr,[sp,#8*4] + sbcs xzr,x0,xzr // did it borrow? 
+ stp xzr,xzr,[sp,#8*6] + + // x6-3 hold result-modulus + csel x6,x19,x6,lo + csel x7,x20,x7,lo + csel x8,x21,x8,lo + csel x9,x22,x9,lo + stp x6,x7,[x1,#8*0] + stp x8,x9,[x1,#8*2] + +.Lmul4x_done: + ldp x19,x20,[x29,#16] + mov sp,x29 + ldp x21,x22,[x29,#32] + mov x0,#1 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldr x29,[sp],#128 + // x30 loaded earlier + AARCH64_VALIDATE_LINK_REGISTER + ret +.size __bn_mul4x_mont,.-__bn_mul4x_mont +.section .rodata +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 4 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-2k-avx512.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-2k-avx512.s new file mode 100644 index 000000000000..f5e5615830a6 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-2k-avx512.s @@ -0,0 +1,895 @@ + +.globl ossl_rsaz_avx512ifma_eligible +.type ossl_rsaz_avx512ifma_eligible,@function +.align 32 +ossl_rsaz_avx512ifma_eligible: + movl OPENSSL_ia32cap_P+8(%rip),%ecx + xorl %eax,%eax + andl $2149777408,%ecx + cmpl $2149777408,%ecx + cmovel %ecx,%eax + .byte 0xf3,0xc3 +.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible +.text + +.globl ossl_rsaz_amm52x20_x1_ifma256 +.type ossl_rsaz_amm52x20_x1_ifma256,@function +.align 32 +ossl_rsaz_amm52x20_x1_ifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lossl_rsaz_amm52x20_x1_ifma256_body: + + + vpxord %ymm0,%ymm0,%ymm0 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + vmovdqa64 %ymm0,%ymm19 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $5,%ebx + +.align 32 +.Lloop5: + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm16 + vpmadd52luq 64(%rsi),%ymm1,%ymm17 + vpmadd52luq 96(%rsi),%ymm1,%ymm18 + vpmadd52luq 128(%rsi),%ymm1,%ymm19 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm16 + vpmadd52luq 64(%rcx),%ymm2,%ymm17 + vpmadd52luq 96(%rcx),%ymm2,%ymm18 + vpmadd52luq 128(%rcx),%ymm2,%ymm19 + + + valignq $1,%ymm3,%ymm16,%ymm3 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm19,%ymm18 + valignq $1,%ymm19,%ymm0,%ymm19 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm16 + vpmadd52huq 64(%rsi),%ymm1,%ymm17 + vpmadd52huq 96(%rsi),%ymm1,%ymm18 + vpmadd52huq 128(%rsi),%ymm1,%ymm19 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm16 + vpmadd52huq 64(%rcx),%ymm2,%ymm17 + vpmadd52huq 96(%rcx),%ymm2,%ymm18 + vpmadd52huq 128(%rcx),%ymm2,%ymm19 + movq 8(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx 
+ mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm16 + vpmadd52luq 64(%rsi),%ymm1,%ymm17 + vpmadd52luq 96(%rsi),%ymm1,%ymm18 + vpmadd52luq 128(%rsi),%ymm1,%ymm19 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm16 + vpmadd52luq 64(%rcx),%ymm2,%ymm17 + vpmadd52luq 96(%rcx),%ymm2,%ymm18 + vpmadd52luq 128(%rcx),%ymm2,%ymm19 + + + valignq $1,%ymm3,%ymm16,%ymm3 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm19,%ymm18 + valignq $1,%ymm19,%ymm0,%ymm19 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm16 + vpmadd52huq 64(%rsi),%ymm1,%ymm17 + vpmadd52huq 96(%rsi),%ymm1,%ymm18 + vpmadd52huq 128(%rsi),%ymm1,%ymm19 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm16 + vpmadd52huq 64(%rcx),%ymm2,%ymm17 + vpmadd52huq 96(%rcx),%ymm2,%ymm18 + vpmadd52huq 128(%rcx),%ymm2,%ymm19 + movq 16(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm16 + vpmadd52luq 64(%rsi),%ymm1,%ymm17 + vpmadd52luq 96(%rsi),%ymm1,%ymm18 + vpmadd52luq 128(%rsi),%ymm1,%ymm19 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm16 + vpmadd52luq 64(%rcx),%ymm2,%ymm17 + vpmadd52luq 96(%rcx),%ymm2,%ymm18 + vpmadd52luq 128(%rcx),%ymm2,%ymm19 + + + valignq $1,%ymm3,%ymm16,%ymm3 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm19,%ymm18 + valignq $1,%ymm19,%ymm0,%ymm19 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm16 + vpmadd52huq 64(%rsi),%ymm1,%ymm17 + vpmadd52huq 96(%rsi),%ymm1,%ymm18 + vpmadd52huq 128(%rsi),%ymm1,%ymm19 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm16 + vpmadd52huq 64(%rcx),%ymm2,%ymm17 + vpmadd52huq 96(%rcx),%ymm2,%ymm18 + vpmadd52huq 128(%rcx),%ymm2,%ymm19 + movq 24(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm16 + vpmadd52luq 64(%rsi),%ymm1,%ymm17 + vpmadd52luq 96(%rsi),%ymm1,%ymm18 + vpmadd52luq 128(%rsi),%ymm1,%ymm19 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm16 + vpmadd52luq 64(%rcx),%ymm2,%ymm17 + vpmadd52luq 96(%rcx),%ymm2,%ymm18 + vpmadd52luq 128(%rcx),%ymm2,%ymm19 + + + valignq $1,%ymm3,%ymm16,%ymm3 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm19,%ymm18 + valignq $1,%ymm19,%ymm0,%ymm19 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm16 + vpmadd52huq 64(%rsi),%ymm1,%ymm17 + vpmadd52huq 96(%rsi),%ymm1,%ymm18 + vpmadd52huq 
128(%rsi),%ymm1,%ymm19 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm16 + vpmadd52huq 64(%rcx),%ymm2,%ymm17 + vpmadd52huq 96(%rcx),%ymm2,%ymm18 + vpmadd52huq 128(%rcx),%ymm2,%ymm19 + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop5 + + vpbroadcastq %r9,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm16,%ymm1 + vpsrlq $52,%ymm17,%ymm2 + vpsrlq $52,%ymm18,%ymm25 + vpsrlq $52,%ymm19,%ymm26 + + + valignq $3,%ymm25,%ymm26,%ymm26 + valignq $3,%ymm2,%ymm25,%ymm25 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + vpandq .Lmask52x4(%rip),%ymm19,%ymm19 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm16,%ymm16 + vpaddq %ymm2,%ymm17,%ymm17 + vpaddq %ymm25,%ymm18,%ymm18 + vpaddq %ymm26,%ymm19,%ymm19 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm16,%k2 + vpcmpuq $6,.Lmask52x4(%rip),%ymm17,%k3 + vpcmpuq $6,.Lmask52x4(%rip),%ymm18,%k4 + vpcmpuq $6,.Lmask52x4(%rip),%ymm19,%k5 + kmovb %k1,%r14d + kmovb %k2,%r13d + kmovb %k3,%r12d + kmovb %k4,%r11d + kmovb %k5,%r10d + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm16,%k2 + vpcmpuq $0,.Lmask52x4(%rip),%ymm17,%k3 + vpcmpuq $0,.Lmask52x4(%rip),%ymm18,%k4 + vpcmpuq $0,.Lmask52x4(%rip),%ymm19,%k5 + kmovb %k1,%r9d + kmovb %k2,%r8d + kmovb %k3,%ebx + kmovb %k4,%ecx + kmovb %k5,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r12d,%k3 + shrb $4,%r12b + kmovb %r12d,%k4 + kmovb %r10d,%k5 + + + vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1} + vpsubq .Lmask52x4(%rip),%ymm16,%ymm16{%k2} + vpsubq .Lmask52x4(%rip),%ymm17,%ymm17{%k3} + vpsubq .Lmask52x4(%rip),%ymm18,%ymm18{%k4} + vpsubq .Lmask52x4(%rip),%ymm19,%ymm19{%k5} + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + vpandq .Lmask52x4(%rip),%ymm19,%ymm19 + + vmovdqu64 %ymm3,0(%rdi) + vmovdqu64 %ymm16,32(%rdi) + vmovdqu64 %ymm17,64(%rdi) + vmovdqu64 %ymm18,96(%rdi) + vmovdqu64 %ymm19,128(%rdi) + + vzeroupper + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lossl_rsaz_amm52x20_x1_ifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x20_x1_ifma256, .-ossl_rsaz_amm52x20_x1_ifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.text + +.globl ossl_rsaz_amm52x20_x2_ifma256 +.type ossl_rsaz_amm52x20_x2_ifma256,@function +.align 32 +ossl_rsaz_amm52x20_x2_ifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset 
%r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lossl_rsaz_amm52x20_x2_ifma256_body: + + + vpxord %ymm0,%ymm0,%ymm0 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + vmovdqa64 %ymm0,%ymm19 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm20 + vmovdqa64 %ymm0,%ymm21 + vmovdqa64 %ymm0,%ymm22 + vmovdqa64 %ymm0,%ymm23 + + xorl %r9d,%r9d + xorl %r15d,%r15d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $20,%ebx + +.align 32 +.Lloop20: + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm16 + vpmadd52luq 64(%rsi),%ymm1,%ymm17 + vpmadd52luq 96(%rsi),%ymm1,%ymm18 + vpmadd52luq 128(%rsi),%ymm1,%ymm19 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm16 + vpmadd52luq 64(%rcx),%ymm2,%ymm17 + vpmadd52luq 96(%rcx),%ymm2,%ymm18 + vpmadd52luq 128(%rcx),%ymm2,%ymm19 + + + valignq $1,%ymm3,%ymm16,%ymm3 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm19,%ymm18 + valignq $1,%ymm19,%ymm0,%ymm19 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm16 + vpmadd52huq 64(%rsi),%ymm1,%ymm17 + vpmadd52huq 96(%rsi),%ymm1,%ymm18 + vpmadd52huq 128(%rsi),%ymm1,%ymm19 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm16 + vpmadd52huq 64(%rcx),%ymm2,%ymm17 + vpmadd52huq 96(%rcx),%ymm2,%ymm18 + vpmadd52huq 128(%rcx),%ymm2,%ymm19 + movq 160(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 160(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r15,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 160(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + adcq %r12,%r10 + + shrq $52,%r15 + salq $12,%r10 + orq %r10,%r15 + + vpmadd52luq 160(%rsi),%ymm1,%ymm4 + vpmadd52luq 192(%rsi),%ymm1,%ymm20 + vpmadd52luq 224(%rsi),%ymm1,%ymm21 + vpmadd52luq 256(%rsi),%ymm1,%ymm22 + vpmadd52luq 288(%rsi),%ymm1,%ymm23 + + vpmadd52luq 160(%rcx),%ymm2,%ymm4 + vpmadd52luq 192(%rcx),%ymm2,%ymm20 + vpmadd52luq 224(%rcx),%ymm2,%ymm21 + vpmadd52luq 256(%rcx),%ymm2,%ymm22 + vpmadd52luq 288(%rcx),%ymm2,%ymm23 + + + valignq $1,%ymm4,%ymm20,%ymm4 + valignq $1,%ymm20,%ymm21,%ymm20 + valignq $1,%ymm21,%ymm22,%ymm21 + valignq $1,%ymm22,%ymm23,%ymm22 + valignq $1,%ymm23,%ymm0,%ymm23 + + vmovq %xmm4,%r13 + addq %r13,%r15 + + vpmadd52huq 160(%rsi),%ymm1,%ymm4 + vpmadd52huq 192(%rsi),%ymm1,%ymm20 + vpmadd52huq 224(%rsi),%ymm1,%ymm21 + vpmadd52huq 256(%rsi),%ymm1,%ymm22 + vpmadd52huq 288(%rsi),%ymm1,%ymm23 + + vpmadd52huq 160(%rcx),%ymm2,%ymm4 + vpmadd52huq 192(%rcx),%ymm2,%ymm20 + vpmadd52huq 224(%rcx),%ymm2,%ymm21 + vpmadd52huq 256(%rcx),%ymm2,%ymm22 + vpmadd52huq 288(%rcx),%ymm2,%ymm23 + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop20 + + vpbroadcastq %r9,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm16,%ymm1 + vpsrlq $52,%ymm17,%ymm2 + vpsrlq $52,%ymm18,%ymm25 + vpsrlq $52,%ymm19,%ymm26 + + + valignq $3,%ymm25,%ymm26,%ymm26 + valignq $3,%ymm2,%ymm25,%ymm25 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq 
$3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + vpandq .Lmask52x4(%rip),%ymm19,%ymm19 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm16,%ymm16 + vpaddq %ymm2,%ymm17,%ymm17 + vpaddq %ymm25,%ymm18,%ymm18 + vpaddq %ymm26,%ymm19,%ymm19 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm16,%k2 + vpcmpuq $6,.Lmask52x4(%rip),%ymm17,%k3 + vpcmpuq $6,.Lmask52x4(%rip),%ymm18,%k4 + vpcmpuq $6,.Lmask52x4(%rip),%ymm19,%k5 + kmovb %k1,%r14d + kmovb %k2,%r13d + kmovb %k3,%r12d + kmovb %k4,%r11d + kmovb %k5,%r10d + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm16,%k2 + vpcmpuq $0,.Lmask52x4(%rip),%ymm17,%k3 + vpcmpuq $0,.Lmask52x4(%rip),%ymm18,%k4 + vpcmpuq $0,.Lmask52x4(%rip),%ymm19,%k5 + kmovb %k1,%r9d + kmovb %k2,%r8d + kmovb %k3,%ebx + kmovb %k4,%ecx + kmovb %k5,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r12d,%k3 + shrb $4,%r12b + kmovb %r12d,%k4 + kmovb %r10d,%k5 + + + vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1} + vpsubq .Lmask52x4(%rip),%ymm16,%ymm16{%k2} + vpsubq .Lmask52x4(%rip),%ymm17,%ymm17{%k3} + vpsubq .Lmask52x4(%rip),%ymm18,%ymm18{%k4} + vpsubq .Lmask52x4(%rip),%ymm19,%ymm19{%k5} + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + vpandq .Lmask52x4(%rip),%ymm19,%ymm19 + + vpbroadcastq %r15,%ymm0 + vpblendd $3,%ymm0,%ymm4,%ymm4 + + + + vpsrlq $52,%ymm4,%ymm0 + vpsrlq $52,%ymm20,%ymm1 + vpsrlq $52,%ymm21,%ymm2 + vpsrlq $52,%ymm22,%ymm25 + vpsrlq $52,%ymm23,%ymm26 + + + valignq $3,%ymm25,%ymm26,%ymm26 + valignq $3,%ymm2,%ymm25,%ymm25 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm20,%ymm20 + vpandq .Lmask52x4(%rip),%ymm21,%ymm21 + vpandq .Lmask52x4(%rip),%ymm22,%ymm22 + vpandq .Lmask52x4(%rip),%ymm23,%ymm23 + + + vpaddq %ymm0,%ymm4,%ymm4 + vpaddq %ymm1,%ymm20,%ymm20 + vpaddq %ymm2,%ymm21,%ymm21 + vpaddq %ymm25,%ymm22,%ymm22 + vpaddq %ymm26,%ymm23,%ymm23 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm20,%k2 + vpcmpuq $6,.Lmask52x4(%rip),%ymm21,%k3 + vpcmpuq $6,.Lmask52x4(%rip),%ymm22,%k4 + vpcmpuq $6,.Lmask52x4(%rip),%ymm23,%k5 + kmovb %k1,%r14d + kmovb %k2,%r13d + kmovb %k3,%r12d + kmovb %k4,%r11d + kmovb %k5,%r10d + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm20,%k2 + vpcmpuq $0,.Lmask52x4(%rip),%ymm21,%k3 + vpcmpuq $0,.Lmask52x4(%rip),%ymm22,%k4 + vpcmpuq $0,.Lmask52x4(%rip),%ymm23,%k5 + kmovb %k1,%r9d + kmovb %k2,%r8d + kmovb %k3,%ebx + kmovb %k4,%ecx + kmovb %k5,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r12d,%k3 + shrb $4,%r12b + kmovb %r12d,%k4 + kmovb 
%r10d,%k5 + + + vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k1} + vpsubq .Lmask52x4(%rip),%ymm20,%ymm20{%k2} + vpsubq .Lmask52x4(%rip),%ymm21,%ymm21{%k3} + vpsubq .Lmask52x4(%rip),%ymm22,%ymm22{%k4} + vpsubq .Lmask52x4(%rip),%ymm23,%ymm23{%k5} + + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm20,%ymm20 + vpandq .Lmask52x4(%rip),%ymm21,%ymm21 + vpandq .Lmask52x4(%rip),%ymm22,%ymm22 + vpandq .Lmask52x4(%rip),%ymm23,%ymm23 + + vmovdqu64 %ymm3,0(%rdi) + vmovdqu64 %ymm16,32(%rdi) + vmovdqu64 %ymm17,64(%rdi) + vmovdqu64 %ymm18,96(%rdi) + vmovdqu64 %ymm19,128(%rdi) + + vmovdqu64 %ymm4,160(%rdi) + vmovdqu64 %ymm20,192(%rdi) + vmovdqu64 %ymm21,224(%rdi) + vmovdqu64 %ymm22,256(%rdi) + vmovdqu64 %ymm23,288(%rdi) + + vzeroupper + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lossl_rsaz_amm52x20_x2_ifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x20_x2_ifma256, .-ossl_rsaz_amm52x20_x2_ifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x20_win5 +.type ossl_extract_multiplier_2x20_win5,@function +ossl_extract_multiplier_2x20_win5: +.cfi_startproc +.byte 243,15,30,250 + vmovdqa64 .Lones(%rip),%ymm24 + vpbroadcastq %rdx,%ymm22 + vpbroadcastq %rcx,%ymm23 + leaq 10240(%rsi),%rax + + + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %ymm0,%ymm21 + vmovdqa64 %ymm0,%ymm1 + vmovdqa64 %ymm0,%ymm2 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + vmovdqa64 %ymm0,%ymm19 + +.align 32 +.Lloop: + vpcmpq $0,%ymm21,%ymm22,%k1 + vpcmpq $0,%ymm21,%ymm23,%k2 + vmovdqu64 0(%rsi),%ymm20 + vpblendmq %ymm20,%ymm0,%ymm0{%k1} + vmovdqu64 32(%rsi),%ymm20 + vpblendmq %ymm20,%ymm1,%ymm1{%k1} + vmovdqu64 64(%rsi),%ymm20 + vpblendmq %ymm20,%ymm2,%ymm2{%k1} + vmovdqu64 96(%rsi),%ymm20 + vpblendmq %ymm20,%ymm3,%ymm3{%k1} + vmovdqu64 128(%rsi),%ymm20 + vpblendmq %ymm20,%ymm4,%ymm4{%k1} + vmovdqu64 160(%rsi),%ymm20 + vpblendmq %ymm20,%ymm5,%ymm5{%k2} + vmovdqu64 192(%rsi),%ymm20 + vpblendmq %ymm20,%ymm16,%ymm16{%k2} + vmovdqu64 224(%rsi),%ymm20 + vpblendmq %ymm20,%ymm17,%ymm17{%k2} + vmovdqu64 256(%rsi),%ymm20 + vpblendmq %ymm20,%ymm18,%ymm18{%k2} + vmovdqu64 288(%rsi),%ymm20 + vpblendmq %ymm20,%ymm19,%ymm19{%k2} + vpaddq %ymm24,%ymm21,%ymm21 + addq $320,%rsi + cmpq %rsi,%rax + jne .Lloop + vmovdqu64 %ymm0,0(%rdi) + vmovdqu64 %ymm1,32(%rdi) + vmovdqu64 %ymm2,64(%rdi) + vmovdqu64 %ymm3,96(%rdi) + vmovdqu64 %ymm4,128(%rdi) + vmovdqu64 %ymm5,160(%rdi) + vmovdqu64 %ymm16,192(%rdi) + vmovdqu64 %ymm17,224(%rdi) + vmovdqu64 %ymm18,256(%rdi) + vmovdqu64 %ymm19,288(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x20_win5, .-ossl_extract_multiplier_2x20_win5 +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-2k-avxifma.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-2k-avxifma.s new file mode 100644 index 000000000000..52ee96759445 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-2k-avxifma.s @@ -0,0 +1,1146 @@ +.text + +.globl ossl_rsaz_avxifma_eligible +.type ossl_rsaz_avxifma_eligible,@function +.align 32 +ossl_rsaz_avxifma_eligible: + movl OPENSSL_ia32cap_P+20(%rip),%ecx + xorl %eax,%eax + andl $8388608,%ecx + cmpl $8388608,%ecx + cmovel %ecx,%eax + .byte 0xf3,0xc3 
+.size ossl_rsaz_avxifma_eligible, .-ossl_rsaz_avxifma_eligible +.text + +.globl ossl_rsaz_amm52x20_x1_avxifma256 +.type ossl_rsaz_amm52x20_x1_avxifma256,@function +.align 32 +ossl_rsaz_amm52x20_x1_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lossl_rsaz_amm52x20_x1_avxifma256_body: + + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $5,%ebx + +.align 32 +.Lloop5: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8 + leaq 168(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 
+{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8 + leaq 168(%rsp),%rsp + movq 16(%r11),%r13 + + vpbroadcastq 16(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8 + leaq 168(%rsp),%rsp + movq 24(%r11),%r13 + + vpbroadcastq 24(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8 + leaq 168(%rsp),%rsp + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop5 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + 
+ vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm5,%ymm1 + vpsrlq $52,%ymm6,%ymm2 + vpsrlq $52,%ymm7,%ymm13 + vpsrlq $52,%ymm8,%ymm14 + + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm15 + vblendpd $1,%ymm15,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm2,%ymm15 + vblendpd $1,%ymm15,%ymm13,%ymm13 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm15 + vblendpd $1,%ymm15,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm15 + vblendpd $1,%ymm15,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm5,%ymm5 + vpaddq %ymm2,%ymm6,%ymm6 + vpaddq %ymm13,%ymm7,%ymm7 + vpaddq %ymm14,%ymm8,%ymm8 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + vmovmskpd %ymm2,%r12d + vmovmskpd %ymm13,%r11d + vmovmskpd %ymm14,%r10d + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + vmovmskpd %ymm2,%ebx + vmovmskpd %ymm13,%ecx + vmovmskpd %ymm14,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r13b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r13b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + movb %r12b,%r11b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + shrb $4,%r11b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm5,32(%rdi) + vmovdqu %ymm6,64(%rdi) + vmovdqu %ymm7,96(%rdi) + vmovdqu %ymm8,128(%rdi) + + vzeroupper + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lossl_rsaz_amm52x20_x1_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x20_x1_avxifma256, .-ossl_rsaz_amm52x20_x1_avxifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.Lhigh64x3: +.quad 0x0 
+.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.Lkmasklut: + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.text + +.globl ossl_rsaz_amm52x20_x2_avxifma256 +.type ossl_rsaz_amm52x20_x2_avxifma256,@function +.align 32 +ossl_rsaz_amm52x20_x2_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lossl_rsaz_amm52x20_x2_avxifma256_body: + + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 + + xorl %r9d,%r9d + xorl %r15d,%r15d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $20,%ebx + +.align 32 +.Lloop20: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm8 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm5,32(%rsp) + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vmovdqu %ymm8,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm5 + vmovdqu 72(%rsp),%ymm6 + vmovdqu 104(%rsp),%ymm7 + vmovdqu 136(%rsp),%ymm8 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 
128(%rsi),%ymm1,%ymm8 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm8 + leaq 168(%rsp),%rsp + movq 160(%r11),%r13 + + vpbroadcastq 160(%r11),%ymm1 + movq 160(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r15,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 160(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + adcq %r12,%r10 + + shrq $52,%r15 + salq $12,%r10 + orq %r10,%r15 + + leaq -168(%rsp),%rsp +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12 + + + vmovdqu %ymm4,0(%rsp) + vmovdqu %ymm9,32(%rsp) + vmovdqu %ymm10,64(%rsp) + vmovdqu %ymm11,96(%rsp) + vmovdqu %ymm12,128(%rsp) + movq $0,160(%rsp) + + vmovdqu 8(%rsp),%ymm4 + vmovdqu 40(%rsp),%ymm9 + vmovdqu 72(%rsp),%ymm10 + vmovdqu 104(%rsp),%ymm11 + vmovdqu 136(%rsp),%ymm12 + + addq 8(%rsp),%r15 + +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 168(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop20 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm5,%ymm1 + vpsrlq $52,%ymm6,%ymm2 + vpsrlq $52,%ymm7,%ymm13 + vpsrlq $52,%ymm8,%ymm14 + + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm15 + vblendpd $1,%ymm15,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm2,%ymm15 + vblendpd $1,%ymm15,%ymm13,%ymm13 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm15 + vblendpd $1,%ymm15,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm15 + vblendpd $1,%ymm15,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm5,%ymm5 + vpaddq %ymm2,%ymm6,%ymm6 + vpaddq %ymm13,%ymm7,%ymm7 + vpaddq %ymm14,%ymm8,%ymm8 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + vmovmskpd %ymm2,%r12d + vmovmskpd %ymm13,%r11d + vmovmskpd %ymm14,%r10d + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm1 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm14 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + vmovmskpd %ymm2,%ebx + vmovmskpd %ymm13,%ecx + vmovmskpd %ymm14,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b 
+ + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r13b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r13b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + movb %r12b,%r11b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + shrb $4,%r11b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + + vmovq %r15,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm4,%ymm4 + + + + vpsrlq $52,%ymm4,%ymm0 + vpsrlq $52,%ymm9,%ymm1 + vpsrlq $52,%ymm10,%ymm2 + vpsrlq $52,%ymm11,%ymm13 + vpsrlq $52,%ymm12,%ymm14 + + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm15 + vblendpd $1,%ymm15,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm2,%ymm15 + vblendpd $1,%ymm15,%ymm13,%ymm13 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm15 + vblendpd $1,%ymm15,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm15 + vblendpd $1,%ymm15,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq %ymm0,%ymm4,%ymm4 + vpaddq %ymm1,%ymm9,%ymm9 + vpaddq %ymm2,%ymm10,%ymm10 + vpaddq %ymm13,%ymm11,%ymm11 + vpaddq %ymm14,%ymm12,%ymm12 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm1 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm14 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + vmovmskpd %ymm2,%r12d + vmovmskpd %ymm13,%r11d + vmovmskpd %ymm14,%r10d + + + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm1 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm14 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + vmovmskpd %ymm2,%ebx + vmovmskpd %ymm13,%ecx + vmovmskpd %ymm14,%edx + + + + shlb $4,%r13b + orb %r13b,%r14b + shlb $4,%r11b + orb %r11b,%r12b + + addb %r14b,%r14b + adcb %r12b,%r12b + adcb %r10b,%r10b + + shlb $4,%r8b + orb %r8b,%r9b + shlb $4,%cl + orb %cl,%bl + + addb %r9b,%r14b + adcb %bl,%r12b + adcb %dl,%r10b + + xorb %r9b,%r14b + xorb %bl,%r12b + xorb %dl,%r10b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r13b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + shrb $4,%r13b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + movb %r12b,%r11b + andq $0xf,%r12 + vpsubq 
.Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + shrb $4,%r11b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm11,%ymm11 + + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm12,%ymm12 + + + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm5,32(%rdi) + vmovdqu %ymm6,64(%rdi) + vmovdqu %ymm7,96(%rdi) + vmovdqu %ymm8,128(%rdi) + + vmovdqu %ymm4,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + vmovdqu %ymm11,256(%rdi) + vmovdqu %ymm12,288(%rdi) + + vzeroupper + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lossl_rsaz_amm52x20_x2_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x20_x2_avxifma256, .-ossl_rsaz_amm52x20_x2_avxifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x20_win5_avx +.type ossl_extract_multiplier_2x20_win5_avx,@function +ossl_extract_multiplier_2x20_win5_avx: +.cfi_startproc +.byte 243,15,30,250 + vmovapd .Lones(%rip),%ymm14 + vmovq %rdx,%xmm10 + vpbroadcastq %xmm10,%ymm12 + vmovq %rcx,%xmm10 + vpbroadcastq %xmm10,%ymm13 + leaq 10240(%rsi),%rax + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + +.align 32 +.Lloop: + vpcmpeqq %ymm11,%ymm12,%ymm15 + vmovdqu 0(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm0,%ymm0 + vmovdqu 32(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm2,%ymm2 + vmovdqu 96(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm3,%ymm3 + vmovdqu 128(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm4,%ymm4 + vpcmpeqq %ymm11,%ymm13,%ymm15 + vmovdqu 160(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm5,%ymm5 + vmovdqu 192(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm6,%ymm6 + vmovdqu 224(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm7,%ymm7 + vmovdqu 256(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm8,%ymm8 + vmovdqu 288(%rsi),%ymm10 + vblendvpd %ymm15,%ymm10,%ymm9,%ymm9 + vpaddq %ymm14,%ymm11,%ymm11 + addq $320,%rsi + cmpq %rsi,%rax + jne .Lloop + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + vmovdqu %ymm5,160(%rdi) + vmovdqu %ymm6,192(%rdi) + vmovdqu %ymm7,224(%rdi) + vmovdqu %ymm8,256(%rdi) + vmovdqu %ymm9,288(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x20_win5_avx, .-ossl_extract_multiplier_2x20_win5_avx +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-3k-avx512.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-3k-avx512.s new file mode 100644 index 000000000000..0a389896a031 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-3k-avx512.s @@ -0,0 +1,1310 @@ +.text + +.globl ossl_rsaz_amm52x30_x1_ifma256 +.type ossl_rsaz_amm52x30_x1_ifma256,@function +.align 
32 +ossl_rsaz_amm52x30_x1_ifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxord %ymm0,%ymm0,%ymm0 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm6 + vmovdqa64 %ymm0,%ymm7 + vmovdqa64 %ymm0,%ymm8 + vmovdqa64 %ymm0,%ymm9 + vmovdqa64 %ymm0,%ymm10 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $7,%ebx + +.align 32 +.Lloop7: + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + movq 8(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq 
$1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + movq 16(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + movq 24(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + 
valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop7 + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + movq 8(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq 
$1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + vpbroadcastq %r9,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm19 + vpsrlq $52,%ymm7,%ymm20 + vpsrlq $52,%ymm8,%ymm21 + vpsrlq $52,%ymm9,%ymm22 + vpsrlq $52,%ymm10,%ymm23 + + + valignq $3,%ymm22,%ymm23,%ymm23 + valignq $3,%ymm21,%ymm22,%ymm22 + valignq $3,%ymm20,%ymm21,%ymm21 + valignq $3,%ymm19,%ymm20,%ymm20 + valignq $3,%ymm2,%ymm19,%ymm19 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm19,%ymm6,%ymm6 + vpaddq %ymm20,%ymm7,%ymm7 + vpaddq %ymm21,%ymm8,%ymm8 + vpaddq %ymm22,%ymm9,%ymm9 + vpaddq %ymm23,%ymm10,%ymm10 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r14d + kmovb %k2,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r13d + kmovb %k2,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%r12d + kmovb %k2,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%r11d + kmovb %k2,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r9d + kmovb %k2,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r8d + kmovb %k2,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%edx + kmovb %k2,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%ecx + kmovb %k2,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r13d,%k3 + shrb $4,%r13b + kmovb %r13d,%k4 + kmovb %r12d,%k5 + shrb $4,%r12b + kmovb %r12d,%k6 + kmovb %r11d,%k7 + + vpsubq 
.Lmask52x4(%rip),%ymm3,%ymm3{%k1} + vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k2} + vpsubq .Lmask52x4(%rip),%ymm5,%ymm5{%k3} + vpsubq .Lmask52x4(%rip),%ymm6,%ymm6{%k4} + vpsubq .Lmask52x4(%rip),%ymm7,%ymm7{%k5} + vpsubq .Lmask52x4(%rip),%ymm8,%ymm8{%k6} + vpsubq .Lmask52x4(%rip),%ymm9,%ymm9{%k7} + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + + shrb $4,%r11b + kmovb %r11d,%k1 + + vpsubq .Lmask52x4(%rip),%ymm10,%ymm10{%k1} + + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + + vmovdqu64 %ymm3,0(%rdi) + vmovdqu64 %ymm4,32(%rdi) + vmovdqu64 %ymm5,64(%rdi) + vmovdqu64 %ymm6,96(%rdi) + vmovdqu64 %ymm7,128(%rdi) + vmovdqu64 %ymm8,160(%rdi) + vmovdqu64 %ymm9,192(%rdi) + vmovdqu64 %ymm10,224(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x30_x1_ifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x30_x1_ifma256, .-ossl_rsaz_amm52x30_x1_ifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.text + +.globl ossl_rsaz_amm52x30_x2_ifma256 +.type ossl_rsaz_amm52x30_x2_ifma256,@function +.align 32 +ossl_rsaz_amm52x30_x2_ifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxord %ymm0,%ymm0,%ymm0 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm6 + vmovdqa64 %ymm0,%ymm7 + vmovdqa64 %ymm0,%ymm8 + vmovdqa64 %ymm0,%ymm9 + vmovdqa64 %ymm0,%ymm10 + + vmovdqa64 %ymm0,%ymm11 + vmovdqa64 %ymm0,%ymm12 + vmovdqa64 %ymm0,%ymm13 + vmovdqa64 %ymm0,%ymm14 + vmovdqa64 %ymm0,%ymm15 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + + + xorl %r9d,%r9d + xorl %r15d,%r15d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $30,%ebx + +.align 32 +.Lloop30: + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + valignq 
$1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm0,%ymm10 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + movq 256(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 256(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r15,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 256(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + adcq %r12,%r10 + + shrq $52,%r15 + salq $12,%r10 + orq %r10,%r15 + + vpmadd52luq 256(%rsi),%ymm1,%ymm11 + vpmadd52luq 288(%rsi),%ymm1,%ymm12 + vpmadd52luq 320(%rsi),%ymm1,%ymm13 + vpmadd52luq 352(%rsi),%ymm1,%ymm14 + vpmadd52luq 384(%rsi),%ymm1,%ymm15 + vpmadd52luq 416(%rsi),%ymm1,%ymm16 + vpmadd52luq 448(%rsi),%ymm1,%ymm17 + vpmadd52luq 480(%rsi),%ymm1,%ymm18 + + vpmadd52luq 256(%rcx),%ymm2,%ymm11 + vpmadd52luq 288(%rcx),%ymm2,%ymm12 + vpmadd52luq 320(%rcx),%ymm2,%ymm13 + vpmadd52luq 352(%rcx),%ymm2,%ymm14 + vpmadd52luq 384(%rcx),%ymm2,%ymm15 + vpmadd52luq 416(%rcx),%ymm2,%ymm16 + vpmadd52luq 448(%rcx),%ymm2,%ymm17 + vpmadd52luq 480(%rcx),%ymm2,%ymm18 + + + valignq $1,%ymm11,%ymm12,%ymm11 + valignq $1,%ymm12,%ymm13,%ymm12 + valignq $1,%ymm13,%ymm14,%ymm13 + valignq $1,%ymm14,%ymm15,%ymm14 + valignq $1,%ymm15,%ymm16,%ymm15 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm0,%ymm18 + + vmovq %xmm11,%r13 + addq %r13,%r15 + + vpmadd52huq 256(%rsi),%ymm1,%ymm11 + vpmadd52huq 288(%rsi),%ymm1,%ymm12 + vpmadd52huq 320(%rsi),%ymm1,%ymm13 + vpmadd52huq 352(%rsi),%ymm1,%ymm14 + vpmadd52huq 384(%rsi),%ymm1,%ymm15 + vpmadd52huq 416(%rsi),%ymm1,%ymm16 + vpmadd52huq 448(%rsi),%ymm1,%ymm17 + vpmadd52huq 480(%rsi),%ymm1,%ymm18 + + vpmadd52huq 256(%rcx),%ymm2,%ymm11 + vpmadd52huq 288(%rcx),%ymm2,%ymm12 + vpmadd52huq 320(%rcx),%ymm2,%ymm13 + vpmadd52huq 352(%rcx),%ymm2,%ymm14 + vpmadd52huq 384(%rcx),%ymm2,%ymm15 + vpmadd52huq 416(%rcx),%ymm2,%ymm16 + vpmadd52huq 448(%rcx),%ymm2,%ymm17 + vpmadd52huq 480(%rcx),%ymm2,%ymm18 + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop30 + + vpbroadcastq %r9,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm19 + vpsrlq $52,%ymm7,%ymm20 + vpsrlq $52,%ymm8,%ymm21 + vpsrlq $52,%ymm9,%ymm22 + vpsrlq $52,%ymm10,%ymm23 + + + valignq $3,%ymm22,%ymm23,%ymm23 + valignq $3,%ymm21,%ymm22,%ymm22 + valignq $3,%ymm20,%ymm21,%ymm21 + valignq $3,%ymm19,%ymm20,%ymm20 + valignq $3,%ymm2,%ymm19,%ymm19 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq 
.Lmask52x4(%rip),%ymm9,%ymm9 + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm19,%ymm6,%ymm6 + vpaddq %ymm20,%ymm7,%ymm7 + vpaddq %ymm21,%ymm8,%ymm8 + vpaddq %ymm22,%ymm9,%ymm9 + vpaddq %ymm23,%ymm10,%ymm10 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r14d + kmovb %k2,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r13d + kmovb %k2,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%r12d + kmovb %k2,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%r11d + kmovb %k2,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r9d + kmovb %k2,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r8d + kmovb %k2,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%edx + kmovb %k2,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%ecx + kmovb %k2,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r13d,%k3 + shrb $4,%r13b + kmovb %r13d,%k4 + kmovb %r12d,%k5 + shrb $4,%r12b + kmovb %r12d,%k6 + kmovb %r11d,%k7 + + vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1} + vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k2} + vpsubq .Lmask52x4(%rip),%ymm5,%ymm5{%k3} + vpsubq .Lmask52x4(%rip),%ymm6,%ymm6{%k4} + vpsubq .Lmask52x4(%rip),%ymm7,%ymm7{%k5} + vpsubq .Lmask52x4(%rip),%ymm8,%ymm8{%k6} + vpsubq .Lmask52x4(%rip),%ymm9,%ymm9{%k7} + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + + shrb $4,%r11b + kmovb %r11d,%k1 + + vpsubq .Lmask52x4(%rip),%ymm10,%ymm10{%k1} + + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + + vpbroadcastq %r15,%ymm0 + vpblendd $3,%ymm0,%ymm11,%ymm11 + + + + vpsrlq $52,%ymm11,%ymm0 + vpsrlq $52,%ymm12,%ymm1 + vpsrlq $52,%ymm13,%ymm2 + vpsrlq $52,%ymm14,%ymm19 + vpsrlq $52,%ymm15,%ymm20 + vpsrlq $52,%ymm16,%ymm21 + vpsrlq $52,%ymm17,%ymm22 + vpsrlq $52,%ymm18,%ymm23 + + + valignq $3,%ymm22,%ymm23,%ymm23 + valignq $3,%ymm21,%ymm22,%ymm22 + valignq $3,%ymm20,%ymm21,%ymm21 + valignq $3,%ymm19,%ymm20,%ymm20 + valignq $3,%ymm2,%ymm19,%ymm19 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm11,%ymm11 + vpandq .Lmask52x4(%rip),%ymm12,%ymm12 + vpandq .Lmask52x4(%rip),%ymm13,%ymm13 + vpandq .Lmask52x4(%rip),%ymm14,%ymm14 + vpandq .Lmask52x4(%rip),%ymm15,%ymm15 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + + + vpaddq %ymm0,%ymm11,%ymm11 + vpaddq %ymm1,%ymm12,%ymm12 + vpaddq 
%ymm2,%ymm13,%ymm13 + vpaddq %ymm19,%ymm14,%ymm14 + vpaddq %ymm20,%ymm15,%ymm15 + vpaddq %ymm21,%ymm16,%ymm16 + vpaddq %ymm22,%ymm17,%ymm17 + vpaddq %ymm23,%ymm18,%ymm18 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm11,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm12,%k2 + kmovb %k1,%r14d + kmovb %k2,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm13,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm14,%k2 + kmovb %k1,%r13d + kmovb %k2,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm15,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm16,%k2 + kmovb %k1,%r12d + kmovb %k2,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm17,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm18,%k2 + kmovb %k1,%r11d + kmovb %k2,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm11,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm12,%k2 + kmovb %k1,%r9d + kmovb %k2,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm13,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm14,%k2 + kmovb %k1,%r8d + kmovb %k2,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm15,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm16,%k2 + kmovb %k1,%edx + kmovb %k2,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm17,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm18,%k2 + kmovb %k1,%ecx + kmovb %k2,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r13d,%k3 + shrb $4,%r13b + kmovb %r13d,%k4 + kmovb %r12d,%k5 + shrb $4,%r12b + kmovb %r12d,%k6 + kmovb %r11d,%k7 + + vpsubq .Lmask52x4(%rip),%ymm11,%ymm11{%k1} + vpsubq .Lmask52x4(%rip),%ymm12,%ymm12{%k2} + vpsubq .Lmask52x4(%rip),%ymm13,%ymm13{%k3} + vpsubq .Lmask52x4(%rip),%ymm14,%ymm14{%k4} + vpsubq .Lmask52x4(%rip),%ymm15,%ymm15{%k5} + vpsubq .Lmask52x4(%rip),%ymm16,%ymm16{%k6} + vpsubq .Lmask52x4(%rip),%ymm17,%ymm17{%k7} + + vpandq .Lmask52x4(%rip),%ymm11,%ymm11 + vpandq .Lmask52x4(%rip),%ymm12,%ymm12 + vpandq .Lmask52x4(%rip),%ymm13,%ymm13 + vpandq .Lmask52x4(%rip),%ymm14,%ymm14 + vpandq .Lmask52x4(%rip),%ymm15,%ymm15 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + + shrb $4,%r11b + kmovb %r11d,%k1 + + vpsubq .Lmask52x4(%rip),%ymm18,%ymm18{%k1} + + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + + vmovdqu64 %ymm3,0(%rdi) + vmovdqu64 %ymm4,32(%rdi) + vmovdqu64 %ymm5,64(%rdi) + vmovdqu64 %ymm6,96(%rdi) + vmovdqu64 %ymm7,128(%rdi) + vmovdqu64 %ymm8,160(%rdi) + vmovdqu64 %ymm9,192(%rdi) + vmovdqu64 %ymm10,224(%rdi) + + vmovdqu64 %ymm11,256(%rdi) + vmovdqu64 %ymm12,288(%rdi) + vmovdqu64 %ymm13,320(%rdi) + vmovdqu64 %ymm14,352(%rdi) + vmovdqu64 %ymm15,384(%rdi) + vmovdqu64 %ymm16,416(%rdi) + vmovdqu64 %ymm17,448(%rdi) + vmovdqu64 %ymm18,480(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x30_x2_ifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x30_x2_ifma256, .-ossl_rsaz_amm52x30_x2_ifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x30_win5 +.type 
ossl_extract_multiplier_2x30_win5,@function +ossl_extract_multiplier_2x30_win5: +.cfi_startproc +.byte 243,15,30,250 + vmovdqa64 .Lones(%rip),%ymm30 + vpbroadcastq %rdx,%ymm28 + vpbroadcastq %rcx,%ymm29 + leaq 16384(%rsi),%rax + + + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %ymm0,%ymm27 + vmovdqa64 %ymm0,%ymm1 + vmovdqa64 %ymm0,%ymm2 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + vmovdqa64 %ymm0,%ymm19 + vmovdqa64 %ymm0,%ymm20 + vmovdqa64 %ymm0,%ymm21 + vmovdqa64 %ymm0,%ymm22 + vmovdqa64 %ymm0,%ymm23 + vmovdqa64 %ymm0,%ymm24 + vmovdqa64 %ymm0,%ymm25 + +.align 32 +.Lloop: + vpcmpq $0,%ymm27,%ymm28,%k1 + vpcmpq $0,%ymm27,%ymm29,%k2 + vmovdqu64 0(%rsi),%ymm26 + vpblendmq %ymm26,%ymm0,%ymm0{%k1} + vmovdqu64 32(%rsi),%ymm26 + vpblendmq %ymm26,%ymm1,%ymm1{%k1} + vmovdqu64 64(%rsi),%ymm26 + vpblendmq %ymm26,%ymm2,%ymm2{%k1} + vmovdqu64 96(%rsi),%ymm26 + vpblendmq %ymm26,%ymm3,%ymm3{%k1} + vmovdqu64 128(%rsi),%ymm26 + vpblendmq %ymm26,%ymm4,%ymm4{%k1} + vmovdqu64 160(%rsi),%ymm26 + vpblendmq %ymm26,%ymm5,%ymm5{%k1} + vmovdqu64 192(%rsi),%ymm26 + vpblendmq %ymm26,%ymm16,%ymm16{%k1} + vmovdqu64 224(%rsi),%ymm26 + vpblendmq %ymm26,%ymm17,%ymm17{%k1} + vmovdqu64 256(%rsi),%ymm26 + vpblendmq %ymm26,%ymm18,%ymm18{%k2} + vmovdqu64 288(%rsi),%ymm26 + vpblendmq %ymm26,%ymm19,%ymm19{%k2} + vmovdqu64 320(%rsi),%ymm26 + vpblendmq %ymm26,%ymm20,%ymm20{%k2} + vmovdqu64 352(%rsi),%ymm26 + vpblendmq %ymm26,%ymm21,%ymm21{%k2} + vmovdqu64 384(%rsi),%ymm26 + vpblendmq %ymm26,%ymm22,%ymm22{%k2} + vmovdqu64 416(%rsi),%ymm26 + vpblendmq %ymm26,%ymm23,%ymm23{%k2} + vmovdqu64 448(%rsi),%ymm26 + vpblendmq %ymm26,%ymm24,%ymm24{%k2} + vmovdqu64 480(%rsi),%ymm26 + vpblendmq %ymm26,%ymm25,%ymm25{%k2} + vpaddq %ymm30,%ymm27,%ymm27 + addq $512,%rsi + cmpq %rsi,%rax + jne .Lloop + vmovdqu64 %ymm0,0(%rdi) + vmovdqu64 %ymm1,32(%rdi) + vmovdqu64 %ymm2,64(%rdi) + vmovdqu64 %ymm3,96(%rdi) + vmovdqu64 %ymm4,128(%rdi) + vmovdqu64 %ymm5,160(%rdi) + vmovdqu64 %ymm16,192(%rdi) + vmovdqu64 %ymm17,224(%rdi) + vmovdqu64 %ymm18,256(%rdi) + vmovdqu64 %ymm19,288(%rdi) + vmovdqu64 %ymm20,320(%rdi) + vmovdqu64 %ymm21,352(%rdi) + vmovdqu64 %ymm22,384(%rdi) + vmovdqu64 %ymm23,416(%rdi) + vmovdqu64 %ymm24,448(%rdi) + vmovdqu64 %ymm25,480(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x30_win5, .-ossl_extract_multiplier_2x30_win5 +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-3k-avxifma.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-3k-avxifma.s new file mode 100644 index 000000000000..5ee447b0053a --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-3k-avxifma.s @@ -0,0 +1,1747 @@ +.text + +.globl ossl_rsaz_amm52x30_x1_avxifma256 +.type ossl_rsaz_amm52x30_x1_avxifma256,@function +.align 32 +ossl_rsaz_amm52x30_x1_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + + xorl %r9d,%r9d + 
+ movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $7,%ebx + +.align 32 +.Lloop7: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 
8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + movq 16(%r11),%r13 + + vpbroadcastq 16(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + movq 24(%r11),%r13 + + vpbroadcastq 24(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} 
vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop7 + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 
224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm11 + vpsrlq $52,%ymm7,%ymm12 + vpsrlq $52,%ymm8,%ymm13 + vpsrlq $52,%ymm9,%ymm14 + vpsrlq $52,%ymm10,%ymm15 + + leaq -32(%rsp),%rsp + vmovupd %ymm3,(%rsp) + + + vpermq $144,%ymm15,%ymm15 + vpermq $3,%ymm14,%ymm3 + vblendpd $1,%ymm3,%ymm15,%ymm15 + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm3 + vblendpd $1,%ymm3,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm12,%ymm3 + vblendpd $1,%ymm3,%ymm13,%ymm13 + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm3 + vblendpd $1,%ymm3,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm2,%ymm3 + vblendpd $1,%ymm3,%ymm11,%ymm11 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm3 + vblendpd $1,%ymm3,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm3 + vblendpd $1,%ymm3,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand 
.Lhigh64x3(%rip),%ymm0,%ymm0 + + vmovupd (%rsp),%ymm3 + leaq 32(%rsp),%rsp + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm11,%ymm6,%ymm6 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + vpaddq %ymm14,%ymm9,%ymm9 + vpaddq %ymm15,%ymm10,%ymm10 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r13d + vmovmskpd %ymm11,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%r12d + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%r11d + vmovmskpd %ymm15,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r8d + vmovmskpd %ymm11,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%edx + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%ecx + vmovmskpd %ymm15,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r10b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + movb %r13b,%r10b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + movb %r12b,%r10b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + movb %r11b,%r10b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand 
.Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x30_x1_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x30_x1_avxifma256, .-ossl_rsaz_amm52x30_x1_avxifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.Lhigh64x3: +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.Lkmasklut: + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.text + +.globl ossl_rsaz_amm52x30_x2_avxifma256 +.type ossl_rsaz_amm52x30_x2_avxifma256,@function +.align 32 +ossl_rsaz_amm52x30_x2_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $30,%ebx + +.align 32 +.Lloop30: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq 
$12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop30 + + pushq %r11 + pushq %rsi + pushq %rcx + pushq %r8 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm11 + vpsrlq $52,%ymm7,%ymm12 + vpsrlq $52,%ymm8,%ymm13 + vpsrlq $52,%ymm9,%ymm14 + vpsrlq $52,%ymm10,%ymm15 + + leaq -32(%rsp),%rsp + vmovupd %ymm3,(%rsp) + + + vpermq $144,%ymm15,%ymm15 + vpermq $3,%ymm14,%ymm3 + vblendpd $1,%ymm3,%ymm15,%ymm15 + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm3 + vblendpd $1,%ymm3,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm12,%ymm3 + vblendpd $1,%ymm3,%ymm13,%ymm13 + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm3 + vblendpd $1,%ymm3,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm2,%ymm3 + vblendpd $1,%ymm3,%ymm11,%ymm11 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm3 + vblendpd $1,%ymm3,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm3 + vblendpd $1,%ymm3,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + vmovupd (%rsp),%ymm3 + leaq 32(%rsp),%rsp + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm11,%ymm6,%ymm6 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + vpaddq %ymm14,%ymm9,%ymm9 + vpaddq %ymm15,%ymm10,%ymm10 + + + + vpcmpgtq 
.Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r13d + vmovmskpd %ymm11,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%r12d + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%r11d + vmovmskpd %ymm15,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r8d + vmovmskpd %ymm11,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%edx + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%ecx + vmovmskpd %ymm15,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r10b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + movb %r13b,%r10b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + movb %r12b,%r10b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + movb %r11b,%r10b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + popq %r8 + popq %rcx + popq %rsi + popq %r11 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + + xorl %r15d,%r15d + + leaq 16(%r11),%r11 + movq $0xfffffffffffff,%rax + + movl $30,%ebx + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd 
%ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 +.align 32 +.Lloop40: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 256(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 256(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -264(%rsp),%rsp + +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 320(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 352(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 384(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 416(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 448(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 480(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 320(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 352(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 384(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 416(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 448(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 480(%rcx),%ymm2,%ymm10 + + + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + movq $0,256(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 320(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 352(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 384(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 416(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 448(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 480(%rsi),%ymm1,%ymm10 + +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 320(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 352(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 384(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 416(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 448(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 480(%rcx),%ymm2,%ymm10 + + leaq 264(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm11 + vpsrlq $52,%ymm7,%ymm12 + vpsrlq $52,%ymm8,%ymm13 + vpsrlq $52,%ymm9,%ymm14 + vpsrlq $52,%ymm10,%ymm15 + + leaq -32(%rsp),%rsp + vmovupd %ymm3,(%rsp) + + + vpermq $144,%ymm15,%ymm15 + vpermq $3,%ymm14,%ymm3 + vblendpd $1,%ymm3,%ymm15,%ymm15 + + vpermq $144,%ymm14,%ymm14 + vpermq $3,%ymm13,%ymm3 + vblendpd $1,%ymm3,%ymm14,%ymm14 + + vpermq $144,%ymm13,%ymm13 + vpermq $3,%ymm12,%ymm3 + vblendpd $1,%ymm3,%ymm13,%ymm13 + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm3 + vblendpd $1,%ymm3,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm2,%ymm3 + vblendpd $1,%ymm3,%ymm11,%ymm11 + + vpermq $144,%ymm2,%ymm2 + vpermq $3,%ymm1,%ymm3 + vblendpd $1,%ymm3,%ymm2,%ymm2 + + vpermq $144,%ymm1,%ymm1 + vpermq $3,%ymm0,%ymm3 + vblendpd $1,%ymm3,%ymm1,%ymm1 + + vpermq $144,%ymm0,%ymm0 + vpand .Lhigh64x3(%rip),%ymm0,%ymm0 + + vmovupd (%rsp),%ymm3 + leaq 32(%rsp),%rsp + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + 
vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm11,%ymm6,%ymm6 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + vpaddq %ymm14,%ymm9,%ymm9 + vpaddq %ymm15,%ymm10,%ymm10 + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r14d + vmovmskpd %ymm1,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r13d + vmovmskpd %ymm11,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%r12d + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%r11d + vmovmskpd %ymm15,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm0 + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm1 + vmovmskpd %ymm0,%r9d + vmovmskpd %ymm1,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm2 + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm11 + vmovmskpd %ymm2,%r8d + vmovmskpd %ymm11,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm12 + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm12,%edx + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm14 + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm15 + vmovmskpd %ymm14,%ecx + vmovmskpd %ymm15,%ebx + shlb $4,%bl + orb %bl,%cl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + + leaq .Lkmasklut(%rip),%rdx + + movb %r14b,%r10b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm0 + shlq $5,%r14 + vmovapd (%rdx,%r14,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm3,%ymm3 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm4,%ymm4 + + movb %r13b,%r10b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm0 + shlq $5,%r13 + vmovapd (%rdx,%r13,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm5,%ymm5 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm6,%ymm6 + + movb %r12b,%r10b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm0 + shlq $5,%r12 + vmovapd (%rdx,%r12,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm7,%ymm7 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm8,%ymm8 + + movb %r11b,%r10b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm0 + shlq $5,%r11 + vmovapd (%rdx,%r11,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm9,%ymm9 + + shrb $4,%r10b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm0 + shlq $5,%r10 + vmovapd (%rdx,%r10,1),%ymm2 + vblendvpd %ymm2,%ymm0,%ymm10,%ymm10 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + + vmovdqu %ymm3,256(%rdi) 
+ vmovdqu %ymm4,288(%rdi) + vmovdqu %ymm5,320(%rdi) + vmovdqu %ymm6,352(%rdi) + vmovdqu %ymm7,384(%rdi) + vmovdqu %ymm8,416(%rdi) + vmovdqu %ymm9,448(%rdi) + vmovdqu %ymm10,480(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x30_x2_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x30_x2_avxifma256, .-ossl_rsaz_amm52x30_x2_avxifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x30_win5_avx +.type ossl_extract_multiplier_2x30_win5_avx,@function +ossl_extract_multiplier_2x30_win5_avx: +.cfi_startproc +.byte 243,15,30,250 + vmovapd .Lones(%rip),%ymm12 + vmovq %rdx,%xmm8 + vpbroadcastq %xmm8,%ymm10 + vmovq %rcx,%xmm8 + vpbroadcastq %xmm8,%ymm11 + leaq 16384(%rsi),%rax + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + +.align 32 +.Lloop: + vpcmpeqq %ymm9,%ymm10,%ymm13 + vmovdqu 0(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm0,%ymm0 + vmovdqu 32(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm2,%ymm2 + vmovdqu 96(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm3,%ymm3 + vmovdqu 128(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm4,%ymm4 + vmovdqu 160(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm5,%ymm5 + vmovdqu 192(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm6,%ymm6 + vmovdqu 224(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm7,%ymm7 + vpaddq %ymm12,%ymm9,%ymm9 + addq $512,%rsi + cmpq %rsi,%rax + jne .Lloop + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + vmovdqu %ymm5,160(%rdi) + vmovdqu %ymm6,192(%rdi) + vmovdqu %ymm7,224(%rdi) + leaq -16384(%rax),%rsi + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm0 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + +.align 32 +.Lloop_8_15: + vpcmpeqq %ymm9,%ymm11,%ymm13 + vmovdqu 256(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm0,%ymm0 + vmovdqu 288(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm1,%ymm1 + vmovdqu 320(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm2,%ymm2 + vmovdqu 352(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm3,%ymm3 + vmovdqu 384(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm4,%ymm4 + vmovdqu 416(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm5,%ymm5 + vmovdqu 448(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm6,%ymm6 + vmovdqu 480(%rsi),%ymm8 + + vblendvpd %ymm13,%ymm8,%ymm7,%ymm7 + vpaddq %ymm12,%ymm9,%ymm9 + addq $512,%rsi + cmpq %rsi,%rax + jne .Lloop_8_15 + vmovdqu %ymm0,256(%rdi) + vmovdqu %ymm1,288(%rdi) + vmovdqu %ymm2,320(%rdi) + vmovdqu %ymm3,352(%rdi) + vmovdqu %ymm4,384(%rdi) + vmovdqu %ymm5,416(%rdi) + vmovdqu %ymm6,448(%rdi) + vmovdqu %ymm7,480(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x30_win5_avx, .-ossl_extract_multiplier_2x30_win5_avx +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-4k-avx512.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-4k-avx512.s new file mode 100644 index 
000000000000..bd1ee59a1cea --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-4k-avx512.s @@ -0,0 +1,1353 @@ +.text + +.globl ossl_rsaz_amm52x40_x1_ifma256 +.type ossl_rsaz_amm52x40_x1_ifma256,@function +.align 32 +ossl_rsaz_amm52x40_x1_ifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxord %ymm0,%ymm0,%ymm0 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm6 + vmovdqa64 %ymm0,%ymm7 + vmovdqa64 %ymm0,%ymm8 + vmovdqa64 %ymm0,%ymm9 + vmovdqa64 %ymm0,%ymm10 + vmovdqa64 %ymm0,%ymm11 + vmovdqa64 %ymm0,%ymm12 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $10,%ebx + +.align 32 +.Lloop10: + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + vpmadd52luq 256(%rsi),%ymm1,%ymm11 + vpmadd52luq 288(%rsi),%ymm1,%ymm12 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + vpmadd52luq 256(%rcx),%ymm2,%ymm11 + vpmadd52luq 288(%rcx),%ymm2,%ymm12 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm11,%ymm10 + valignq $1,%ymm11,%ymm12,%ymm11 + valignq $1,%ymm12,%ymm0,%ymm12 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + vpmadd52huq 256(%rsi),%ymm1,%ymm11 + vpmadd52huq 288(%rsi),%ymm1,%ymm12 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + vpmadd52huq 256(%rcx),%ymm2,%ymm11 + vpmadd52huq 288(%rcx),%ymm2,%ymm12 + movq 8(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 
0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + vpmadd52luq 256(%rsi),%ymm1,%ymm11 + vpmadd52luq 288(%rsi),%ymm1,%ymm12 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + vpmadd52luq 256(%rcx),%ymm2,%ymm11 + vpmadd52luq 288(%rcx),%ymm2,%ymm12 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm11,%ymm10 + valignq $1,%ymm11,%ymm12,%ymm11 + valignq $1,%ymm12,%ymm0,%ymm12 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + vpmadd52huq 256(%rsi),%ymm1,%ymm11 + vpmadd52huq 288(%rsi),%ymm1,%ymm12 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + vpmadd52huq 256(%rcx),%ymm2,%ymm11 + vpmadd52huq 288(%rcx),%ymm2,%ymm12 + movq 16(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + vpmadd52luq 256(%rsi),%ymm1,%ymm11 + vpmadd52luq 288(%rsi),%ymm1,%ymm12 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + vpmadd52luq 256(%rcx),%ymm2,%ymm11 + vpmadd52luq 288(%rcx),%ymm2,%ymm12 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm11,%ymm10 + valignq $1,%ymm11,%ymm12,%ymm11 + valignq $1,%ymm12,%ymm0,%ymm12 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + vpmadd52huq 256(%rsi),%ymm1,%ymm11 + vpmadd52huq 
288(%rsi),%ymm1,%ymm12 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + vpmadd52huq 256(%rcx),%ymm2,%ymm11 + vpmadd52huq 288(%rcx),%ymm2,%ymm12 + movq 24(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + vpmadd52luq 256(%rsi),%ymm1,%ymm11 + vpmadd52luq 288(%rsi),%ymm1,%ymm12 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + vpmadd52luq 256(%rcx),%ymm2,%ymm11 + vpmadd52luq 288(%rcx),%ymm2,%ymm12 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm11,%ymm10 + valignq $1,%ymm11,%ymm12,%ymm11 + valignq $1,%ymm12,%ymm0,%ymm12 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + vpmadd52huq 256(%rsi),%ymm1,%ymm11 + vpmadd52huq 288(%rsi),%ymm1,%ymm12 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + vpmadd52huq 256(%rcx),%ymm2,%ymm11 + vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop10 + + vpbroadcastq %r9,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm23 + vpsrlq $52,%ymm7,%ymm24 + vpsrlq $52,%ymm8,%ymm25 + vpsrlq $52,%ymm9,%ymm26 + vpsrlq $52,%ymm10,%ymm27 + vpsrlq $52,%ymm11,%ymm28 + vpsrlq $52,%ymm12,%ymm29 + + + valignq $3,%ymm28,%ymm29,%ymm29 + valignq $3,%ymm27,%ymm28,%ymm28 + valignq $3,%ymm26,%ymm27,%ymm27 + valignq $3,%ymm25,%ymm26,%ymm26 + valignq $3,%ymm24,%ymm25,%ymm25 + valignq $3,%ymm23,%ymm24,%ymm24 + valignq $3,%ymm2,%ymm23,%ymm23 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + 
vpandq .Lmask52x4(%rip),%ymm11,%ymm11 + vpandq .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm23,%ymm6,%ymm6 + vpaddq %ymm24,%ymm7,%ymm7 + vpaddq %ymm25,%ymm8,%ymm8 + vpaddq %ymm26,%ymm9,%ymm9 + vpaddq %ymm27,%ymm10,%ymm10 + vpaddq %ymm28,%ymm11,%ymm11 + vpaddq %ymm29,%ymm12,%ymm12 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r14d + kmovb %k2,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r13d + kmovb %k2,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%r12d + kmovb %k2,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%r11d + kmovb %k2,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm11,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm12,%k2 + kmovb %k1,%r10d + kmovb %k2,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r9d + kmovb %k2,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r8d + kmovb %k2,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%edx + kmovb %k2,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%ecx + kmovb %k2,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm11,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm12,%k2 + kmovb %k1,%ebx + kmovb %k2,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r13d,%k3 + shrb $4,%r13b + kmovb %r13d,%k4 + kmovb %r12d,%k5 + shrb $4,%r12b + kmovb %r12d,%k6 + kmovb %r11d,%k7 + + vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1} + vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k2} + vpsubq .Lmask52x4(%rip),%ymm5,%ymm5{%k3} + vpsubq .Lmask52x4(%rip),%ymm6,%ymm6{%k4} + vpsubq .Lmask52x4(%rip),%ymm7,%ymm7{%k5} + vpsubq .Lmask52x4(%rip),%ymm8,%ymm8{%k6} + vpsubq .Lmask52x4(%rip),%ymm9,%ymm9{%k7} + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + + shrb $4,%r11b + kmovb %r11d,%k1 + kmovb %r10d,%k2 + shrb $4,%r10b + kmovb %r10d,%k3 + + vpsubq .Lmask52x4(%rip),%ymm10,%ymm10{%k1} + vpsubq .Lmask52x4(%rip),%ymm11,%ymm11{%k2} + vpsubq .Lmask52x4(%rip),%ymm12,%ymm12{%k3} + + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + vpandq .Lmask52x4(%rip),%ymm11,%ymm11 + vpandq .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu64 %ymm3,0(%rdi) + vmovdqu64 %ymm4,32(%rdi) + vmovdqu64 %ymm5,64(%rdi) + vmovdqu64 %ymm6,96(%rdi) + vmovdqu64 %ymm7,128(%rdi) + vmovdqu64 %ymm8,160(%rdi) + vmovdqu64 %ymm9,192(%rdi) + vmovdqu64 %ymm10,224(%rdi) + vmovdqu64 %ymm11,256(%rdi) + vmovdqu64 %ymm12,288(%rdi) + + vzeroupper + leaq (%rsp),%rax 
+.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x40_x1_ifma256_epilogue: + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x40_x1_ifma256, .-ossl_rsaz_amm52x40_x1_ifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.text + +.globl ossl_rsaz_amm52x40_x2_ifma256 +.type ossl_rsaz_amm52x40_x2_ifma256,@function +.align 32 +ossl_rsaz_amm52x40_x2_ifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxord %ymm0,%ymm0,%ymm0 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm6 + vmovdqa64 %ymm0,%ymm7 + vmovdqa64 %ymm0,%ymm8 + vmovdqa64 %ymm0,%ymm9 + vmovdqa64 %ymm0,%ymm10 + vmovdqa64 %ymm0,%ymm11 + vmovdqa64 %ymm0,%ymm12 + + vmovdqa64 %ymm0,%ymm13 + vmovdqa64 %ymm0,%ymm14 + vmovdqa64 %ymm0,%ymm15 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + vmovdqa64 %ymm0,%ymm19 + vmovdqa64 %ymm0,%ymm20 + vmovdqa64 %ymm0,%ymm21 + vmovdqa64 %ymm0,%ymm22 + + + xorl %r9d,%r9d + xorl %r15d,%r15d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $40,%ebx + +.align 32 +.Lloop40: + movq 0(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + vpmadd52luq 0(%rsi),%ymm1,%ymm3 + vpmadd52luq 32(%rsi),%ymm1,%ymm4 + vpmadd52luq 64(%rsi),%ymm1,%ymm5 + vpmadd52luq 96(%rsi),%ymm1,%ymm6 + vpmadd52luq 128(%rsi),%ymm1,%ymm7 + vpmadd52luq 160(%rsi),%ymm1,%ymm8 + vpmadd52luq 192(%rsi),%ymm1,%ymm9 + vpmadd52luq 224(%rsi),%ymm1,%ymm10 + vpmadd52luq 256(%rsi),%ymm1,%ymm11 + vpmadd52luq 288(%rsi),%ymm1,%ymm12 + + vpmadd52luq 0(%rcx),%ymm2,%ymm3 + vpmadd52luq 32(%rcx),%ymm2,%ymm4 + vpmadd52luq 64(%rcx),%ymm2,%ymm5 + vpmadd52luq 96(%rcx),%ymm2,%ymm6 + vpmadd52luq 128(%rcx),%ymm2,%ymm7 + vpmadd52luq 160(%rcx),%ymm2,%ymm8 + vpmadd52luq 192(%rcx),%ymm2,%ymm9 + vpmadd52luq 224(%rcx),%ymm2,%ymm10 + vpmadd52luq 256(%rcx),%ymm2,%ymm11 + vpmadd52luq 288(%rcx),%ymm2,%ymm12 + + + valignq $1,%ymm3,%ymm4,%ymm3 + valignq $1,%ymm4,%ymm5,%ymm4 + valignq $1,%ymm5,%ymm6,%ymm5 + valignq $1,%ymm6,%ymm7,%ymm6 + valignq $1,%ymm7,%ymm8,%ymm7 + valignq $1,%ymm8,%ymm9,%ymm8 + valignq $1,%ymm9,%ymm10,%ymm9 + valignq $1,%ymm10,%ymm11,%ymm10 + valignq $1,%ymm11,%ymm12,%ymm11 + valignq $1,%ymm12,%ymm0,%ymm12 + + vmovq %xmm3,%r13 + addq %r13,%r9 + + vpmadd52huq 0(%rsi),%ymm1,%ymm3 + vpmadd52huq 32(%rsi),%ymm1,%ymm4 + vpmadd52huq 64(%rsi),%ymm1,%ymm5 + vpmadd52huq 96(%rsi),%ymm1,%ymm6 + vpmadd52huq 128(%rsi),%ymm1,%ymm7 + vpmadd52huq 160(%rsi),%ymm1,%ymm8 + vpmadd52huq 192(%rsi),%ymm1,%ymm9 + vpmadd52huq 224(%rsi),%ymm1,%ymm10 + vpmadd52huq 256(%rsi),%ymm1,%ymm11 + vpmadd52huq 
288(%rsi),%ymm1,%ymm12 + + vpmadd52huq 0(%rcx),%ymm2,%ymm3 + vpmadd52huq 32(%rcx),%ymm2,%ymm4 + vpmadd52huq 64(%rcx),%ymm2,%ymm5 + vpmadd52huq 96(%rcx),%ymm2,%ymm6 + vpmadd52huq 128(%rcx),%ymm2,%ymm7 + vpmadd52huq 160(%rcx),%ymm2,%ymm8 + vpmadd52huq 192(%rcx),%ymm2,%ymm9 + vpmadd52huq 224(%rcx),%ymm2,%ymm10 + vpmadd52huq 256(%rcx),%ymm2,%ymm11 + vpmadd52huq 288(%rcx),%ymm2,%ymm12 + movq 320(%r11),%r13 + + vpbroadcastq %r13,%ymm1 + movq 320(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r15,%r13 + andq %rax,%r13 + + vpbroadcastq %r13,%ymm2 + movq 320(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r15 + adcq %r12,%r10 + + shrq $52,%r15 + salq $12,%r10 + orq %r10,%r15 + + vpmadd52luq 320(%rsi),%ymm1,%ymm13 + vpmadd52luq 352(%rsi),%ymm1,%ymm14 + vpmadd52luq 384(%rsi),%ymm1,%ymm15 + vpmadd52luq 416(%rsi),%ymm1,%ymm16 + vpmadd52luq 448(%rsi),%ymm1,%ymm17 + vpmadd52luq 480(%rsi),%ymm1,%ymm18 + vpmadd52luq 512(%rsi),%ymm1,%ymm19 + vpmadd52luq 544(%rsi),%ymm1,%ymm20 + vpmadd52luq 576(%rsi),%ymm1,%ymm21 + vpmadd52luq 608(%rsi),%ymm1,%ymm22 + + vpmadd52luq 320(%rcx),%ymm2,%ymm13 + vpmadd52luq 352(%rcx),%ymm2,%ymm14 + vpmadd52luq 384(%rcx),%ymm2,%ymm15 + vpmadd52luq 416(%rcx),%ymm2,%ymm16 + vpmadd52luq 448(%rcx),%ymm2,%ymm17 + vpmadd52luq 480(%rcx),%ymm2,%ymm18 + vpmadd52luq 512(%rcx),%ymm2,%ymm19 + vpmadd52luq 544(%rcx),%ymm2,%ymm20 + vpmadd52luq 576(%rcx),%ymm2,%ymm21 + vpmadd52luq 608(%rcx),%ymm2,%ymm22 + + + valignq $1,%ymm13,%ymm14,%ymm13 + valignq $1,%ymm14,%ymm15,%ymm14 + valignq $1,%ymm15,%ymm16,%ymm15 + valignq $1,%ymm16,%ymm17,%ymm16 + valignq $1,%ymm17,%ymm18,%ymm17 + valignq $1,%ymm18,%ymm19,%ymm18 + valignq $1,%ymm19,%ymm20,%ymm19 + valignq $1,%ymm20,%ymm21,%ymm20 + valignq $1,%ymm21,%ymm22,%ymm21 + valignq $1,%ymm22,%ymm0,%ymm22 + + vmovq %xmm13,%r13 + addq %r13,%r15 + + vpmadd52huq 320(%rsi),%ymm1,%ymm13 + vpmadd52huq 352(%rsi),%ymm1,%ymm14 + vpmadd52huq 384(%rsi),%ymm1,%ymm15 + vpmadd52huq 416(%rsi),%ymm1,%ymm16 + vpmadd52huq 448(%rsi),%ymm1,%ymm17 + vpmadd52huq 480(%rsi),%ymm1,%ymm18 + vpmadd52huq 512(%rsi),%ymm1,%ymm19 + vpmadd52huq 544(%rsi),%ymm1,%ymm20 + vpmadd52huq 576(%rsi),%ymm1,%ymm21 + vpmadd52huq 608(%rsi),%ymm1,%ymm22 + + vpmadd52huq 320(%rcx),%ymm2,%ymm13 + vpmadd52huq 352(%rcx),%ymm2,%ymm14 + vpmadd52huq 384(%rcx),%ymm2,%ymm15 + vpmadd52huq 416(%rcx),%ymm2,%ymm16 + vpmadd52huq 448(%rcx),%ymm2,%ymm17 + vpmadd52huq 480(%rcx),%ymm2,%ymm18 + vpmadd52huq 512(%rcx),%ymm2,%ymm19 + vpmadd52huq 544(%rcx),%ymm2,%ymm20 + vpmadd52huq 576(%rcx),%ymm2,%ymm21 + vpmadd52huq 608(%rcx),%ymm2,%ymm22 + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40 + + vpbroadcastq %r9,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + + + vpsrlq $52,%ymm3,%ymm0 + vpsrlq $52,%ymm4,%ymm1 + vpsrlq $52,%ymm5,%ymm2 + vpsrlq $52,%ymm6,%ymm23 + vpsrlq $52,%ymm7,%ymm24 + vpsrlq $52,%ymm8,%ymm25 + vpsrlq $52,%ymm9,%ymm26 + vpsrlq $52,%ymm10,%ymm27 + vpsrlq $52,%ymm11,%ymm28 + vpsrlq $52,%ymm12,%ymm29 + + + valignq $3,%ymm28,%ymm29,%ymm29 + valignq $3,%ymm27,%ymm28,%ymm28 + valignq $3,%ymm26,%ymm27,%ymm27 + valignq $3,%ymm25,%ymm26,%ymm26 + valignq $3,%ymm24,%ymm25,%ymm25 + valignq $3,%ymm23,%ymm24,%ymm24 + valignq $3,%ymm2,%ymm23,%ymm23 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq 
.Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + vpandq .Lmask52x4(%rip),%ymm11,%ymm11 + vpandq .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq %ymm0,%ymm3,%ymm3 + vpaddq %ymm1,%ymm4,%ymm4 + vpaddq %ymm2,%ymm5,%ymm5 + vpaddq %ymm23,%ymm6,%ymm6 + vpaddq %ymm24,%ymm7,%ymm7 + vpaddq %ymm25,%ymm8,%ymm8 + vpaddq %ymm26,%ymm9,%ymm9 + vpaddq %ymm27,%ymm10,%ymm10 + vpaddq %ymm28,%ymm11,%ymm11 + vpaddq %ymm29,%ymm12,%ymm12 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r14d + kmovb %k2,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r13d + kmovb %k2,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%r12d + kmovb %k2,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%r11d + kmovb %k2,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm11,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm12,%k2 + kmovb %k1,%r10d + kmovb %k2,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k2 + kmovb %k1,%r9d + kmovb %k2,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm5,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm6,%k2 + kmovb %k1,%r8d + kmovb %k2,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm7,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm8,%k2 + kmovb %k1,%edx + kmovb %k2,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm9,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm10,%k2 + kmovb %k1,%ecx + kmovb %k2,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm11,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm12,%k2 + kmovb %k1,%ebx + kmovb %k2,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r13d,%k3 + shrb $4,%r13b + kmovb %r13d,%k4 + kmovb %r12d,%k5 + shrb $4,%r12b + kmovb %r12d,%k6 + kmovb %r11d,%k7 + + vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1} + vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k2} + vpsubq .Lmask52x4(%rip),%ymm5,%ymm5{%k3} + vpsubq .Lmask52x4(%rip),%ymm6,%ymm6{%k4} + vpsubq .Lmask52x4(%rip),%ymm7,%ymm7{%k5} + vpsubq .Lmask52x4(%rip),%ymm8,%ymm8{%k6} + vpsubq .Lmask52x4(%rip),%ymm9,%ymm9{%k7} + + vpandq .Lmask52x4(%rip),%ymm3,%ymm3 + vpandq .Lmask52x4(%rip),%ymm4,%ymm4 + vpandq .Lmask52x4(%rip),%ymm5,%ymm5 + vpandq .Lmask52x4(%rip),%ymm6,%ymm6 + vpandq .Lmask52x4(%rip),%ymm7,%ymm7 + vpandq .Lmask52x4(%rip),%ymm8,%ymm8 + vpandq .Lmask52x4(%rip),%ymm9,%ymm9 + + shrb $4,%r11b + kmovb %r11d,%k1 + kmovb %r10d,%k2 + shrb $4,%r10b + kmovb %r10d,%k3 + + vpsubq .Lmask52x4(%rip),%ymm10,%ymm10{%k1} + vpsubq .Lmask52x4(%rip),%ymm11,%ymm11{%k2} + vpsubq .Lmask52x4(%rip),%ymm12,%ymm12{%k3} + + vpandq .Lmask52x4(%rip),%ymm10,%ymm10 + vpandq .Lmask52x4(%rip),%ymm11,%ymm11 + vpandq .Lmask52x4(%rip),%ymm12,%ymm12 + + vpbroadcastq %r15,%ymm0 + vpblendd $3,%ymm0,%ymm13,%ymm13 + + + + vpsrlq $52,%ymm13,%ymm0 + vpsrlq $52,%ymm14,%ymm1 + vpsrlq $52,%ymm15,%ymm2 + vpsrlq $52,%ymm16,%ymm23 + vpsrlq $52,%ymm17,%ymm24 + vpsrlq 
$52,%ymm18,%ymm25 + vpsrlq $52,%ymm19,%ymm26 + vpsrlq $52,%ymm20,%ymm27 + vpsrlq $52,%ymm21,%ymm28 + vpsrlq $52,%ymm22,%ymm29 + + + valignq $3,%ymm28,%ymm29,%ymm29 + valignq $3,%ymm27,%ymm28,%ymm28 + valignq $3,%ymm26,%ymm27,%ymm27 + valignq $3,%ymm25,%ymm26,%ymm26 + valignq $3,%ymm24,%ymm25,%ymm25 + valignq $3,%ymm23,%ymm24,%ymm24 + valignq $3,%ymm2,%ymm23,%ymm23 + valignq $3,%ymm1,%ymm2,%ymm2 + valignq $3,%ymm0,%ymm1,%ymm1 + valignq $3,.Lzeros(%rip),%ymm0,%ymm0 + + + vpandq .Lmask52x4(%rip),%ymm13,%ymm13 + vpandq .Lmask52x4(%rip),%ymm14,%ymm14 + vpandq .Lmask52x4(%rip),%ymm15,%ymm15 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + vpandq .Lmask52x4(%rip),%ymm19,%ymm19 + vpandq .Lmask52x4(%rip),%ymm20,%ymm20 + vpandq .Lmask52x4(%rip),%ymm21,%ymm21 + vpandq .Lmask52x4(%rip),%ymm22,%ymm22 + + + vpaddq %ymm0,%ymm13,%ymm13 + vpaddq %ymm1,%ymm14,%ymm14 + vpaddq %ymm2,%ymm15,%ymm15 + vpaddq %ymm23,%ymm16,%ymm16 + vpaddq %ymm24,%ymm17,%ymm17 + vpaddq %ymm25,%ymm18,%ymm18 + vpaddq %ymm26,%ymm19,%ymm19 + vpaddq %ymm27,%ymm20,%ymm20 + vpaddq %ymm28,%ymm21,%ymm21 + vpaddq %ymm29,%ymm22,%ymm22 + + + + vpcmpuq $6,.Lmask52x4(%rip),%ymm13,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm14,%k2 + kmovb %k1,%r14d + kmovb %k2,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm15,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm16,%k2 + kmovb %k1,%r13d + kmovb %k2,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm17,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm18,%k2 + kmovb %k1,%r12d + kmovb %k2,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm19,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm20,%k2 + kmovb %k1,%r11d + kmovb %k2,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpuq $6,.Lmask52x4(%rip),%ymm21,%k1 + vpcmpuq $6,.Lmask52x4(%rip),%ymm22,%k2 + kmovb %k1,%r10d + kmovb %k2,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpuq $0,.Lmask52x4(%rip),%ymm13,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm14,%k2 + kmovb %k1,%r9d + kmovb %k2,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm15,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm16,%k2 + kmovb %k1,%r8d + kmovb %k2,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpuq $0,.Lmask52x4(%rip),%ymm17,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm18,%k2 + kmovb %k1,%edx + kmovb %k2,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm19,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm20,%k2 + kmovb %k1,%ecx + kmovb %k2,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpuq $0,.Lmask52x4(%rip),%ymm21,%k1 + vpcmpuq $0,.Lmask52x4(%rip),%ymm22,%k2 + kmovb %k1,%ebx + kmovb %k2,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + kmovb %r14d,%k1 + shrb $4,%r14b + kmovb %r14d,%k2 + kmovb %r13d,%k3 + shrb $4,%r13b + kmovb %r13d,%k4 + kmovb %r12d,%k5 + shrb $4,%r12b + kmovb %r12d,%k6 + kmovb %r11d,%k7 + + vpsubq .Lmask52x4(%rip),%ymm13,%ymm13{%k1} + vpsubq .Lmask52x4(%rip),%ymm14,%ymm14{%k2} + vpsubq .Lmask52x4(%rip),%ymm15,%ymm15{%k3} + vpsubq .Lmask52x4(%rip),%ymm16,%ymm16{%k4} + vpsubq .Lmask52x4(%rip),%ymm17,%ymm17{%k5} + vpsubq .Lmask52x4(%rip),%ymm18,%ymm18{%k6} + vpsubq .Lmask52x4(%rip),%ymm19,%ymm19{%k7} + + vpandq .Lmask52x4(%rip),%ymm13,%ymm13 + vpandq .Lmask52x4(%rip),%ymm14,%ymm14 + vpandq 
.Lmask52x4(%rip),%ymm15,%ymm15 + vpandq .Lmask52x4(%rip),%ymm16,%ymm16 + vpandq .Lmask52x4(%rip),%ymm17,%ymm17 + vpandq .Lmask52x4(%rip),%ymm18,%ymm18 + vpandq .Lmask52x4(%rip),%ymm19,%ymm19 + + shrb $4,%r11b + kmovb %r11d,%k1 + kmovb %r10d,%k2 + shrb $4,%r10b + kmovb %r10d,%k3 + + vpsubq .Lmask52x4(%rip),%ymm20,%ymm20{%k1} + vpsubq .Lmask52x4(%rip),%ymm21,%ymm21{%k2} + vpsubq .Lmask52x4(%rip),%ymm22,%ymm22{%k3} + + vpandq .Lmask52x4(%rip),%ymm20,%ymm20 + vpandq .Lmask52x4(%rip),%ymm21,%ymm21 + vpandq .Lmask52x4(%rip),%ymm22,%ymm22 + + vmovdqu64 %ymm3,0(%rdi) + vmovdqu64 %ymm4,32(%rdi) + vmovdqu64 %ymm5,64(%rdi) + vmovdqu64 %ymm6,96(%rdi) + vmovdqu64 %ymm7,128(%rdi) + vmovdqu64 %ymm8,160(%rdi) + vmovdqu64 %ymm9,192(%rdi) + vmovdqu64 %ymm10,224(%rdi) + vmovdqu64 %ymm11,256(%rdi) + vmovdqu64 %ymm12,288(%rdi) + + vmovdqu64 %ymm13,320(%rdi) + vmovdqu64 %ymm14,352(%rdi) + vmovdqu64 %ymm15,384(%rdi) + vmovdqu64 %ymm16,416(%rdi) + vmovdqu64 %ymm17,448(%rdi) + vmovdqu64 %ymm18,480(%rdi) + vmovdqu64 %ymm19,512(%rdi) + vmovdqu64 %ymm20,544(%rdi) + vmovdqu64 %ymm21,576(%rdi) + vmovdqu64 %ymm22,608(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x40_x2_ifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x40_x2_ifma256, .-ossl_rsaz_amm52x40_x2_ifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x40_win5 +.type ossl_extract_multiplier_2x40_win5,@function +ossl_extract_multiplier_2x40_win5: +.cfi_startproc +.byte 243,15,30,250 + vmovdqa64 .Lones(%rip),%ymm24 + vpbroadcastq %rdx,%ymm22 + vpbroadcastq %rcx,%ymm23 + leaq 20480(%rsi),%rax + + + movq %rsi,%r10 + + + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %ymm0,%ymm1 + vmovdqa64 %ymm0,%ymm2 + vmovdqa64 %ymm0,%ymm3 + vmovdqa64 %ymm0,%ymm4 + vmovdqa64 %ymm0,%ymm5 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm0,%ymm17 + vmovdqa64 %ymm0,%ymm18 + vmovdqa64 %ymm0,%ymm19 + vpxorq %ymm21,%ymm21,%ymm21 +.align 32 +.Lloop_0: + vpcmpq $0,%ymm21,%ymm22,%k1 + vmovdqu64 0(%rsi),%ymm20 + vpblendmq %ymm20,%ymm0,%ymm0{%k1} + vmovdqu64 32(%rsi),%ymm20 + vpblendmq %ymm20,%ymm1,%ymm1{%k1} + vmovdqu64 64(%rsi),%ymm20 + vpblendmq %ymm20,%ymm2,%ymm2{%k1} + vmovdqu64 96(%rsi),%ymm20 + vpblendmq %ymm20,%ymm3,%ymm3{%k1} + vmovdqu64 128(%rsi),%ymm20 + vpblendmq %ymm20,%ymm4,%ymm4{%k1} + vmovdqu64 160(%rsi),%ymm20 + vpblendmq %ymm20,%ymm5,%ymm5{%k1} + vmovdqu64 192(%rsi),%ymm20 + vpblendmq %ymm20,%ymm16,%ymm16{%k1} + vmovdqu64 224(%rsi),%ymm20 + vpblendmq %ymm20,%ymm17,%ymm17{%k1} + vmovdqu64 256(%rsi),%ymm20 + vpblendmq %ymm20,%ymm18,%ymm18{%k1} + vmovdqu64 288(%rsi),%ymm20 + vpblendmq %ymm20,%ymm19,%ymm19{%k1} + vpaddq %ymm24,%ymm21,%ymm21 + addq $640,%rsi + cmpq %rsi,%rax + jne .Lloop_0 + vmovdqu64 %ymm0,0(%rdi) + vmovdqu64 %ymm1,32(%rdi) + vmovdqu64 %ymm2,64(%rdi) + vmovdqu64 %ymm3,96(%rdi) + vmovdqu64 %ymm4,128(%rdi) + vmovdqu64 %ymm5,160(%rdi) + vmovdqu64 %ymm16,192(%rdi) + vmovdqu64 %ymm17,224(%rdi) + vmovdqu64 %ymm18,256(%rdi) + vmovdqu64 %ymm19,288(%rdi) + movq %r10,%rsi + vpxorq %ymm21,%ymm21,%ymm21 +.align 32 +.Lloop_320: + vpcmpq $0,%ymm21,%ymm23,%k1 + vmovdqu64 320(%rsi),%ymm20 + vpblendmq %ymm20,%ymm0,%ymm0{%k1} + vmovdqu64 352(%rsi),%ymm20 + vpblendmq %ymm20,%ymm1,%ymm1{%k1} + vmovdqu64 384(%rsi),%ymm20 + vpblendmq %ymm20,%ymm2,%ymm2{%k1} + 
vmovdqu64 416(%rsi),%ymm20 + vpblendmq %ymm20,%ymm3,%ymm3{%k1} + vmovdqu64 448(%rsi),%ymm20 + vpblendmq %ymm20,%ymm4,%ymm4{%k1} + vmovdqu64 480(%rsi),%ymm20 + vpblendmq %ymm20,%ymm5,%ymm5{%k1} + vmovdqu64 512(%rsi),%ymm20 + vpblendmq %ymm20,%ymm16,%ymm16{%k1} + vmovdqu64 544(%rsi),%ymm20 + vpblendmq %ymm20,%ymm17,%ymm17{%k1} + vmovdqu64 576(%rsi),%ymm20 + vpblendmq %ymm20,%ymm18,%ymm18{%k1} + vmovdqu64 608(%rsi),%ymm20 + vpblendmq %ymm20,%ymm19,%ymm19{%k1} + vpaddq %ymm24,%ymm21,%ymm21 + addq $640,%rsi + cmpq %rsi,%rax + jne .Lloop_320 + vmovdqu64 %ymm0,320(%rdi) + vmovdqu64 %ymm1,352(%rdi) + vmovdqu64 %ymm2,384(%rdi) + vmovdqu64 %ymm3,416(%rdi) + vmovdqu64 %ymm4,448(%rdi) + vmovdqu64 %ymm5,480(%rdi) + vmovdqu64 %ymm16,512(%rdi) + vmovdqu64 %ymm17,544(%rdi) + vmovdqu64 %ymm18,576(%rdi) + vmovdqu64 %ymm19,608(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x40_win5, .-ossl_extract_multiplier_2x40_win5 +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-4k-avxifma.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-4k-avxifma.s new file mode 100644 index 000000000000..32c6dfd33fb7 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-4k-avxifma.s @@ -0,0 +1,1901 @@ +.text + +.globl ossl_rsaz_amm52x40_x1_avxifma256 +.type ossl_rsaz_amm52x40_x1_avxifma256,@function +.align 32 +ossl_rsaz_amm52x40_x1_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + + movl $10,%ebx + +.align 32 +.Lloop10: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu 
%ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 328(%rsp),%rsp + movq 8(%r11),%r13 + + vpbroadcastq 8(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 
64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 328(%rsp),%rsp + movq 16(%r11),%r13 + + vpbroadcastq 16(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 328(%rsp),%rsp + movq 24(%r11),%r13 + + vpbroadcastq 24(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq %r8,%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} 
vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 328(%rsp),%rsp + leaq 32(%r11),%r11 + decl %ebx + jne .Lloop10 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + leaq -640(%rsp),%rsp + vmovupd %ymm3,0(%rsp) + vmovupd %ymm4,32(%rsp) + vmovupd %ymm5,64(%rsp) + vmovupd %ymm6,96(%rsp) + vmovupd %ymm7,128(%rsp) + vmovupd %ymm8,160(%rsp) + vmovupd %ymm9,192(%rsp) + vmovupd %ymm10,224(%rsp) + vmovupd %ymm11,256(%rsp) + vmovupd %ymm12,288(%rsp) + + + + vpsrlq $52,%ymm3,%ymm3 + vpsrlq $52,%ymm4,%ymm4 + vpsrlq $52,%ymm5,%ymm5 + vpsrlq $52,%ymm6,%ymm6 + vpsrlq $52,%ymm7,%ymm7 + vpsrlq $52,%ymm8,%ymm8 + vpsrlq $52,%ymm9,%ymm9 + vpsrlq $52,%ymm10,%ymm10 + vpsrlq $52,%ymm11,%ymm11 + vpsrlq $52,%ymm12,%ymm12 + + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm13 + vblendpd $1,%ymm13,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm10,%ymm13 + vblendpd $1,%ymm13,%ymm11,%ymm11 + + vpermq $144,%ymm10,%ymm10 + vpermq $3,%ymm9,%ymm13 + vblendpd $1,%ymm13,%ymm10,%ymm10 + + vpermq $144,%ymm9,%ymm9 + vpermq $3,%ymm8,%ymm13 + vblendpd $1,%ymm13,%ymm9,%ymm9 + + vpermq $144,%ymm8,%ymm8 + vpermq $3,%ymm7,%ymm13 + vblendpd $1,%ymm13,%ymm8,%ymm8 + + vpermq $144,%ymm7,%ymm7 + vpermq $3,%ymm6,%ymm13 + vblendpd $1,%ymm13,%ymm7,%ymm7 + + vpermq $144,%ymm6,%ymm6 + vpermq $3,%ymm5,%ymm13 + vblendpd $1,%ymm13,%ymm6,%ymm6 + + vpermq $144,%ymm5,%ymm5 + vpermq $3,%ymm4,%ymm13 + vblendpd $1,%ymm13,%ymm5,%ymm5 + + vpermq $144,%ymm4,%ymm4 + vpermq $3,%ymm3,%ymm13 + vblendpd $1,%ymm13,%ymm4,%ymm4 + + vpermq $144,%ymm3,%ymm3 + vpand .Lhigh64x3(%rip),%ymm3,%ymm3 + + vmovupd 
%ymm3,320(%rsp) + vmovupd %ymm4,352(%rsp) + vmovupd %ymm5,384(%rsp) + vmovupd %ymm6,416(%rsp) + vmovupd %ymm7,448(%rsp) + vmovupd %ymm8,480(%rsp) + vmovupd %ymm9,512(%rsp) + vmovupd %ymm10,544(%rsp) + vmovupd %ymm11,576(%rsp) + vmovupd %ymm12,608(%rsp) + + vmovupd 0(%rsp),%ymm3 + vmovupd 32(%rsp),%ymm4 + vmovupd 64(%rsp),%ymm5 + vmovupd 96(%rsp),%ymm6 + vmovupd 128(%rsp),%ymm7 + vmovupd 160(%rsp),%ymm8 + vmovupd 192(%rsp),%ymm9 + vmovupd 224(%rsp),%ymm10 + vmovupd 256(%rsp),%ymm11 + vmovupd 288(%rsp),%ymm12 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq 320(%rsp),%ymm3,%ymm3 + vpaddq 352(%rsp),%ymm4,%ymm4 + vpaddq 384(%rsp),%ymm5,%ymm5 + vpaddq 416(%rsp),%ymm6,%ymm6 + vpaddq 448(%rsp),%ymm7,%ymm7 + vpaddq 480(%rsp),%ymm8,%ymm8 + vpaddq 512(%rsp),%ymm9,%ymm9 + vpaddq 544(%rsp),%ymm10,%ymm10 + vpaddq 576(%rsp),%ymm11,%ymm11 + vpaddq 608(%rsp),%ymm12,%ymm12 + + leaq 640(%rsp),%rsp + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r14d + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r13d + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%r12d + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%r11d + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%r10d + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r9d + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r8d + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%edx + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%ecx + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%ebx + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + pushq %r9 + pushq %r8 + + leaq .Lkmasklut(%rip),%r8 + + movb %r14b,%r9b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 + shlq $5,%r14 + vmovapd (%r8,%r14,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm4,%ymm4 + + movb %r13b,%r9b + 
andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm13 + shlq $5,%r13 + vmovapd (%r8,%r13,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm5,%ymm5 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm6,%ymm6 + + movb %r12b,%r9b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm13 + shlq $5,%r12 + vmovapd (%r8,%r12,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm7,%ymm7 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm8,%ymm8 + + movb %r11b,%r9b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm13 + shlq $5,%r11 + vmovapd (%r8,%r11,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm9,%ymm9 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm10,%ymm10 + + movb %r10b,%r9b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm13 + shlq $5,%r10 + vmovapd (%r8,%r10,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm11,%ymm11 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm12,%ymm12 + + popq %r8 + popq %r9 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + vmovdqu %ymm11,256(%rdi) + vmovdqu %ymm12,288(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 +.Lossl_rsaz_amm52x40_x1_avxifma256_epilogue: + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x40_x1_avxifma256, .-ossl_rsaz_amm52x40_x1_avxifma256 +.section .rodata +.align 32 +.Lmask52x4: +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.quad 0xfffffffffffff +.Lhigh64x3: +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.Lkmasklut: + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 + +.quad 0x0 +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0x0 +.quad 0xffffffffffffffff +.quad 
0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0x0 +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff + +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.quad 0xffffffffffffffff +.text + +.globl ossl_rsaz_amm52x40_x2_avxifma256 +.type ossl_rsaz_amm52x40_x2_avxifma256,@function +.align 32 +ossl_rsaz_amm52x40_x2_avxifma256: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 + + xorl %r9d,%r9d + + movq %rdx,%r11 + movq $0xfffffffffffff,%rax + + movl $40,%ebx + +.align 32 +.Lloop40: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 0(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq (%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 0(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 
96(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12 + leaq 328(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40 + + pushq %r11 + pushq %rsi + pushq %rcx + pushq %r8 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + leaq -640(%rsp),%rsp + vmovupd %ymm3,0(%rsp) + vmovupd %ymm4,32(%rsp) + vmovupd %ymm5,64(%rsp) + vmovupd %ymm6,96(%rsp) + vmovupd %ymm7,128(%rsp) + vmovupd %ymm8,160(%rsp) + vmovupd %ymm9,192(%rsp) + vmovupd %ymm10,224(%rsp) + vmovupd %ymm11,256(%rsp) + vmovupd %ymm12,288(%rsp) + + + + vpsrlq $52,%ymm3,%ymm3 + vpsrlq $52,%ymm4,%ymm4 + vpsrlq $52,%ymm5,%ymm5 + vpsrlq $52,%ymm6,%ymm6 + vpsrlq $52,%ymm7,%ymm7 + vpsrlq $52,%ymm8,%ymm8 + vpsrlq $52,%ymm9,%ymm9 + vpsrlq $52,%ymm10,%ymm10 + vpsrlq $52,%ymm11,%ymm11 + vpsrlq $52,%ymm12,%ymm12 + + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm13 + vblendpd $1,%ymm13,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm10,%ymm13 + vblendpd $1,%ymm13,%ymm11,%ymm11 + + vpermq $144,%ymm10,%ymm10 + vpermq $3,%ymm9,%ymm13 + vblendpd $1,%ymm13,%ymm10,%ymm10 + + vpermq $144,%ymm9,%ymm9 + vpermq $3,%ymm8,%ymm13 + vblendpd $1,%ymm13,%ymm9,%ymm9 + + vpermq $144,%ymm8,%ymm8 + vpermq $3,%ymm7,%ymm13 + vblendpd $1,%ymm13,%ymm8,%ymm8 + + vpermq $144,%ymm7,%ymm7 + vpermq $3,%ymm6,%ymm13 + vblendpd $1,%ymm13,%ymm7,%ymm7 + + vpermq $144,%ymm6,%ymm6 + vpermq $3,%ymm5,%ymm13 + vblendpd $1,%ymm13,%ymm6,%ymm6 + + vpermq $144,%ymm5,%ymm5 + vpermq $3,%ymm4,%ymm13 + vblendpd $1,%ymm13,%ymm5,%ymm5 + + vpermq $144,%ymm4,%ymm4 + vpermq $3,%ymm3,%ymm13 + vblendpd $1,%ymm13,%ymm4,%ymm4 + + vpermq $144,%ymm3,%ymm3 + vpand .Lhigh64x3(%rip),%ymm3,%ymm3 + + vmovupd %ymm3,320(%rsp) + vmovupd %ymm4,352(%rsp) + vmovupd %ymm5,384(%rsp) + vmovupd %ymm6,416(%rsp) + vmovupd %ymm7,448(%rsp) + vmovupd %ymm8,480(%rsp) + vmovupd %ymm9,512(%rsp) + vmovupd %ymm10,544(%rsp) + vmovupd %ymm11,576(%rsp) + vmovupd %ymm12,608(%rsp) + + vmovupd 0(%rsp),%ymm3 + vmovupd 32(%rsp),%ymm4 + vmovupd 64(%rsp),%ymm5 + vmovupd 96(%rsp),%ymm6 + vmovupd 128(%rsp),%ymm7 + vmovupd 160(%rsp),%ymm8 + vmovupd 192(%rsp),%ymm9 + vmovupd 224(%rsp),%ymm10 + vmovupd 256(%rsp),%ymm11 + vmovupd 288(%rsp),%ymm12 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq 320(%rsp),%ymm3,%ymm3 + vpaddq 352(%rsp),%ymm4,%ymm4 + vpaddq 384(%rsp),%ymm5,%ymm5 + vpaddq 416(%rsp),%ymm6,%ymm6 + vpaddq 448(%rsp),%ymm7,%ymm7 + vpaddq 480(%rsp),%ymm8,%ymm8 + vpaddq 512(%rsp),%ymm9,%ymm9 + vpaddq 544(%rsp),%ymm10,%ymm10 + vpaddq 576(%rsp),%ymm11,%ymm11 + vpaddq 608(%rsp),%ymm12,%ymm12 + + leaq 640(%rsp),%rsp + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r14d + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r13d + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%r12d + vpcmpgtq 
.Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%r11d + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%r10d + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r9d + vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r8d + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%edx + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%ecx + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%ebx + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + pushq %r9 + pushq %r8 + + leaq .Lkmasklut(%rip),%r8 + + movb %r14b,%r9b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 + shlq $5,%r14 + vmovapd (%r8,%r14,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm4,%ymm4 + + movb %r13b,%r9b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm13 + shlq $5,%r13 + vmovapd (%r8,%r13,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm5,%ymm5 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm6,%ymm6 + + movb %r12b,%r9b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm13 + shlq $5,%r12 + vmovapd (%r8,%r12,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm7,%ymm7 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm8,%ymm8 + + movb %r11b,%r9b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm13 + shlq $5,%r11 + vmovapd (%r8,%r11,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm9,%ymm9 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm10,%ymm10 + + movb %r10b,%r9b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm13 + shlq $5,%r10 + vmovapd (%r8,%r10,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm11,%ymm11 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm12,%ymm12 + + popq %r8 + popq %r9 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + popq %r8 + 
popq %rcx + popq %rsi + popq %r11 + + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm4,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm6,96(%rdi) + vmovdqu %ymm7,128(%rdi) + vmovdqu %ymm8,160(%rdi) + vmovdqu %ymm9,192(%rdi) + vmovdqu %ymm10,224(%rdi) + vmovdqu %ymm11,256(%rdi) + vmovdqu %ymm12,288(%rdi) + + xorl %r15d,%r15d + + movq $0xfffffffffffff,%rax + + movl $40,%ebx + + vpxor %ymm0,%ymm0,%ymm0 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vmovapd %ymm0,%ymm10 + vmovapd %ymm0,%ymm11 + vmovapd %ymm0,%ymm12 +.align 32 +.Lloop40_1: + movq 0(%r11),%r13 + + vpbroadcastq 0(%r11),%ymm1 + movq 320(%rsi),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + movq %r12,%r10 + adcq $0,%r10 + + movq 8(%r8),%r13 + imulq %r9,%r13 + andq %rax,%r13 + + vmovq %r13,%xmm2 + vpbroadcastq %xmm2,%ymm2 + movq 320(%rcx),%rdx + mulxq %r13,%r13,%r12 + addq %r13,%r9 + adcq %r12,%r10 + + shrq $52,%r9 + salq $12,%r10 + orq %r10,%r9 + + leaq -328(%rsp),%rsp + +{vex} vpmadd52luq 320(%rsi),%ymm1,%ymm3 +{vex} vpmadd52luq 352(%rsi),%ymm1,%ymm4 +{vex} vpmadd52luq 384(%rsi),%ymm1,%ymm5 +{vex} vpmadd52luq 416(%rsi),%ymm1,%ymm6 +{vex} vpmadd52luq 448(%rsi),%ymm1,%ymm7 +{vex} vpmadd52luq 480(%rsi),%ymm1,%ymm8 +{vex} vpmadd52luq 512(%rsi),%ymm1,%ymm9 +{vex} vpmadd52luq 544(%rsi),%ymm1,%ymm10 +{vex} vpmadd52luq 576(%rsi),%ymm1,%ymm11 +{vex} vpmadd52luq 608(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52luq 320(%rcx),%ymm2,%ymm3 +{vex} vpmadd52luq 352(%rcx),%ymm2,%ymm4 +{vex} vpmadd52luq 384(%rcx),%ymm2,%ymm5 +{vex} vpmadd52luq 416(%rcx),%ymm2,%ymm6 +{vex} vpmadd52luq 448(%rcx),%ymm2,%ymm7 +{vex} vpmadd52luq 480(%rcx),%ymm2,%ymm8 +{vex} vpmadd52luq 512(%rcx),%ymm2,%ymm9 +{vex} vpmadd52luq 544(%rcx),%ymm2,%ymm10 +{vex} vpmadd52luq 576(%rcx),%ymm2,%ymm11 +{vex} vpmadd52luq 608(%rcx),%ymm2,%ymm12 + vmovdqu %ymm3,0(%rsp) + vmovdqu %ymm4,32(%rsp) + vmovdqu %ymm5,64(%rsp) + vmovdqu %ymm6,96(%rsp) + vmovdqu %ymm7,128(%rsp) + vmovdqu %ymm8,160(%rsp) + vmovdqu %ymm9,192(%rsp) + vmovdqu %ymm10,224(%rsp) + vmovdqu %ymm11,256(%rsp) + vmovdqu %ymm12,288(%rsp) + movq $0,320(%rsp) + + vmovdqu 8(%rsp),%ymm3 + vmovdqu 40(%rsp),%ymm4 + vmovdqu 72(%rsp),%ymm5 + vmovdqu 104(%rsp),%ymm6 + vmovdqu 136(%rsp),%ymm7 + vmovdqu 168(%rsp),%ymm8 + vmovdqu 200(%rsp),%ymm9 + vmovdqu 232(%rsp),%ymm10 + vmovdqu 264(%rsp),%ymm11 + vmovdqu 296(%rsp),%ymm12 + + addq 8(%rsp),%r9 + +{vex} vpmadd52huq 320(%rsi),%ymm1,%ymm3 +{vex} vpmadd52huq 352(%rsi),%ymm1,%ymm4 +{vex} vpmadd52huq 384(%rsi),%ymm1,%ymm5 +{vex} vpmadd52huq 416(%rsi),%ymm1,%ymm6 +{vex} vpmadd52huq 448(%rsi),%ymm1,%ymm7 +{vex} vpmadd52huq 480(%rsi),%ymm1,%ymm8 +{vex} vpmadd52huq 512(%rsi),%ymm1,%ymm9 +{vex} vpmadd52huq 544(%rsi),%ymm1,%ymm10 +{vex} vpmadd52huq 576(%rsi),%ymm1,%ymm11 +{vex} vpmadd52huq 608(%rsi),%ymm1,%ymm12 + +{vex} vpmadd52huq 320(%rcx),%ymm2,%ymm3 +{vex} vpmadd52huq 352(%rcx),%ymm2,%ymm4 +{vex} vpmadd52huq 384(%rcx),%ymm2,%ymm5 +{vex} vpmadd52huq 416(%rcx),%ymm2,%ymm6 +{vex} vpmadd52huq 448(%rcx),%ymm2,%ymm7 +{vex} vpmadd52huq 480(%rcx),%ymm2,%ymm8 +{vex} vpmadd52huq 512(%rcx),%ymm2,%ymm9 +{vex} vpmadd52huq 544(%rcx),%ymm2,%ymm10 +{vex} vpmadd52huq 576(%rcx),%ymm2,%ymm11 +{vex} vpmadd52huq 608(%rcx),%ymm2,%ymm12 + leaq 328(%rsp),%rsp + leaq 8(%r11),%r11 + decl %ebx + jne .Lloop40_1 + + vmovq %r9,%xmm0 + vpbroadcastq %xmm0,%ymm0 + vpblendd $3,%ymm0,%ymm3,%ymm3 + + leaq -640(%rsp),%rsp + vmovupd %ymm3,0(%rsp) + vmovupd %ymm4,32(%rsp) + vmovupd %ymm5,64(%rsp) + vmovupd %ymm6,96(%rsp) + vmovupd 
%ymm7,128(%rsp) + vmovupd %ymm8,160(%rsp) + vmovupd %ymm9,192(%rsp) + vmovupd %ymm10,224(%rsp) + vmovupd %ymm11,256(%rsp) + vmovupd %ymm12,288(%rsp) + + + + vpsrlq $52,%ymm3,%ymm3 + vpsrlq $52,%ymm4,%ymm4 + vpsrlq $52,%ymm5,%ymm5 + vpsrlq $52,%ymm6,%ymm6 + vpsrlq $52,%ymm7,%ymm7 + vpsrlq $52,%ymm8,%ymm8 + vpsrlq $52,%ymm9,%ymm9 + vpsrlq $52,%ymm10,%ymm10 + vpsrlq $52,%ymm11,%ymm11 + vpsrlq $52,%ymm12,%ymm12 + + + vpermq $144,%ymm12,%ymm12 + vpermq $3,%ymm11,%ymm13 + vblendpd $1,%ymm13,%ymm12,%ymm12 + + vpermq $144,%ymm11,%ymm11 + vpermq $3,%ymm10,%ymm13 + vblendpd $1,%ymm13,%ymm11,%ymm11 + + vpermq $144,%ymm10,%ymm10 + vpermq $3,%ymm9,%ymm13 + vblendpd $1,%ymm13,%ymm10,%ymm10 + + vpermq $144,%ymm9,%ymm9 + vpermq $3,%ymm8,%ymm13 + vblendpd $1,%ymm13,%ymm9,%ymm9 + + vpermq $144,%ymm8,%ymm8 + vpermq $3,%ymm7,%ymm13 + vblendpd $1,%ymm13,%ymm8,%ymm8 + + vpermq $144,%ymm7,%ymm7 + vpermq $3,%ymm6,%ymm13 + vblendpd $1,%ymm13,%ymm7,%ymm7 + + vpermq $144,%ymm6,%ymm6 + vpermq $3,%ymm5,%ymm13 + vblendpd $1,%ymm13,%ymm6,%ymm6 + + vpermq $144,%ymm5,%ymm5 + vpermq $3,%ymm4,%ymm13 + vblendpd $1,%ymm13,%ymm5,%ymm5 + + vpermq $144,%ymm4,%ymm4 + vpermq $3,%ymm3,%ymm13 + vblendpd $1,%ymm13,%ymm4,%ymm4 + + vpermq $144,%ymm3,%ymm3 + vpand .Lhigh64x3(%rip),%ymm3,%ymm3 + + vmovupd %ymm3,320(%rsp) + vmovupd %ymm4,352(%rsp) + vmovupd %ymm5,384(%rsp) + vmovupd %ymm6,416(%rsp) + vmovupd %ymm7,448(%rsp) + vmovupd %ymm8,480(%rsp) + vmovupd %ymm9,512(%rsp) + vmovupd %ymm10,544(%rsp) + vmovupd %ymm11,576(%rsp) + vmovupd %ymm12,608(%rsp) + + vmovupd 0(%rsp),%ymm3 + vmovupd 32(%rsp),%ymm4 + vmovupd 64(%rsp),%ymm5 + vmovupd 96(%rsp),%ymm6 + vmovupd 128(%rsp),%ymm7 + vmovupd 160(%rsp),%ymm8 + vmovupd 192(%rsp),%ymm9 + vmovupd 224(%rsp),%ymm10 + vmovupd 256(%rsp),%ymm11 + vmovupd 288(%rsp),%ymm12 + + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + + vpaddq 320(%rsp),%ymm3,%ymm3 + vpaddq 352(%rsp),%ymm4,%ymm4 + vpaddq 384(%rsp),%ymm5,%ymm5 + vpaddq 416(%rsp),%ymm6,%ymm6 + vpaddq 448(%rsp),%ymm7,%ymm7 + vpaddq 480(%rsp),%ymm8,%ymm8 + vpaddq 512(%rsp),%ymm9,%ymm9 + vpaddq 544(%rsp),%ymm10,%ymm10 + vpaddq 576(%rsp),%ymm11,%ymm11 + vpaddq 608(%rsp),%ymm12,%ymm12 + + leaq 640(%rsp),%rsp + + + + vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r14d + vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r13d + shlb $4,%r13b + orb %r13b,%r14b + + vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r13d + vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%r12d + shlb $4,%r12b + orb %r12b,%r13b + + vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%r12d + vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%r11d + shlb $4,%r11b + orb %r11b,%r12b + + vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%r11d + vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%r10d + shlb $4,%r10b + orb %r10b,%r11b + + vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%r10d + vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%r9d + shlb $4,%r9b + orb %r9b,%r10b + + addb %r14b,%r14b + adcb %r13b,%r13b + adcb %r12b,%r12b + adcb %r11b,%r11b + adcb %r10b,%r10b + + + vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm13 + vmovmskpd %ymm13,%r9d + vpcmpeqq 
.Lmask52x4(%rip),%ymm4,%ymm13 + vmovmskpd %ymm13,%r8d + shlb $4,%r8b + orb %r8b,%r9b + + vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm13 + vmovmskpd %ymm13,%r8d + vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm13 + vmovmskpd %ymm13,%edx + shlb $4,%dl + orb %dl,%r8b + + vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm13 + vmovmskpd %ymm13,%edx + vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm13 + vmovmskpd %ymm13,%ecx + shlb $4,%cl + orb %cl,%dl + + vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm13 + vmovmskpd %ymm13,%ecx + vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm13 + vmovmskpd %ymm13,%ebx + shlb $4,%bl + orb %bl,%cl + + vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm13 + vmovmskpd %ymm13,%ebx + vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm13 + vmovmskpd %ymm13,%eax + shlb $4,%al + orb %al,%bl + + addb %r9b,%r14b + adcb %r8b,%r13b + adcb %dl,%r12b + adcb %cl,%r11b + adcb %bl,%r10b + + xorb %r9b,%r14b + xorb %r8b,%r13b + xorb %dl,%r12b + xorb %cl,%r11b + xorb %bl,%r10b + + pushq %r9 + pushq %r8 + + leaq .Lkmasklut(%rip),%r8 + + movb %r14b,%r9b + andq $0xf,%r14 + vpsubq .Lmask52x4(%rip),%ymm3,%ymm13 + shlq $5,%r14 + vmovapd (%r8,%r14,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm3,%ymm3 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm4,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm4,%ymm4 + + movb %r13b,%r9b + andq $0xf,%r13 + vpsubq .Lmask52x4(%rip),%ymm5,%ymm13 + shlq $5,%r13 + vmovapd (%r8,%r13,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm5,%ymm5 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm6,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm6,%ymm6 + + movb %r12b,%r9b + andq $0xf,%r12 + vpsubq .Lmask52x4(%rip),%ymm7,%ymm13 + shlq $5,%r12 + vmovapd (%r8,%r12,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm7,%ymm7 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm8,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm8,%ymm8 + + movb %r11b,%r9b + andq $0xf,%r11 + vpsubq .Lmask52x4(%rip),%ymm9,%ymm13 + shlq $5,%r11 + vmovapd (%r8,%r11,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm9,%ymm9 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm10,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm10,%ymm10 + + movb %r10b,%r9b + andq $0xf,%r10 + vpsubq .Lmask52x4(%rip),%ymm11,%ymm13 + shlq $5,%r10 + vmovapd (%r8,%r10,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm11,%ymm11 + + shrb $4,%r9b + andq $0xf,%r9 + vpsubq .Lmask52x4(%rip),%ymm12,%ymm13 + shlq $5,%r9 + vmovapd (%r8,%r9,1),%ymm14 + vblendvpd %ymm14,%ymm13,%ymm12,%ymm12 + + popq %r8 + popq %r9 + + vpand .Lmask52x4(%rip),%ymm3,%ymm3 + vpand .Lmask52x4(%rip),%ymm4,%ymm4 + vpand .Lmask52x4(%rip),%ymm5,%ymm5 + vpand .Lmask52x4(%rip),%ymm6,%ymm6 + vpand .Lmask52x4(%rip),%ymm7,%ymm7 + vpand .Lmask52x4(%rip),%ymm8,%ymm8 + vpand .Lmask52x4(%rip),%ymm9,%ymm9 + + vpand .Lmask52x4(%rip),%ymm10,%ymm10 + vpand .Lmask52x4(%rip),%ymm11,%ymm11 + vpand .Lmask52x4(%rip),%ymm12,%ymm12 + + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm4,352(%rdi) + vmovdqu %ymm5,384(%rdi) + vmovdqu %ymm6,416(%rdi) + vmovdqu %ymm7,448(%rdi) + vmovdqu %ymm8,480(%rdi) + vmovdqu %ymm9,512(%rdi) + vmovdqu %ymm10,544(%rdi) + vmovdqu %ymm11,576(%rdi) + vmovdqu %ymm12,608(%rdi) + + vzeroupper + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + movq 0(%rax),%r15 +.cfi_restore %r15 + movq 8(%rax),%r14 +.cfi_restore %r14 + movq 16(%rax),%r13 +.cfi_restore %r13 + movq 24(%rax),%r12 +.cfi_restore %r12 + movq 32(%rax),%rbp +.cfi_restore %rbp + movq 40(%rax),%rbx +.cfi_restore %rbx + leaq 48(%rax),%rsp +.cfi_def_cfa %rsp,8 
+.Lossl_rsaz_amm52x40_x2_avxifma256_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_rsaz_amm52x40_x2_avxifma256, .-ossl_rsaz_amm52x40_x2_avxifma256 +.text + +.align 32 +.globl ossl_extract_multiplier_2x40_win5_avx +.type ossl_extract_multiplier_2x40_win5_avx,@function +ossl_extract_multiplier_2x40_win5_avx: +.cfi_startproc +.byte 243,15,30,250 + vmovapd .Lones(%rip),%ymm14 + vmovq %rdx,%xmm10 + vpbroadcastq %xmm10,%ymm12 + vmovq %rcx,%xmm10 + vpbroadcastq %xmm10,%ymm13 + leaq 20480(%rsi),%rax + + + movq %rsi,%r10 + + + vpxor %xmm0,%xmm0,%xmm0 + vmovapd %ymm0,%ymm1 + vmovapd %ymm0,%ymm2 + vmovapd %ymm0,%ymm3 + vmovapd %ymm0,%ymm4 + vmovapd %ymm0,%ymm5 + vmovapd %ymm0,%ymm6 + vmovapd %ymm0,%ymm7 + vmovapd %ymm0,%ymm8 + vmovapd %ymm0,%ymm9 + vpxor %ymm11,%ymm11,%ymm11 +.align 32 +.Lloop_0: + vpcmpeqq %ymm11,%ymm12,%ymm15 + vmovdqu 0(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm0,%ymm0 + vmovdqu 32(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm1,%ymm1 + vmovdqu 64(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm2,%ymm2 + vmovdqu 96(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm3,%ymm3 + vmovdqu 128(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm4,%ymm4 + vmovdqu 160(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm5,%ymm5 + vmovdqu 192(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm6,%ymm6 + vmovdqu 224(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm7,%ymm7 + vmovdqu 256(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm8,%ymm8 + vmovdqu 288(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm9,%ymm9 + vpaddq %ymm14,%ymm11,%ymm11 + addq $640,%rsi + cmpq %rsi,%rax + jne .Lloop_0 + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + vmovdqu %ymm5,160(%rdi) + vmovdqu %ymm6,192(%rdi) + vmovdqu %ymm7,224(%rdi) + vmovdqu %ymm8,256(%rdi) + vmovdqu %ymm9,288(%rdi) + movq %r10,%rsi + vpxor %ymm11,%ymm11,%ymm11 +.align 32 +.Lloop_320: + vpcmpeqq %ymm11,%ymm13,%ymm15 + vmovdqu 320(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm0,%ymm0 + vmovdqu 352(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm1,%ymm1 + vmovdqu 384(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm2,%ymm2 + vmovdqu 416(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm3,%ymm3 + vmovdqu 448(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm4,%ymm4 + vmovdqu 480(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm5,%ymm5 + vmovdqu 512(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm6,%ymm6 + vmovdqu 544(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm7,%ymm7 + vmovdqu 576(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm8,%ymm8 + vmovdqu 608(%rsi),%ymm10 + + vblendvpd %ymm15,%ymm10,%ymm9,%ymm9 + vpaddq %ymm14,%ymm11,%ymm11 + addq $640,%rsi + cmpq %rsi,%rax + jne .Lloop_320 + vmovdqu %ymm0,320(%rdi) + vmovdqu %ymm1,352(%rdi) + vmovdqu %ymm2,384(%rdi) + vmovdqu %ymm3,416(%rdi) + vmovdqu %ymm4,448(%rdi) + vmovdqu %ymm5,480(%rdi) + vmovdqu %ymm6,512(%rdi) + vmovdqu %ymm7,544(%rdi) + vmovdqu %ymm8,576(%rdi) + vmovdqu %ymm9,608(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_extract_multiplier_2x40_win5_avx, .-ossl_extract_multiplier_2x40_win5_avx +.section .rodata +.align 32 +.Lones: +.quad 1,1,1,1 +.Lzeros: +.quad 0,0,0,0 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-avx2.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-avx2.s new file mode 100644 index 000000000000..e078b0000a75 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-avx2.s @@ -0,0 +1,1746 @@ +.text + +.globl rsaz_1024_sqr_avx2 +.type rsaz_1024_sqr_avx2,@function +.align 64 +rsaz_1024_sqr_avx2: +.cfi_startproc + leaq (%rsp),%rax 
+.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + vzeroupper + movq %rax,%rbp +.cfi_def_cfa_register %rbp + movq %rdx,%r13 + subq $832,%rsp + movq %r13,%r15 + subq $-128,%rdi + subq $-128,%rsi + subq $-128,%r13 + + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + vpxor %ymm9,%ymm9,%ymm9 + jz .Lsqr_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%r13),%ymm0 + andq $-2048,%rsp + vmovdqu 32-128(%r13),%ymm1 + vmovdqu 64-128(%r13),%ymm2 + vmovdqu 96-128(%r13),%ymm3 + vmovdqu 128-128(%r13),%ymm4 + vmovdqu 160-128(%r13),%ymm5 + vmovdqu 192-128(%r13),%ymm6 + vmovdqu 224-128(%r13),%ymm7 + vmovdqu 256-128(%r13),%ymm8 + leaq 832+128(%rsp),%r13 + vmovdqu %ymm0,0-128(%r13) + vmovdqu %ymm1,32-128(%r13) + vmovdqu %ymm2,64-128(%r13) + vmovdqu %ymm3,96-128(%r13) + vmovdqu %ymm4,128-128(%r13) + vmovdqu %ymm5,160-128(%r13) + vmovdqu %ymm6,192-128(%r13) + vmovdqu %ymm7,224-128(%r13) + vmovdqu %ymm8,256-128(%r13) + vmovdqu %ymm9,288-128(%r13) + +.Lsqr_1024_no_n_copy: + andq $-1024,%rsp + + vmovdqu 32-128(%rsi),%ymm1 + vmovdqu 64-128(%rsi),%ymm2 + vmovdqu 96-128(%rsi),%ymm3 + vmovdqu 128-128(%rsi),%ymm4 + vmovdqu 160-128(%rsi),%ymm5 + vmovdqu 192-128(%rsi),%ymm6 + vmovdqu 224-128(%rsi),%ymm7 + vmovdqu 256-128(%rsi),%ymm8 + + leaq 192(%rsp),%rbx + vmovdqu .Land_mask(%rip),%ymm15 + jmp .LOOP_GRANDE_SQR_1024 + +.align 32 +.LOOP_GRANDE_SQR_1024: + leaq 576+128(%rsp),%r9 + leaq 448(%rsp),%r12 + + + + + vpaddq %ymm1,%ymm1,%ymm1 + vpbroadcastq 0-128(%rsi),%ymm10 + vpaddq %ymm2,%ymm2,%ymm2 + vmovdqa %ymm1,0-128(%r9) + vpaddq %ymm3,%ymm3,%ymm3 + vmovdqa %ymm2,32-128(%r9) + vpaddq %ymm4,%ymm4,%ymm4 + vmovdqa %ymm3,64-128(%r9) + vpaddq %ymm5,%ymm5,%ymm5 + vmovdqa %ymm4,96-128(%r9) + vpaddq %ymm6,%ymm6,%ymm6 + vmovdqa %ymm5,128-128(%r9) + vpaddq %ymm7,%ymm7,%ymm7 + vmovdqa %ymm6,160-128(%r9) + vpaddq %ymm8,%ymm8,%ymm8 + vmovdqa %ymm7,192-128(%r9) + vpxor %ymm9,%ymm9,%ymm9 + vmovdqa %ymm8,224-128(%r9) + + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpbroadcastq 32-128(%rsi),%ymm11 + vmovdqu %ymm9,288-192(%rbx) + vpmuludq %ymm10,%ymm1,%ymm1 + vmovdqu %ymm9,320-448(%r12) + vpmuludq %ymm10,%ymm2,%ymm2 + vmovdqu %ymm9,352-448(%r12) + vpmuludq %ymm10,%ymm3,%ymm3 + vmovdqu %ymm9,384-448(%r12) + vpmuludq %ymm10,%ymm4,%ymm4 + vmovdqu %ymm9,416-448(%r12) + vpmuludq %ymm10,%ymm5,%ymm5 + vmovdqu %ymm9,448-448(%r12) + vpmuludq %ymm10,%ymm6,%ymm6 + vmovdqu %ymm9,480-448(%r12) + vpmuludq %ymm10,%ymm7,%ymm7 + vmovdqu %ymm9,512-448(%r12) + vpmuludq %ymm10,%ymm8,%ymm8 + vpbroadcastq 64-128(%rsi),%ymm10 + vmovdqu %ymm9,544-448(%r12) + + movq %rsi,%r15 + movl $4,%r14d + jmp .Lsqr_entry_1024 +.align 32 +.LOOP_SQR_1024: + vpbroadcastq 32-128(%r15),%ymm11 + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpaddq 0-192(%rbx),%ymm0,%ymm0 + vpmuludq 0-128(%r9),%ymm10,%ymm1 + vpaddq 32-192(%rbx),%ymm1,%ymm1 + vpmuludq 32-128(%r9),%ymm10,%ymm2 + vpaddq 64-192(%rbx),%ymm2,%ymm2 + vpmuludq 64-128(%r9),%ymm10,%ymm3 + vpaddq 96-192(%rbx),%ymm3,%ymm3 + vpmuludq 96-128(%r9),%ymm10,%ymm4 + vpaddq 128-192(%rbx),%ymm4,%ymm4 + vpmuludq 128-128(%r9),%ymm10,%ymm5 + vpaddq 160-192(%rbx),%ymm5,%ymm5 + vpmuludq 160-128(%r9),%ymm10,%ymm6 + vpaddq 192-192(%rbx),%ymm6,%ymm6 + vpmuludq 192-128(%r9),%ymm10,%ymm7 + vpaddq 224-192(%rbx),%ymm7,%ymm7 + vpmuludq 224-128(%r9),%ymm10,%ymm8 + vpbroadcastq 64-128(%r15),%ymm10 + vpaddq 256-192(%rbx),%ymm8,%ymm8 +.Lsqr_entry_1024: + vmovdqu 
%ymm0,0-192(%rbx) + vmovdqu %ymm1,32-192(%rbx) + + vpmuludq 32-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 32-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 64-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 96-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 128-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 160-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 192-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 224-128(%r9),%ymm11,%ymm0 + vpbroadcastq 96-128(%r15),%ymm11 + vpaddq 288-192(%rbx),%ymm0,%ymm0 + + vmovdqu %ymm2,64-192(%rbx) + vmovdqu %ymm3,96-192(%rbx) + + vpmuludq 64-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 64-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 96-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 128-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 160-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 224-128(%r9),%ymm10,%ymm1 + vpbroadcastq 128-128(%r15),%ymm10 + vpaddq 320-448(%r12),%ymm1,%ymm1 + + vmovdqu %ymm4,128-192(%rbx) + vmovdqu %ymm5,160-192(%rbx) + + vpmuludq 96-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 96-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq 128-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm0,%ymm0 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq 224-128(%r9),%ymm11,%ymm2 + vpbroadcastq 160-128(%r15),%ymm11 + vpaddq 352-448(%r12),%ymm2,%ymm2 + + vmovdqu %ymm6,192-192(%rbx) + vmovdqu %ymm7,224-192(%rbx) + + vpmuludq 128-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 128-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 160-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 192-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 224-128(%r9),%ymm10,%ymm3 + vpbroadcastq 192-128(%r15),%ymm10 + vpaddq 384-448(%r12),%ymm3,%ymm3 + + vmovdqu %ymm8,256-192(%rbx) + vmovdqu %ymm0,288-192(%rbx) + leaq 8(%rbx),%rbx + + vpmuludq 160-128(%rsi),%ymm11,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 224-128(%r9),%ymm11,%ymm4 + vpbroadcastq 224-128(%r15),%ymm11 + vpaddq 416-448(%r12),%ymm4,%ymm4 + + vmovdqu %ymm1,320-448(%r12) + vmovdqu %ymm2,352-448(%r12) + + vpmuludq 192-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpbroadcastq 256-128(%r15),%ymm0 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq 224-128(%r9),%ymm10,%ymm5 + vpbroadcastq 0+8-128(%r15),%ymm10 + vpaddq 448-448(%r12),%ymm5,%ymm5 + + vmovdqu %ymm3,384-448(%r12) + vmovdqu %ymm4,416-448(%r12) + leaq 8(%r15),%r15 + + vpmuludq 224-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 224-128(%r9),%ymm11,%ymm6 + vpaddq 480-448(%r12),%ymm6,%ymm6 + + vpmuludq 256-128(%rsi),%ymm0,%ymm7 + vmovdqu %ymm5,448-448(%r12) + vpaddq 512-448(%r12),%ymm7,%ymm7 + vmovdqu %ymm6,480-448(%r12) + vmovdqu %ymm7,512-448(%r12) + leaq 8(%r12),%r12 + + decl %r14d + jnz .LOOP_SQR_1024 + + vmovdqu 256(%rsp),%ymm8 + vmovdqu 288(%rsp),%ymm1 + vmovdqu 320(%rsp),%ymm2 + leaq 192(%rsp),%rbx + + vpsrlq $29,%ymm8,%ymm14 + vpand %ymm15,%ymm8,%ymm8 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + + vpermq $0x93,%ymm14,%ymm14 + vpxor 
%ymm9,%ymm9,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm8,%ymm8 + vpblendd $3,%ymm11,%ymm9,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,288-192(%rbx) + vmovdqu %ymm2,320-192(%rbx) + + movq (%rsp),%rax + movq 8(%rsp),%r10 + movq 16(%rsp),%r11 + movq 24(%rsp),%r12 + vmovdqu 32(%rsp),%ymm1 + vmovdqu 64-192(%rbx),%ymm2 + vmovdqu 96-192(%rbx),%ymm3 + vmovdqu 128-192(%rbx),%ymm4 + vmovdqu 160-192(%rbx),%ymm5 + vmovdqu 192-192(%rbx),%ymm6 + vmovdqu 224-192(%rbx),%ymm7 + + movq %rax,%r9 + imull %ecx,%eax + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + + movq %rax,%rdx + imulq -128(%r13),%rax + vpbroadcastq %xmm12,%ymm12 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax + shrq $29,%r9 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + addq %r9,%r10 + addq %rax,%r11 + imulq 24-128(%r13),%rdx + addq %rdx,%r12 + + movq %r10,%rax + imull %ecx,%eax + andl $0x1fffffff,%eax + + movl $9,%r14d + jmp .LOOP_REDUCE_1024 + +.align 32 +.LOOP_REDUCE_1024: + vmovd %eax,%xmm13 + vpbroadcastq %xmm13,%ymm13 + + vpmuludq 32-128(%r13),%ymm12,%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm10,%ymm1,%ymm1 + addq %rax,%r10 + vpmuludq 64-128(%r13),%ymm12,%ymm14 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm14,%ymm2,%ymm2 + vpmuludq 96-128(%r13),%ymm12,%ymm11 +.byte 0x67 + addq %rax,%r11 +.byte 0x67 + movq %rdx,%rax + imulq 16-128(%r13),%rax + shrq $29,%r10 + vpaddq %ymm11,%ymm3,%ymm3 + vpmuludq 128-128(%r13),%ymm12,%ymm10 + addq %rax,%r12 + addq %r10,%r11 + vpaddq %ymm10,%ymm4,%ymm4 + vpmuludq 160-128(%r13),%ymm12,%ymm14 + movq %r11,%rax + imull %ecx,%eax + vpaddq %ymm14,%ymm5,%ymm5 + vpmuludq 192-128(%r13),%ymm12,%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm11,%ymm6,%ymm6 + vpmuludq 224-128(%r13),%ymm12,%ymm10 + vpaddq %ymm10,%ymm7,%ymm7 + vpmuludq 256-128(%r13),%ymm12,%ymm14 + vmovd %eax,%xmm12 + + vpaddq %ymm14,%ymm8,%ymm8 + + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 32-8-128(%r13),%ymm13,%ymm11 + vmovdqu 96-8-128(%r13),%ymm14 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm1,%ymm1 + vpmuludq 64-8-128(%r13),%ymm13,%ymm10 + vmovdqu 128-8-128(%r13),%ymm11 + addq %rax,%r11 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm10,%ymm2,%ymm2 + addq %r12,%rax + shrq $29,%r11 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 160-8-128(%r13),%ymm10 + addq %r11,%rax + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 192-8-128(%r13),%ymm14 +.byte 0x67 + movq %rax,%r12 + imull %ecx,%eax + vpaddq %ymm11,%ymm4,%ymm4 + vpmuludq %ymm13,%ymm10,%ymm10 +.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm5,%ymm5 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 256-8-128(%r13),%ymm10 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 288-8-128(%r13),%ymm9 + vmovd %eax,%xmm0 + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm7,%ymm7 + vpmuludq %ymm13,%ymm10,%ymm10 + vmovdqu 32-16-128(%r13),%ymm14 + vpbroadcastq %xmm0,%ymm0 + vpaddq %ymm10,%ymm8,%ymm8 + vpmuludq %ymm13,%ymm9,%ymm9 + vmovdqu 64-16-128(%r13),%ymm11 + addq %rax,%r12 + + vmovdqu 32-24-128(%r13),%ymm13 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 96-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq %ymm0,%ymm13,%ymm13 + vpmuludq %ymm12,%ymm11,%ymm11 +.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq %ymm1,%ymm13,%ymm13 + vpaddq %ymm11,%ymm2,%ymm2 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 160-16-128(%r13),%ymm11 +.byte 0x67 + vmovq 
%xmm13,%rax + vmovdqu %ymm13,(%rsp) + vpaddq %ymm10,%ymm3,%ymm3 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 192-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq %ymm12,%ymm11,%ymm11 + vmovdqu 224-16-128(%r13),%ymm14 + vpaddq %ymm11,%ymm5,%ymm5 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 256-16-128(%r13),%ymm11 + vpaddq %ymm10,%ymm6,%ymm6 + vpmuludq %ymm12,%ymm14,%ymm14 + shrq $29,%r12 + vmovdqu 288-16-128(%r13),%ymm10 + addq %r12,%rax + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq %ymm12,%ymm11,%ymm11 + + movq %rax,%r9 + imull %ecx,%eax + vpaddq %ymm11,%ymm8,%ymm8 + vpmuludq %ymm12,%ymm10,%ymm10 + andl $0x1fffffff,%eax + vmovd %eax,%xmm12 + vmovdqu 96-24-128(%r13),%ymm11 +.byte 0x67 + vpaddq %ymm10,%ymm9,%ymm9 + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 64-24-128(%r13),%ymm0,%ymm14 + vmovdqu 128-24-128(%r13),%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + movq 8(%rsp),%r10 + vpaddq %ymm14,%ymm2,%ymm1 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 160-24-128(%r13),%ymm14 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax +.byte 0x67 + shrq $29,%r9 + movq 16(%rsp),%r11 + vpaddq %ymm11,%ymm3,%ymm2 + vpmuludq %ymm0,%ymm10,%ymm10 + vmovdqu 192-24-128(%r13),%ymm11 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + vpaddq %ymm10,%ymm4,%ymm3 + vpmuludq %ymm0,%ymm14,%ymm14 + vmovdqu 224-24-128(%r13),%ymm10 + imulq 24-128(%r13),%rdx + addq %rax,%r11 + leaq (%r9,%r10,1),%rax + vpaddq %ymm14,%ymm5,%ymm4 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 256-24-128(%r13),%ymm14 + movq %rax,%r10 + imull %ecx,%eax + vpmuludq %ymm0,%ymm10,%ymm10 + vpaddq %ymm11,%ymm6,%ymm5 + vmovdqu 288-24-128(%r13),%ymm11 + andl $0x1fffffff,%eax + vpaddq %ymm10,%ymm7,%ymm6 + vpmuludq %ymm0,%ymm14,%ymm14 + addq 24(%rsp),%rdx + vpaddq %ymm14,%ymm8,%ymm7 + vpmuludq %ymm0,%ymm11,%ymm11 + vpaddq %ymm11,%ymm9,%ymm8 + vmovq %r12,%xmm9 + movq %rdx,%r12 + + decl %r14d + jnz .LOOP_REDUCE_1024 + leaq 448(%rsp),%r12 + vpaddq %ymm9,%ymm13,%ymm0 + vpxor %ymm9,%ymm9,%ymm9 + + vpaddq 288-192(%rbx),%ymm0,%ymm0 + vpaddq 320-448(%r12),%ymm1,%ymm1 + vpaddq 352-448(%r12),%ymm2,%ymm2 + vpaddq 384-448(%r12),%ymm3,%ymm3 + vpaddq 416-448(%r12),%ymm4,%ymm4 + vpaddq 448-448(%r12),%ymm5,%ymm5 + vpaddq 480-448(%r12),%ymm6,%ymm6 + vpaddq 512-448(%r12),%ymm7,%ymm7 + vpaddq 544-448(%r12),%ymm8,%ymm8 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm13,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vmovdqu %ymm0,0-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,32-128(%rdi) + vpblendd 
$3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vmovdqu %ymm2,64-128(%rdi) + vpaddq %ymm13,%ymm4,%ymm4 + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $0x93,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $0x93,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vmovdqu %ymm4,128-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vmovdqu %ymm5,160-128(%rdi) + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vmovdqu %ymm6,192-128(%rdi) + vpaddq %ymm13,%ymm8,%ymm8 + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + + movq %rdi,%rsi + decl %r8d + jne .LOOP_GRANDE_SQR_1024 + + vzeroall + movq %rbp,%rax +.cfi_def_cfa_register %rax + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lsqr_1024_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.type rsaz_1024_mul_avx2,@function +.align 64 +rsaz_1024_mul_avx2: +.cfi_startproc + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + movq %rax,%rbp +.cfi_def_cfa_register %rbp + vzeroall + movq %rdx,%r13 + subq $64,%rsp + + + + + + +.byte 0x67,0x67 + movq %rsi,%r15 + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + movq %rsi,%r15 + cmovnzq %r13,%rsi + cmovnzq %r15,%r13 + + movq %rcx,%r15 + subq $-128,%rsi + subq $-128,%rcx + subq $-128,%rdi + + andq $4095,%r15 + addq $320,%r15 +.byte 0x67,0x67 + shrq $12,%r15 + jz .Lmul_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%rcx),%ymm0 + andq $-512,%rsp + vmovdqu 32-128(%rcx),%ymm1 + vmovdqu 64-128(%rcx),%ymm2 + vmovdqu 96-128(%rcx),%ymm3 + vmovdqu 128-128(%rcx),%ymm4 + vmovdqu 160-128(%rcx),%ymm5 + vmovdqu 192-128(%rcx),%ymm6 + vmovdqu 224-128(%rcx),%ymm7 + vmovdqu 256-128(%rcx),%ymm8 + leaq 64+128(%rsp),%rcx + vmovdqu %ymm0,0-128(%rcx) + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm1,32-128(%rcx) + vpxor %ymm1,%ymm1,%ymm1 + vmovdqu %ymm2,64-128(%rcx) + vpxor %ymm2,%ymm2,%ymm2 + vmovdqu %ymm3,96-128(%rcx) + 
vpxor %ymm3,%ymm3,%ymm3 + vmovdqu %ymm4,128-128(%rcx) + vpxor %ymm4,%ymm4,%ymm4 + vmovdqu %ymm5,160-128(%rcx) + vpxor %ymm5,%ymm5,%ymm5 + vmovdqu %ymm6,192-128(%rcx) + vpxor %ymm6,%ymm6,%ymm6 + vmovdqu %ymm7,224-128(%rcx) + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu %ymm8,256-128(%rcx) + vmovdqa %ymm0,%ymm8 + vmovdqu %ymm9,288-128(%rcx) +.Lmul_1024_no_n_copy: + andq $-64,%rsp + + movq (%r13),%rbx + vpbroadcastq (%r13),%ymm10 + vmovdqu %ymm0,(%rsp) + xorq %r9,%r9 +.byte 0x67 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + + vmovdqu .Land_mask(%rip),%ymm15 + movl $9,%r14d + vmovdqu %ymm9,288-128(%rdi) + jmp .Loop_mul_1024 + +.align 32 +.Loop_mul_1024: + vpsrlq $29,%ymm3,%ymm9 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r9,%rax + movq %rbx,%r10 + imulq 8-128(%rsi),%r10 + addq 8(%rsp),%r10 + + movq %rax,%r9 + imull %r8d,%eax + andl $0x1fffffff,%eax + + movq %rbx,%r11 + imulq 16-128(%rsi),%r11 + addq 16(%rsp),%r11 + + movq %rbx,%r12 + imulq 24-128(%rsi),%r12 + addq 24(%rsp),%r12 + vpmuludq 32-128(%rsi),%ymm10,%ymm0 + vmovd %eax,%xmm11 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq 64-128(%rsi),%ymm10,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 96-128(%rsi),%ymm10,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq 128-128(%rsi),%ymm10,%ymm0 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq 160-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 192-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq 224-128(%rsi),%ymm10,%ymm0 + vpermq $0x93,%ymm9,%ymm9 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 256-128(%rsi),%ymm10,%ymm12 + vpbroadcastq 8(%r13),%ymm10 + vpaddq %ymm12,%ymm8,%ymm8 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%rcx),%rax + addq %rax,%r11 + shrq $29,%r9 + imulq 24-128(%rcx),%rdx + addq %rdx,%r12 + addq %r9,%r10 + + vpmuludq 32-128(%rcx),%ymm11,%ymm13 + vmovq %xmm10,%rbx + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 64-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm2,%ymm2 + vpmuludq 96-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 128-128(%rcx),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 160-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm5,%ymm5 + vpmuludq 192-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 224-128(%rcx),%ymm11,%ymm13 + vpblendd $3,%ymm14,%ymm9,%ymm12 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 256-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm0,%ymm8,%ymm8 + + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rsi),%ymm12 + movq %rbx,%rax + imulq 8-128(%rsi),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rsi),%ymm13 + + movq %r10,%rax + vpblendd $0xfc,%ymm14,%ymm9,%ymm9 + imull %r8d,%eax + vpaddq %ymm9,%ymm4,%ymm4 + andl $0x1fffffff,%eax + + imulq 16-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovd %eax,%xmm11 + vmovdqu -8+96-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -8+128-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+160-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+192-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -8+224-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+256-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+288-128(%rsi),%ymm9 + vpaddq 
%ymm12,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm13,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm9,%ymm9 + vpbroadcastq 16(%r13),%ymm10 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rcx),%ymm0 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rcx),%ymm12 + shrq $29,%r10 + imulq 16-128(%rcx),%rdx + addq %rdx,%r12 + addq %r10,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -8+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rsi),%ymm0 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r11,%rax + + vmovdqu -16+64-128(%rsi),%ymm12 + movq %rax,%r11 + imull %r8d,%eax + andl $0x1fffffff,%eax + + imulq 8-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -16+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -16+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -16+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 24(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rcx),%ymm0 + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r11 + vmovdqu -16+64-128(%rcx),%ymm12 + imulq 8-128(%rcx),%rdx + addq %rdx,%r12 + shrq $29,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -16+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+32-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+64-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm9,%ymm9 + + addq %r11,%r12 + imulq -128(%rsi),%rbx + addq %rbx,%r12 + + movq %r12,%rax + imull %r8d,%eax + andl $0x1fffffff,%eax + + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -24+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 
+ vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -24+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -24+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 32(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + addq $32,%r13 + + vmovdqu -24+32-128(%rcx),%ymm0 + imulq -128(%rcx),%rax + addq %rax,%r12 + shrq $29,%r12 + + vmovdqu -24+64-128(%rcx),%ymm12 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -24+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm0 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu %ymm0,(%rsp) + vpaddq %ymm12,%ymm2,%ymm1 + vmovdqu -24+128-128(%rcx),%ymm0 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm2 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm3 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm4 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm5 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+288-128(%rcx),%ymm13 + movq %r12,%r9 + vpaddq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm11,%ymm12,%ymm12 + addq (%rsp),%r9 + vpaddq %ymm12,%ymm8,%ymm7 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovq %r12,%xmm12 + vpaddq %ymm13,%ymm9,%ymm8 + + decl %r14d + jnz .Loop_mul_1024 + vpaddq (%rsp),%ymm12,%ymm0 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm10,%ymm10 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpermq $0x93,%ymm11,%ymm11 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpermq $0x93,%ymm10,%ymm10 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm11,%ymm11 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vmovdqu %ymm0,0-128(%rdi) + vmovdqu %ymm1,32-128(%rdi) + vmovdqu %ymm2,64-128(%rdi) + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + 
vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $0x93,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $0x93,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $0x93,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $0x93,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $0x93,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vmovdqu %ymm4,128-128(%rdi) + vmovdqu %ymm5,160-128(%rdi) + vmovdqu %ymm6,192-128(%rdi) + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + vzeroupper + + movq %rbp,%rax +.cfi_def_cfa_register %rax + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lmul_1024_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 +.globl rsaz_1024_red2norm_avx2 +.type rsaz_1024_red2norm_avx2,@function +.align 32 +rsaz_1024_red2norm_avx2: +.cfi_startproc + subq $-128,%rsi + xorq %rax,%rax + movq -128(%rsi),%r8 + movq -120(%rsi),%r9 + movq -112(%rsi),%r10 + shlq $0,%r8 + shlq $29,%r9 + movq %r10,%r11 + shlq $58,%r10 + shrq $6,%r11 + addq %r8,%rax + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,0(%rdi) + movq %r11,%rax + movq -104(%rsi),%r8 + movq -96(%rsi),%r9 + shlq $23,%r8 + movq %r9,%r10 + shlq $52,%r9 + shrq $12,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,8(%rdi) + movq %r10,%rax + movq -88(%rsi),%r11 + movq -80(%rsi),%r8 + shlq $17,%r11 + movq %r8,%r9 + shlq $46,%r8 + shrq $18,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,16(%rdi) + movq %r9,%rax + movq -72(%rsi),%r10 + movq -64(%rsi),%r11 + shlq $11,%r10 + movq %r11,%r8 + shlq $40,%r11 + shrq $24,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,24(%rdi) + movq %r8,%rax + movq -56(%rsi),%r9 + movq -48(%rsi),%r10 + movq -40(%rsi),%r11 + shlq $5,%r9 + shlq $34,%r10 + movq %r11,%r8 + shlq $63,%r11 + shrq $1,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,32(%rdi) + movq %r8,%rax + movq -32(%rsi),%r9 + movq -24(%rsi),%r10 + shlq $28,%r9 + movq %r10,%r11 + shlq $57,%r10 + shrq $7,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,40(%rdi) + movq %r11,%rax + movq -16(%rsi),%r8 + movq -8(%rsi),%r9 + shlq $22,%r8 + movq %r9,%r10 + shlq $51,%r9 + shrq $13,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,48(%rdi) + movq %r10,%rax + movq 0(%rsi),%r11 + movq 8(%rsi),%r8 + shlq $16,%r11 + movq %r8,%r9 + shlq $45,%r8 + shrq $19,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,56(%rdi) + movq %r9,%rax + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + shlq 
$10,%r10 + movq %r11,%r8 + shlq $39,%r11 + shrq $25,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,64(%rdi) + movq %r8,%rax + movq 32(%rsi),%r9 + movq 40(%rsi),%r10 + movq 48(%rsi),%r11 + shlq $4,%r9 + shlq $33,%r10 + movq %r11,%r8 + shlq $62,%r11 + shrq $2,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,72(%rdi) + movq %r8,%rax + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + shlq $27,%r9 + movq %r10,%r11 + shlq $56,%r10 + shrq $8,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,80(%rdi) + movq %r11,%rax + movq 72(%rsi),%r8 + movq 80(%rsi),%r9 + shlq $21,%r8 + movq %r9,%r10 + shlq $50,%r9 + shrq $14,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,88(%rdi) + movq %r10,%rax + movq 88(%rsi),%r11 + movq 96(%rsi),%r8 + shlq $15,%r11 + movq %r8,%r9 + shlq $44,%r8 + shrq $20,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,96(%rdi) + movq %r9,%rax + movq 104(%rsi),%r10 + movq 112(%rsi),%r11 + shlq $9,%r10 + movq %r11,%r8 + shlq $38,%r11 + shrq $26,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,104(%rdi) + movq %r8,%rax + movq 120(%rsi),%r9 + movq 128(%rsi),%r10 + movq 136(%rsi),%r11 + shlq $3,%r9 + shlq $32,%r10 + movq %r11,%r8 + shlq $61,%r11 + shrq $3,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,112(%rdi) + movq %r8,%rax + movq 144(%rsi),%r9 + movq 152(%rsi),%r10 + shlq $26,%r9 + movq %r10,%r11 + shlq $55,%r10 + shrq $9,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,120(%rdi) + movq %r11,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 + +.globl rsaz_1024_norm2red_avx2 +.type rsaz_1024_norm2red_avx2,@function +.align 32 +rsaz_1024_norm2red_avx2: +.cfi_startproc + subq $-128,%rdi + movq (%rsi),%r8 + movl $0x1fffffff,%eax + movq 8(%rsi),%r9 + movq %r8,%r11 + shrq $0,%r11 + andq %rax,%r11 + movq %r11,-128(%rdi) + movq %r8,%r10 + shrq $29,%r10 + andq %rax,%r10 + movq %r10,-120(%rdi) + shrdq $58,%r9,%r8 + andq %rax,%r8 + movq %r8,-112(%rdi) + movq 16(%rsi),%r10 + movq %r9,%r8 + shrq $23,%r8 + andq %rax,%r8 + movq %r8,-104(%rdi) + shrdq $52,%r10,%r9 + andq %rax,%r9 + movq %r9,-96(%rdi) + movq 24(%rsi),%r11 + movq %r10,%r9 + shrq $17,%r9 + andq %rax,%r9 + movq %r9,-88(%rdi) + shrdq $46,%r11,%r10 + andq %rax,%r10 + movq %r10,-80(%rdi) + movq 32(%rsi),%r8 + movq %r11,%r10 + shrq $11,%r10 + andq %rax,%r10 + movq %r10,-72(%rdi) + shrdq $40,%r8,%r11 + andq %rax,%r11 + movq %r11,-64(%rdi) + movq 40(%rsi),%r9 + movq %r8,%r11 + shrq $5,%r11 + andq %rax,%r11 + movq %r11,-56(%rdi) + movq %r8,%r10 + shrq $34,%r10 + andq %rax,%r10 + movq %r10,-48(%rdi) + shrdq $63,%r9,%r8 + andq %rax,%r8 + movq %r8,-40(%rdi) + movq 48(%rsi),%r10 + movq %r9,%r8 + shrq $28,%r8 + andq %rax,%r8 + movq %r8,-32(%rdi) + shrdq $57,%r10,%r9 + andq %rax,%r9 + movq %r9,-24(%rdi) + movq 56(%rsi),%r11 + movq %r10,%r9 + shrq $22,%r9 + andq %rax,%r9 + movq %r9,-16(%rdi) + shrdq $51,%r11,%r10 + andq %rax,%r10 + movq %r10,-8(%rdi) + movq 64(%rsi),%r8 + movq %r11,%r10 + shrq $16,%r10 + andq %rax,%r10 + movq %r10,0(%rdi) + shrdq $45,%r8,%r11 + andq %rax,%r11 + movq %r11,8(%rdi) + movq 72(%rsi),%r9 + movq %r8,%r11 + shrq $10,%r11 + andq %rax,%r11 + movq %r11,16(%rdi) + shrdq $39,%r9,%r8 + andq %rax,%r8 + movq %r8,24(%rdi) + movq 80(%rsi),%r10 + movq %r9,%r8 + shrq $4,%r8 + andq %rax,%r8 + movq %r8,32(%rdi) + movq %r9,%r11 + shrq $33,%r11 + andq %rax,%r11 + movq %r11,40(%rdi) + shrdq $62,%r10,%r9 + andq %rax,%r9 + movq %r9,48(%rdi) + movq 
88(%rsi),%r11 + movq %r10,%r9 + shrq $27,%r9 + andq %rax,%r9 + movq %r9,56(%rdi) + shrdq $56,%r11,%r10 + andq %rax,%r10 + movq %r10,64(%rdi) + movq 96(%rsi),%r8 + movq %r11,%r10 + shrq $21,%r10 + andq %rax,%r10 + movq %r10,72(%rdi) + shrdq $50,%r8,%r11 + andq %rax,%r11 + movq %r11,80(%rdi) + movq 104(%rsi),%r9 + movq %r8,%r11 + shrq $15,%r11 + andq %rax,%r11 + movq %r11,88(%rdi) + shrdq $44,%r9,%r8 + andq %rax,%r8 + movq %r8,96(%rdi) + movq 112(%rsi),%r10 + movq %r9,%r8 + shrq $9,%r8 + andq %rax,%r8 + movq %r8,104(%rdi) + shrdq $38,%r10,%r9 + andq %rax,%r9 + movq %r9,112(%rdi) + movq 120(%rsi),%r11 + movq %r10,%r9 + shrq $3,%r9 + andq %rax,%r9 + movq %r9,120(%rdi) + movq %r10,%r8 + shrq $32,%r8 + andq %rax,%r8 + movq %r8,128(%rdi) + shrdq $61,%r11,%r10 + andq %rax,%r10 + movq %r10,136(%rdi) + xorq %r8,%r8 + movq %r11,%r10 + shrq $26,%r10 + andq %rax,%r10 + movq %r10,144(%rdi) + shrdq $55,%r8,%r11 + andq %rax,%r11 + movq %r11,152(%rdi) + movq %r8,160(%rdi) + movq %r8,168(%rdi) + movq %r8,176(%rdi) + movq %r8,184(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 +.globl rsaz_1024_scatter5_avx2 +.type rsaz_1024_scatter5_avx2,@function +.align 32 +rsaz_1024_scatter5_avx2: +.cfi_startproc + vzeroupper + vmovdqu .Lscatter_permd(%rip),%ymm5 + shll $4,%edx + leaq (%rdi,%rdx,1),%rdi + movl $9,%eax + jmp .Loop_scatter_1024 + +.align 32 +.Loop_scatter_1024: + vmovdqu (%rsi),%ymm0 + leaq 32(%rsi),%rsi + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,(%rdi) + leaq 512(%rdi),%rdi + decl %eax + jnz .Loop_scatter_1024 + + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 + +.globl rsaz_1024_gather5_avx2 +.type rsaz_1024_gather5_avx2,@function +.align 32 +rsaz_1024_gather5_avx2: +.cfi_startproc + vzeroupper + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + leaq -256(%rsp),%rsp + andq $-32,%rsp + leaq .Linc(%rip),%r10 + leaq -128(%rsp),%rax + + vmovd %edx,%xmm4 + vmovdqa (%r10),%ymm0 + vmovdqa 32(%r10),%ymm1 + vmovdqa 64(%r10),%ymm5 + vpbroadcastd %xmm4,%ymm4 + + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,0+128(%rax) + vpaddd %ymm5,%ymm2,%ymm0 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,32+128(%rax) + vpaddd %ymm5,%ymm3,%ymm1 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,64+128(%rax) + vpaddd %ymm5,%ymm0,%ymm2 + vpcmpeqd %ymm4,%ymm0,%ymm0 + vmovdqa %ymm3,96+128(%rax) + vpaddd %ymm5,%ymm1,%ymm3 + vpcmpeqd %ymm4,%ymm1,%ymm1 + vmovdqa %ymm0,128+128(%rax) + vpaddd %ymm5,%ymm2,%ymm8 + vpcmpeqd %ymm4,%ymm2,%ymm2 + vmovdqa %ymm1,160+128(%rax) + vpaddd %ymm5,%ymm3,%ymm9 + vpcmpeqd %ymm4,%ymm3,%ymm3 + vmovdqa %ymm2,192+128(%rax) + vpaddd %ymm5,%ymm8,%ymm10 + vpcmpeqd %ymm4,%ymm8,%ymm8 + vmovdqa %ymm3,224+128(%rax) + vpaddd %ymm5,%ymm9,%ymm11 + vpcmpeqd %ymm4,%ymm9,%ymm9 + vpaddd %ymm5,%ymm10,%ymm12 + vpcmpeqd %ymm4,%ymm10,%ymm10 + vpaddd %ymm5,%ymm11,%ymm13 + vpcmpeqd %ymm4,%ymm11,%ymm11 + vpaddd %ymm5,%ymm12,%ymm14 + vpcmpeqd %ymm4,%ymm12,%ymm12 + vpaddd %ymm5,%ymm13,%ymm15 + vpcmpeqd %ymm4,%ymm13,%ymm13 + vpcmpeqd %ymm4,%ymm14,%ymm14 + vpcmpeqd %ymm4,%ymm15,%ymm15 + + vmovdqa -32(%r10),%ymm7 + leaq 128(%rsi),%rsi + movl $9,%edx + +.Loop_gather_1024: + vmovdqa 0-128(%rsi),%ymm0 + vmovdqa 32-128(%rsi),%ymm1 + vmovdqa 64-128(%rsi),%ymm2 + vmovdqa 96-128(%rsi),%ymm3 + vpand 0+128(%rax),%ymm0,%ymm0 + vpand 32+128(%rax),%ymm1,%ymm1 + vpand 64+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm1,%ymm4 + vpand 96+128(%rax),%ymm3,%ymm3 + vmovdqa 128-128(%rsi),%ymm0 + 
vmovdqa 160-128(%rsi),%ymm1 + vpor %ymm2,%ymm3,%ymm5 + vmovdqa 192-128(%rsi),%ymm2 + vmovdqa 224-128(%rsi),%ymm3 + vpand 128+128(%rax),%ymm0,%ymm0 + vpand 160+128(%rax),%ymm1,%ymm1 + vpand 192+128(%rax),%ymm2,%ymm2 + vpor %ymm0,%ymm4,%ymm4 + vpand 224+128(%rax),%ymm3,%ymm3 + vpand 256-128(%rsi),%ymm8,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 288-128(%rsi),%ymm9,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 320-128(%rsi),%ymm10,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 352-128(%rsi),%ymm11,%ymm3 + vpor %ymm0,%ymm4,%ymm4 + vpand 384-128(%rsi),%ymm12,%ymm0 + vpor %ymm1,%ymm5,%ymm5 + vpand 416-128(%rsi),%ymm13,%ymm1 + vpor %ymm2,%ymm4,%ymm4 + vpand 448-128(%rsi),%ymm14,%ymm2 + vpor %ymm3,%ymm5,%ymm5 + vpand 480-128(%rsi),%ymm15,%ymm3 + leaq 512(%rsi),%rsi + vpor %ymm0,%ymm4,%ymm4 + vpor %ymm1,%ymm5,%ymm5 + vpor %ymm2,%ymm4,%ymm4 + vpor %ymm3,%ymm5,%ymm5 + + vpor %ymm5,%ymm4,%ymm4 + vextracti128 $1,%ymm4,%xmm5 + vpor %xmm4,%xmm5,%xmm5 + vpermd %ymm5,%ymm7,%ymm5 + vmovdqu %ymm5,(%rdi) + leaq 32(%rdi),%rdi + decl %edx + jnz .Loop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + vzeroupper + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp + .byte 0xf3,0xc3 +.cfi_endproc +.LSEH_end_rsaz_1024_gather5: +.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 + +.globl rsaz_avx2_eligible +.type rsaz_avx2_eligible,@function +.align 32 +rsaz_avx2_eligible: + movl OPENSSL_ia32cap_P+8(%rip),%eax + movl $524544,%ecx + movl $0,%edx + andl %eax,%ecx + cmpl $524544,%ecx + cmovel %edx,%eax + andl $32,%eax + shrl $5,%eax + .byte 0xf3,0xc3 +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.section .rodata +.align 64 +.Land_mask: +.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff +.Lscatter_permd: +.long 0,2,4,6,7,7,7,7 +.Lgather_permd: +.long 0,7,1,7,2,7,3,7 +.Linc: +.long 0,0,0,0, 1,1,1,1 +.long 2,2,2,2, 3,3,3,3 +.long 4,4,4,4, 4,4,4,4 +.previous +.align 64 diff --git a/contrib/openssl-cmake/asm/crypto/bn/rsaz-x86_64.s b/contrib/openssl-cmake/asm/crypto/bn/rsaz-x86_64.s new file mode 100644 index 000000000000..8247a82277a2 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/rsaz-x86_64.s @@ -0,0 +1,2017 @@ +.text + + + +.globl rsaz_512_sqr +.type rsaz_512_sqr,@function +.align 32 +rsaz_512_sqr: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + subq $128+24,%rsp +.cfi_adjust_cfa_offset 128+24 +.Lsqr_body: +.byte 102,72,15,110,202 + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Loop_sqrx + jmp .Loop_sqr + +.align 32 +.Loop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + movq %rax,%rbp + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + adcq $0,%rdx + + xorq %rcx,%rcx + addq %r8,%r8 + movq %rdx,%r15 + adcq 
$0,%rcx + + mulq %rax + addq %r8,%rdx + adcq $0,%rcx + + movq %rax,(%rsp) + movq %rdx,8(%rsp) + + + movq 16(%rsi),%rax + mulq %rbp + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %rbp + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %rbp + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %rbp + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %rbp + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %rbp + addq %rax,%r15 + movq %rbp,%rax + adcq $0,%rdx + addq %rbx,%r15 + adcq $0,%rdx + + xorq %rbx,%rbx + addq %r9,%r9 + movq %rdx,%r8 + adcq %r10,%r10 + adcq $0,%rbx + + mulq %rax + + addq %rcx,%rax + movq 16(%rsi),%rbp + addq %rax,%r9 + movq 24(%rsi),%rax + adcq %rdx,%r10 + adcq $0,%rbx + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + + + mulq %rbp + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %rbp + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %rbp + addq %rax,%r14 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %rbp + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %rbp + addq %rax,%r8 + movq %rbp,%rax + adcq $0,%rdx + addq %rcx,%r8 + adcq $0,%rdx + + xorq %rcx,%rcx + addq %r11,%r11 + movq %rdx,%r9 + adcq %r12,%r12 + adcq $0,%rcx + + mulq %rax + + addq %rbx,%rax + movq 24(%rsi),%r10 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq %rdx,%r12 + adcq $0,%rcx + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + + + movq %rax,%r11 + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + movq %rax,%r12 + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + movq %rax,%rbp + mulq %r10 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + adcq $0,%rdx + + xorq %rbx,%rbx + addq %r13,%r13 + movq %rdx,%r10 + adcq %r14,%r14 + adcq $0,%rbx + + mulq %rax + + addq %rcx,%rax + addq %rax,%r13 + movq %r12,%rax + adcq %rdx,%r14 + adcq $0,%rbx + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + + + mulq %r11 + addq %rax,%r8 + movq %rbp,%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + movq %rax,%r14 + mulq %r11 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + adcq $0,%rdx + + xorq %rcx,%rcx + addq %r15,%r15 + movq %rdx,%r11 + adcq %r8,%r8 + adcq $0,%rcx + + mulq %rax + + addq %rbx,%rax + addq %rax,%r15 + movq %rbp,%rax + adcq %rdx,%r8 + adcq $0,%rcx + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + + + mulq %r12 + addq %rax,%r10 + movq %r14,%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + adcq $0,%rdx + addq %rbx,%r11 + adcq $0,%rdx + + xorq %rbx,%rbx + addq %r9,%r9 + movq %rdx,%r12 + adcq %r10,%r10 + adcq $0,%rbx + + mulq %rax + + addq %rcx,%rax + addq %rax,%r9 + movq %r14,%rax + adcq %rdx,%r10 + adcq $0,%rbx + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + mulq %rbp + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + + xorq %rcx,%rcx + addq %r11,%r11 + movq %rdx,%r13 + adcq %r12,%r12 + adcq 
$0,%rcx + + mulq %rax + + addq %rbx,%rax + addq %rax,%r11 + movq %r14,%rax + adcq %rdx,%r12 + adcq $0,%rcx + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + xorq %rbx,%rbx + addq %r13,%r13 + adcq $0,%rbx + + mulq %rax + + addq %rcx,%rax + addq %r13,%rax + adcq %rbx,%rdx + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 +.byte 102,72,15,126,205 + + movq %rax,112(%rsp) + movq %rdx,120(%rsp) + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqr + jmp .Lsqr_tail + +.align 32 +.Loop_sqrx: + movl %r8d,128+8(%rsp) +.byte 102,72,15,110,199 + + mulxq %rax,%r8,%r9 + movq %rax,%rbx + + mulxq 16(%rsi),%rcx,%r10 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r11 + adcxq %rcx,%r9 + +.byte 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00 + adcxq %rax,%r10 + +.byte 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00 + adcxq %rcx,%r11 + + mulxq 48(%rsi),%rcx,%r14 + adcxq %rax,%r12 + adcxq %rcx,%r13 + + mulxq 56(%rsi),%rax,%r15 + adcxq %rax,%r14 + adcxq %rbp,%r15 + + mulxq %rdx,%rax,%rdi + movq %rbx,%rdx + xorq %rcx,%rcx + adoxq %r8,%r8 + adcxq %rdi,%r8 + adoxq %rbp,%rcx + adcxq %rbp,%rcx + + movq %rax,(%rsp) + movq %r8,8(%rsp) + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00 + adoxq %rax,%r10 + adcxq %rbx,%r11 + + mulxq 24(%rsi),%rdi,%r8 + adoxq %rdi,%r11 +.byte 0x66 + adcxq %r8,%r12 + + mulxq 32(%rsi),%rax,%rbx + adoxq %rax,%r12 + adcxq %rbx,%r13 + + mulxq 40(%rsi),%rdi,%r8 + adoxq %rdi,%r13 + adcxq %r8,%r14 + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 + adoxq %rdi,%r15 + adcxq %rbp,%r8 + mulxq %rdx,%rax,%rdi + adoxq %rbp,%r8 +.byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00 + + xorq %rbx,%rbx + adoxq %r9,%r9 + + adcxq %rcx,%rax + adoxq %r10,%r10 + adcxq %rax,%r9 + adoxq %rbp,%rbx + adcxq %rdi,%r10 + adcxq %rbp,%rbx + + movq %r9,16(%rsp) +.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 + + + mulxq 24(%rsi),%rdi,%r9 + adoxq %rdi,%r12 + adcxq %r9,%r13 + + mulxq 32(%rsi),%rax,%rcx + adoxq %rax,%r13 + adcxq %rcx,%r14 + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00 + adoxq %rdi,%r14 + adcxq %r9,%r15 + +.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 + adoxq %rax,%r15 + adcxq %rcx,%r8 + + mulxq 56(%rsi),%rdi,%r9 + adoxq %rdi,%r8 + adcxq %rbp,%r9 + mulxq %rdx,%rax,%rdi + adoxq %rbp,%r9 + movq 24(%rsi),%rdx + + xorq %rcx,%rcx + adoxq %r11,%r11 + + adcxq %rbx,%rax + adoxq %r12,%r12 + adcxq %rax,%r11 + adoxq %rbp,%rcx + adcxq %rdi,%r12 + adcxq %rbp,%rcx + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + + + mulxq 32(%rsi),%rax,%rbx + adoxq %rax,%r14 + adcxq %rbx,%r15 + + mulxq 40(%rsi),%rdi,%r10 + adoxq %rdi,%r15 + adcxq %r10,%r8 + + mulxq 48(%rsi),%rax,%rbx + adoxq %rax,%r8 + adcxq %rbx,%r9 + + mulxq 56(%rsi),%rdi,%r10 + adoxq %rdi,%r9 + adcxq %rbp,%r10 + mulxq %rdx,%rax,%rdi + adoxq %rbp,%r10 + movq 32(%rsi),%rdx + + xorq %rbx,%rbx + adoxq %r13,%r13 + + adcxq %rcx,%rax + adoxq %r14,%r14 + adcxq %rax,%r13 + adoxq %rbp,%rbx + adcxq %rdi,%r14 + adcxq %rbp,%rbx + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + + + mulxq 40(%rsi),%rdi,%r11 + adoxq %rdi,%r8 + adcxq %r11,%r9 + + mulxq 48(%rsi),%rax,%rcx + adoxq %rax,%r9 + adcxq %rcx,%r10 
+ + mulxq 56(%rsi),%rdi,%r11 + adoxq %rdi,%r10 + adcxq %rbp,%r11 + mulxq %rdx,%rax,%rdi + movq 40(%rsi),%rdx + adoxq %rbp,%r11 + + xorq %rcx,%rcx + adoxq %r15,%r15 + + adcxq %rbx,%rax + adoxq %r8,%r8 + adcxq %rax,%r15 + adoxq %rbp,%rcx + adcxq %rdi,%r8 + adcxq %rbp,%rcx + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %rbp,%r12 + mulxq %rdx,%rax,%rdi + adoxq %rbp,%r12 + movq 48(%rsi),%rdx + + xorq %rbx,%rbx + adoxq %r9,%r9 + + adcxq %rcx,%rax + adoxq %r10,%r10 + adcxq %rax,%r9 + adcxq %rdi,%r10 + adoxq %rbp,%rbx + adcxq %rbp,%rbx + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + +.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 + adoxq %rax,%r12 + adoxq %rbp,%r13 + + mulxq %rdx,%rax,%rdi + xorq %rcx,%rcx + movq 56(%rsi),%rdx + adoxq %r11,%r11 + + adcxq %rbx,%rax + adoxq %r12,%r12 + adcxq %rax,%r11 + adoxq %rbp,%rcx + adcxq %rdi,%r12 + adcxq %rbp,%rcx + +.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 +.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 + + + mulxq %rdx,%rax,%rdx + xorq %rbx,%rbx + adoxq %r13,%r13 + + adcxq %rcx,%rax + adoxq %rbp,%rbx + adcxq %r13,%rax + adcxq %rdx,%rbx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + movq %rax,112(%rsp) + movq %rbx,120(%rsp) + + call __rsaz_512_reducex + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqrx + +.Lsqr_tail: + + leaq 128+24+48(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lsqr_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_512_sqr,.-rsaz_512_sqr +.globl rsaz_512_mul +.type rsaz_512_mul,@function +.align 32 +rsaz_512_mul: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + subq $128+24,%rsp +.cfi_adjust_cfa_offset 128+24 +.Lmul_body: +.byte 102,72,15,110,199 +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Lmulx + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp .Lmul_tail + +.align 32 +.Lmulx: + movq %rdx,%rbp + movq (%rdx),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 
16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex +.Lmul_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_512_mul,.-rsaz_512_mul +.globl rsaz_512_mul_gather4 +.type rsaz_512_mul_gather4,@function +.align 32 +rsaz_512_mul_gather4: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + subq $152,%rsp +.cfi_adjust_cfa_offset 152 +.Lmul_gather4_body: + movd %r9d,%xmm8 + movdqa .Linc+16(%rip),%xmm1 + movdqa .Linc(%rip),%xmm0 + + pshufd $0,%xmm8,%xmm8 + movdqa %xmm1,%xmm7 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm8,%xmm0 + movdqa %xmm7,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm8,%xmm1 + movdqa %xmm7,%xmm4 + paddd %xmm2,%xmm3 + pcmpeqd %xmm8,%xmm2 + movdqa %xmm7,%xmm5 + paddd %xmm3,%xmm4 + pcmpeqd %xmm8,%xmm3 + movdqa %xmm7,%xmm6 + paddd %xmm4,%xmm5 + pcmpeqd %xmm8,%xmm4 + paddd %xmm5,%xmm6 + pcmpeqd %xmm8,%xmm5 + paddd %xmm6,%xmm7 + pcmpeqd %xmm8,%xmm6 + pcmpeqd %xmm8,%xmm7 + + movdqa 0(%rdx),%xmm8 + movdqa 16(%rdx),%xmm9 + movdqa 32(%rdx),%xmm10 + movdqa 48(%rdx),%xmm11 + pand %xmm0,%xmm8 + movdqa 64(%rdx),%xmm12 + pand %xmm1,%xmm9 + movdqa 80(%rdx),%xmm13 + pand %xmm2,%xmm10 + movdqa 96(%rdx),%xmm14 + pand %xmm3,%xmm11 + movdqa 112(%rdx),%xmm15 + leaq 128(%rdx),%rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd $0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Lmulx_gather +.byte 102,76,15,126,195 + + movq %r8,128(%rsp) + movq %rdi,128+8(%rsp) + movq %rcx,128+16(%rsp) + + movq (%rsi),%rax + movq 8(%rsi),%rcx + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp .Loop_mul_gather + +.align 32 +.Loop_mul_gather: + movdqa 0(%rbp),%xmm8 + movdqa 16(%rbp),%xmm9 + movdqa 32(%rbp),%xmm10 + movdqa 48(%rbp),%xmm11 + pand %xmm0,%xmm8 + 
movdqa 64(%rbp),%xmm12 + pand %xmm1,%xmm9 + movdqa 80(%rbp),%xmm13 + pand %xmm2,%xmm10 + movdqa 96(%rbp),%xmm14 + pand %xmm3,%xmm11 + movdqa 112(%rbp),%xmm15 + leaq 128(%rbp),%rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd $0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 +.byte 102,76,15,126,195 + + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 128+8(%rsp),%rdi + movq 128+16(%rsp),%rbp + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp .Lmul_gather_tail + +.align 32 +.Lmulx_gather: +.byte 102,76,15,126,194 + + movq %r8,128(%rsp) + movq %rdi,128+8(%rsp) + movq %rcx,128+16(%rsp) + + mulxq (%rsi),%rbx,%r8 + movq %rbx,(%rsp) + xorl %edi,%edi + + mulxq 8(%rsi),%rax,%r9 + + mulxq 16(%rsi),%rbx,%r10 + adcxq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + adcxq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 + adcxq %rbx,%r13 + adcxq %rax,%r14 +.byte 0x67 + movq %r8,%rbx + adcxq %rdi,%r15 + + movq $-7,%rcx + jmp .Loop_mulx_gather + +.align 32 +.Loop_mulx_gather: + movdqa 0(%rbp),%xmm8 + movdqa 16(%rbp),%xmm9 + movdqa 32(%rbp),%xmm10 + movdqa 48(%rbp),%xmm11 + pand %xmm0,%xmm8 + movdqa 64(%rbp),%xmm12 + pand %xmm1,%xmm9 + movdqa 80(%rbp),%xmm13 + pand %xmm2,%xmm10 + movdqa 96(%rbp),%xmm14 + pand %xmm3,%xmm11 + movdqa 112(%rbp),%xmm15 + leaq 128(%rbp),%rbp + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd $0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 +.byte 102,76,15,126,194 + +.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + +.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 
0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 +.byte 0x67 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 + movq %rbx,64(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + movq %r8,%rbx + adcxq %rdi,%r15 + + incq %rcx + jnz .Loop_mulx_gather + + movq %r8,64(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + + movq 128(%rsp),%rdx + movq 128+8(%rsp),%rdi + movq 128+16(%rsp),%rbp + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +.Lmul_gather_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lmul_gather4_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 +.globl rsaz_512_mul_scatter4 +.type rsaz_512_mul_scatter4,@function +.align 32 +rsaz_512_mul_scatter4: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + movl %r9d,%r9d + subq $128+24,%rsp +.cfi_adjust_cfa_offset 128+24 +.Lmul_scatter4_body: + leaq (%r8,%r9,8),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movl $0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $0x80100,%r11d + je .Lmulx_scatter + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp .Lmul_scatter_tail + +.align 32 +.Lmulx_scatter: + movq (%rdi),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +.Lmul_scatter_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,0(%rsi) + movq %r9,128(%rsi) + movq %r10,256(%rsi) + movq %r11,384(%rsi) + movq %r12,512(%rsi) + movq %r13,640(%rsi) + movq %r14,768(%rsi) + movq %r15,896(%rsi) + + leaq 128+24+48(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp 
+.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lmul_scatter4_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 +.globl rsaz_512_mul_by_one +.type rsaz_512_mul_by_one,@function +.align 32 +rsaz_512_mul_by_one: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + subq $128+24,%rsp +.cfi_adjust_cfa_offset 128+24 +.Lmul_by_one_body: + movl OPENSSL_ia32cap_P+8(%rip),%eax + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + andl $0x80100,%eax + cmpl $0x80100,%eax + je .Lby_one_callx + call __rsaz_512_reduce + jmp .Lby_one_tail +.align 32 +.Lby_one_callx: + movq 128(%rsp),%rdx + call __rsaz_512_reducex +.Lby_one_tail: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lmul_by_one_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one +.type __rsaz_512_reduce,@function +.align 32 +__rsaz_512_reduce: +.cfi_startproc + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .Lreduction_loop + +.align 32 +.Lreduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jne .Lreduction_loop + + .byte 0xf3,0xc3 +.cfi_endproc +.size __rsaz_512_reduce,.-__rsaz_512_reduce +.type __rsaz_512_reducex,@function +.align 32 +__rsaz_512_reducex: +.cfi_startproc + + imulq %r8,%rdx + xorq %rsi,%rsi + movl $8,%ecx + jmp .Lreduction_loopx + +.align 32 +.Lreduction_loopx: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 
8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 128+8(%rsp),%rbx,%rdx + movq %rax,%rdx + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + + decl %ecx + jne .Lreduction_loopx + + .byte 0xf3,0xc3 +.cfi_endproc +.size __rsaz_512_reducex,.-__rsaz_512_reducex +.type __rsaz_512_subtract,@function +.align 32 +__rsaz_512_subtract: +.cfi_startproc + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __rsaz_512_subtract,.-__rsaz_512_subtract +.type __rsaz_512_mul,@function +.align 32 +__rsaz_512_mul: +.cfi_startproc + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp .Loop_mul + +.align 32 +.Loop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 
+ movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __rsaz_512_mul,.-__rsaz_512_mul +.type __rsaz_512_mulx,@function +.align 32 +__rsaz_512_mulx: +.cfi_startproc + mulxq (%rsi),%rbx,%r8 + movq $-6,%rcx + + mulxq 8(%rsi),%rax,%r9 + movq %rbx,8(%rsp) + + mulxq 16(%rsi),%rbx,%r10 + adcq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + adcq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + adcq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + adcq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 + movq 8(%rbp),%rdx + adcq %rbx,%r13 + adcq %rax,%r14 + adcq $0,%r15 + + xorq %rdi,%rdi + jmp .Loop_mulx + +.align 32 +.Loop_mulx: + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rsi),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 + movq 64(%rbp,%rcx,8),%rdx + movq %rbx,8+64-8(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + incq %rcx + jnz .Loop_mulx + + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + +.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 + adcxq %rax,%r8 + adoxq %r10,%r9 + +.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + movq %rbx,8+64-8(%rsp) + movq %r8,8+64(%rsp) + movq %r9,8+64+8(%rsp) + movq %r10,8+64+16(%rsp) + movq %r11,8+64+24(%rsp) + movq %r12,8+64+32(%rsp) + movq %r13,8+64+40(%rsp) + movq %r14,8+64+48(%rsp) + movq %r15,8+64+56(%rsp) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __rsaz_512_mulx,.-__rsaz_512_mulx +.globl rsaz_512_scatter4 +.type rsaz_512_scatter4,@function +.align 16 +rsaz_512_scatter4: +.cfi_startproc + leaq (%rdi,%rdx,8),%rdi + movl $8,%r9d + jmp .Loop_scatter +.align 16 +.Loop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movq %rax,(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz .Loop_scatter + .byte 0xf3,0xc3 +.cfi_endproc +.size rsaz_512_scatter4,.-rsaz_512_scatter4 + +.globl rsaz_512_gather4 +.type rsaz_512_gather4,@function +.align 16 +rsaz_512_gather4: +.cfi_startproc + movd %edx,%xmm8 + movdqa .Linc+16(%rip),%xmm1 + movdqa .Linc(%rip),%xmm0 + + pshufd $0,%xmm8,%xmm8 + movdqa %xmm1,%xmm7 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm8,%xmm0 + movdqa %xmm7,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm8,%xmm1 + movdqa %xmm7,%xmm4 + paddd %xmm2,%xmm3 + pcmpeqd %xmm8,%xmm2 + movdqa %xmm7,%xmm5 + paddd %xmm3,%xmm4 + pcmpeqd %xmm8,%xmm3 + movdqa %xmm7,%xmm6 + paddd %xmm4,%xmm5 + pcmpeqd %xmm8,%xmm4 + paddd %xmm5,%xmm6 + pcmpeqd %xmm8,%xmm5 + paddd %xmm6,%xmm7 + pcmpeqd %xmm8,%xmm6 + pcmpeqd %xmm8,%xmm7 + movl $8,%r9d + jmp 
.Loop_gather +.align 16 +.Loop_gather: + movdqa 0(%rsi),%xmm8 + movdqa 16(%rsi),%xmm9 + movdqa 32(%rsi),%xmm10 + movdqa 48(%rsi),%xmm11 + pand %xmm0,%xmm8 + movdqa 64(%rsi),%xmm12 + pand %xmm1,%xmm9 + movdqa 80(%rsi),%xmm13 + pand %xmm2,%xmm10 + movdqa 96(%rsi),%xmm14 + pand %xmm3,%xmm11 + movdqa 112(%rsi),%xmm15 + leaq 128(%rsi),%rsi + pand %xmm4,%xmm12 + pand %xmm5,%xmm13 + pand %xmm6,%xmm14 + pand %xmm7,%xmm15 + por %xmm10,%xmm8 + por %xmm11,%xmm9 + por %xmm12,%xmm8 + por %xmm13,%xmm9 + por %xmm14,%xmm8 + por %xmm15,%xmm9 + + por %xmm9,%xmm8 + pshufd $0x4e,%xmm8,%xmm9 + por %xmm9,%xmm8 + movq %xmm8,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz .Loop_gather + .byte 0xf3,0xc3 +.LSEH_end_rsaz_512_gather4: +.cfi_endproc +.size rsaz_512_gather4,.-rsaz_512_gather4 + +.section .rodata +.align 64 +.Linc: +.long 0,0, 1,1 +.long 2,2, 2,2 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/bn/x86_64-gf2m.s b/contrib/openssl-cmake/asm/crypto/bn/x86_64-gf2m.s new file mode 100644 index 000000000000..0846c4441e3a --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/x86_64-gf2m.s @@ -0,0 +1,311 @@ +.text + +.type _mul_1x1,@function +.align 16 +_mul_1x1: +.cfi_startproc + subq $128+8,%rsp +.cfi_adjust_cfa_offset 128+8 + movq $-1,%r9 + leaq (%rax,%rax,1),%rsi + shrq $3,%r9 + leaq (,%rax,4),%rdi + andq %rax,%r9 + leaq (,%rax,8),%r12 + sarq $63,%rax + leaq (%r9,%r9,1),%r10 + sarq $63,%rsi + leaq (,%r9,4),%r11 + andq %rbp,%rax + sarq $63,%rdi + movq %rax,%rdx + shlq $63,%rax + andq %rbp,%rsi + shrq $1,%rdx + movq %rsi,%rcx + shlq $62,%rsi + andq %rbp,%rdi + shrq $2,%rcx + xorq %rsi,%rax + movq %rdi,%rbx + shlq $61,%rdi + xorq %rcx,%rdx + shrq $3,%rbx + xorq %rdi,%rax + xorq %rbx,%rdx + + movq %r9,%r13 + movq $0,0(%rsp) + xorq %r10,%r13 + movq %r9,8(%rsp) + movq %r11,%r14 + movq %r10,16(%rsp) + xorq %r12,%r14 + movq %r13,24(%rsp) + + xorq %r11,%r9 + movq %r11,32(%rsp) + xorq %r11,%r10 + movq %r9,40(%rsp) + xorq %r11,%r13 + movq %r10,48(%rsp) + xorq %r14,%r9 + movq %r13,56(%rsp) + xorq %r14,%r10 + + movq %r12,64(%rsp) + xorq %r14,%r13 + movq %r9,72(%rsp) + xorq %r11,%r9 + movq %r10,80(%rsp) + xorq %r11,%r10 + movq %r13,88(%rsp) + + xorq %r11,%r13 + movq %r14,96(%rsp) + movq %r8,%rsi + movq %r9,104(%rsp) + andq %rbp,%rsi + movq %r10,112(%rsp) + shrq $4,%rbp + movq %r13,120(%rsp) + movq %r8,%rdi + andq %rbp,%rdi + shrq $4,%rbp + + movq (%rsp,%rsi,8),%xmm0 + movq %r8,%rsi + andq %rbp,%rsi + shrq $4,%rbp + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $4,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $60,%rbx + xorq %rcx,%rax + pslldq $1,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $12,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $52,%rbx + xorq %rcx,%rax + pslldq $2,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $20,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $44,%rbx + xorq %rcx,%rax + pslldq $3,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $28,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $36,%rbx + xorq %rcx,%rax + pslldq $4,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + 
movq %rcx,%rbx + shlq $36,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $28,%rbx + xorq %rcx,%rax + pslldq $5,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $44,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $20,%rbx + xorq %rcx,%rax + pslldq $6,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $52,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $12,%rbx + xorq %rcx,%rax + pslldq $7,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %rcx,%rbx + shlq $60,%rcx +.byte 102,72,15,126,198 + shrq $4,%rbx + xorq %rcx,%rax + psrldq $8,%xmm0 + xorq %rbx,%rdx +.byte 102,72,15,126,199 + xorq %rsi,%rax + xorq %rdi,%rdx + + addq $128+8,%rsp +.cfi_adjust_cfa_offset -128-8 + .byte 0xf3,0xc3 +.Lend_mul_1x1: +.cfi_endproc +.size _mul_1x1,.-_mul_1x1 + +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: +.cfi_startproc + movq %rsp,%rax + movq OPENSSL_ia32cap_P(%rip),%r10 + btq $33,%r10 + jnc .Lvanilla_mul_2x2 + +.byte 102,72,15,110,198 +.byte 102,72,15,110,201 +.byte 102,72,15,110,210 +.byte 102,73,15,110,216 + movdqa %xmm0,%xmm4 + movdqa %xmm1,%xmm5 +.byte 102,15,58,68,193,0 + pxor %xmm2,%xmm4 + pxor %xmm3,%xmm5 +.byte 102,15,58,68,211,0 +.byte 102,15,58,68,229,0 + xorps %xmm0,%xmm4 + xorps %xmm2,%xmm4 + movdqa %xmm4,%xmm5 + pslldq $8,%xmm4 + psrldq $8,%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm0 + movdqu %xmm2,0(%rdi) + movdqu %xmm0,16(%rdi) + .byte 0xf3,0xc3 + +.align 16 +.Lvanilla_mul_2x2: + leaq -136(%rsp),%rsp +.cfi_adjust_cfa_offset 8*17 + movq %r14,80(%rsp) +.cfi_rel_offset %r14,8*10 + movq %r13,88(%rsp) +.cfi_rel_offset %r13,8*11 + movq %r12,96(%rsp) +.cfi_rel_offset %r12,8*12 + movq %rbp,104(%rsp) +.cfi_rel_offset %rbp,8*13 + movq %rbx,112(%rsp) +.cfi_rel_offset %rbx,8*14 +.Lbody_mul_2x2: + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + movq %rdx,48(%rsp) + movq %rcx,56(%rsp) + movq %r8,64(%rsp) + + movq $0xf,%r8 + movq %rsi,%rax + movq %rcx,%rbp + call _mul_1x1 + movq %rax,16(%rsp) + movq %rdx,24(%rsp) + + movq 48(%rsp),%rax + movq 64(%rsp),%rbp + call _mul_1x1 + movq %rax,0(%rsp) + movq %rdx,8(%rsp) + + movq 40(%rsp),%rax + movq 56(%rsp),%rbp + xorq 48(%rsp),%rax + xorq 64(%rsp),%rbp + call _mul_1x1 + movq 0(%rsp),%rbx + movq 8(%rsp),%rcx + movq 16(%rsp),%rdi + movq 24(%rsp),%rsi + movq 32(%rsp),%rbp + + xorq %rdx,%rax + xorq %rcx,%rdx + xorq %rbx,%rax + movq %rbx,0(%rbp) + xorq %rdi,%rdx + movq %rsi,24(%rbp) + xorq %rsi,%rax + xorq %rsi,%rdx + xorq %rdx,%rax + movq %rdx,16(%rbp) + movq %rax,8(%rbp) + + movq 80(%rsp),%r14 +.cfi_restore %r14 + movq 88(%rsp),%r13 +.cfi_restore %r13 + movq 96(%rsp),%r12 +.cfi_restore %r12 + movq 104(%rsp),%rbp +.cfi_restore %rbp + movq 112(%rsp),%rbx +.cfi_restore %rbx + leaq 136(%rsp),%rsp +.cfi_adjust_cfa_offset -8*17 +.Lepilogue_mul_2x2: + .byte 0xf3,0xc3 +.Lend_mul_2x2: +.cfi_endproc +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 diff --git a/contrib/openssl-cmake/asm/crypto/bn/x86_64-mont.s 
b/contrib/openssl-cmake/asm/crypto/bn/x86_64-mont.s new file mode 100644 index 000000000000..414be6aff521 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/x86_64-mont.s @@ -0,0 +1,1239 @@ +.text + + + +.globl bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: +.cfi_startproc + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax + testl $3,%r9d + jnz .Lmul_enter + cmpl $8,%r9d + jb .Lmul_enter + movl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpq %rsi,%rdx + jne .Lmul4x_enter + testl $7,%r9d + jz .Lsqr8x_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + negq %r9 + movq %rsp,%r11 + leaq -16(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + + + + + + + + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + +.align 16 +.Lmul_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + + movq %rax,8(%rsp,%r9,8) +.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 +.Lmul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + movq %r9,%r15 + +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsp,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + movq 
$-1,%rbx + xorq %rax,%rbx + xorq %r14,%r14 + movq %r9,%r15 + +.Lcopy: + movq (%rdi,%r14,8),%rcx + movq (%rsp,%r14,8),%rdx + andq %rbx,%rcx + andq %rax,%rdx + movq %r9,(%rsp,%r14,8) + orq %rcx,%rdx + movq %rdx,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_mul_mont,.-bn_mul_mont +.type bn_mul4x_mont,@function +.align 16 +bn_mul4x_mont: +.cfi_startproc + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lmul4x_enter: + andl $0x80100,%r11d + cmpl $0x80100,%r11d + je .Lmulx4x_enter + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + negq %r9 + movq %rsp,%r11 + leaq -32(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + +.Lmul4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: + + movq %rax,8(%rsp,%r9,8) +.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq 
%rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb .Louter4x + movq 16(%rsp,%r9,8),%rdi + leaq -4(%r9),%r15 + movq 0(%rsp),%rax + movq 8(%rsp),%rdx + shrq $2,%r15 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + 
sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + pxor %xmm0,%xmm0 +.byte 102,72,15,110,224 + pcmpeqd %xmm5,%xmm5 + pshufd $0,%xmm4,%xmm4 + movq %r9,%r15 + pxor %xmm4,%xmm5 + shrq $2,%r15 + xorl %eax,%eax + + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqa (%rsp,%rax,1),%xmm1 + movdqu (%rdi,%rax,1),%xmm2 + pand %xmm4,%xmm1 + pand %xmm5,%xmm2 + movdqa 16(%rsp,%rax,1),%xmm3 + movdqa %xmm0,(%rsp,%rax,1) + por %xmm2,%xmm1 + movdqu 16(%rdi,%rax,1),%xmm2 + movdqu %xmm1,(%rdi,%rax,1) + pand %xmm4,%xmm3 + pand %xmm5,%xmm2 + movdqa %xmm0,16(%rsp,%rax,1) + por %xmm2,%xmm3 + movdqu %xmm3,16(%rdi,%rax,1) + leaq 32(%rax),%rax + decq %r15 + jnz .Lcopy4x + movq 8(%rsp,%r9,8),%rsi +.cfi_def_cfa %rsi, 8 + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_mul4x_mont,.-bn_mul4x_mont + + + +.type bn_sqr8x_mont,@function +.align 32 +bn_sqr8x_mont: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lsqr8x_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lsqr8x_prologue: + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,2),%r11 + movq %rsp,%rbp + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lsqr8x_sp_alt + subq %r11,%rbp + leaq -64(%rbp,%r9,2),%rbp + jmp .Lsqr8x_sp_done + +.align 32 +.Lsqr8x_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lsqr8x_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk + jmp .Lsqr8x_page_walk_done + +.align 16 +.Lsqr8x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lsqr8x_page_walk +.Lsqr8x_page_walk_done: + + movq %r9,%r10 + negq %r9 + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lsqr8x_body: + +.byte 102,72,15,110,209 + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + movl OPENSSL_ia32cap_P+8(%rip),%eax + andl $0x80100,%eax + cmpl $0x80100,%eax + jne .Lsqr8x_nox + + call bn_sqrx8x_internal + + + + + leaq (%r8,%rcx,1),%rbx + movq %rcx,%r9 + movq %rcx,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp .Lsqr8x_sub + +.align 32 +.Lsqr8x_nox: + call bn_sqr8x_internal + + + + + leaq (%rdi,%r9,1),%rbx + movq %r9,%rcx + movq %r9,%rdx +.byte 102,72,15,126,207 + sarq $3+2,%rcx + jmp .Lsqr8x_sub + +.align 32 +.Lsqr8x_sub: + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + movq 16(%rbx),%r14 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 0(%rbp),%r12 + sbbq 8(%rbp),%r13 + sbbq 16(%rbp),%r14 + sbbq 24(%rbp),%r15 + leaq 32(%rbp),%rbp + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + incq %rcx + jnz .Lsqr8x_sub + + sbbq $0,%rax + leaq (%rbx,%r9,1),%rbx + leaq (%rdi,%r9,1),%rdi + +.byte 102,72,15,110,200 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + jmp .Lsqr8x_cond_copy + +.align 32 +.Lsqr8x_cond_copy: + movdqa 
0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + movdqa %xmm0,-32(%rbx,%rdx,1) + movdqa %xmm0,-16(%rbx,%rdx,1) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + addq $32,%r9 + jnz .Lsqr8x_cond_copy + + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lsqr8x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_sqr8x_mont,.-bn_sqr8x_mont +.type bn_mulx4x_mont,@function +.align 32 +bn_mulx4x_mont: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lmulx4x_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lmulx4x_prologue: + + shll $3,%r9d + xorq %r10,%r10 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,1),%rbp + andq $-128,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk + jmp .Lmulx4x_page_walk_done + +.align 16 +.Lmulx4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk +.Lmulx4x_page_walk_done: + + leaq (%rdx,%r9,1),%r10 + + + + + + + + + + + + + movq %r9,0(%rsp) + shrq $5,%r9 + movq %r10,16(%rsp) + subq $1,%r9 + movq %r8,24(%rsp) + movq %rdi,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 + movq %r9,48(%rsp) + jmp .Lmulx4x_body + +.align 32 +.Lmulx4x_body: + leaq 8(%rdx),%rdi + movq (%rdx),%rdx + leaq 64+32(%rsp),%rbx + movq %rdx,%r9 + + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r14 + addq %rax,%r11 + movq %rdi,8(%rsp) + mulxq 16(%rsi),%r12,%r13 + adcq %r14,%r12 + adcq $0,%r13 + + movq %r8,%rdi + imulq 24(%rsp),%r8 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%rdi + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 +.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + movq 48(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 0(%rsp),%rax + movq 
8(%rsp),%rdi + adcq %rbp,%r15 + addq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + movq (%rdi),%rdx + leaq 8(%rdi),%rdi + subq %rax,%rsi + movq %r15,(%rbx) + leaq 64+32(%rsp),%rbx + subq %rax,%rcx + + mulxq 0(%rsi),%r8,%r11 + xorl %ebp,%ebp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + adoxq -16(%rbx),%r12 + adcxq %rbp,%r13 + adoxq %rbp,%r13 + + movq %rdi,8(%rsp) + movq %r8,%r15 + imulq 24(%rsp),%r8 + xorl %ebp,%ebp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + adcxq %rax,%r13 + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + adoxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + leaq 32(%rcx),%rcx + adcxq %rax,%r12 + adoxq %rbp,%r15 + movq 48(%rsp),%rdi + movq %r12,-16(%rbx) + + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-32(%rbx) + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + subq 0(%rbx),%rbp + adcq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + + cmpq 16(%rsp),%rdi + jne .Lmulx4x_outer + + leaq 64(%rsp),%rbx + subq %rax,%rcx + negq %r15 + movq %rax,%rdx + shrq $3+2,%rax + movq 32(%rsp),%rdi + jmp .Lmulx4x_sub + +.align 32 +.Lmulx4x_sub: + movq 0(%rbx),%r11 + movq 8(%rbx),%r12 + movq 16(%rbx),%r13 + movq 24(%rbx),%r14 + leaq 32(%rbx),%rbx + sbbq 0(%rcx),%r11 + sbbq 8(%rcx),%r12 + sbbq 16(%rcx),%r13 + sbbq 24(%rcx),%r14 + leaq 32(%rcx),%rcx + movq %r11,0(%rdi) + movq %r12,8(%rdi) + movq %r13,16(%rdi) + movq %r14,24(%rdi) + leaq 32(%rdi),%rdi + decq %rax + jnz .Lmulx4x_sub + + sbbq $0,%r15 + leaq 64(%rsp),%rbx + subq %rdx,%rdi + +.byte 102,73,15,110,207 + pxor %xmm0,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + jmp .Lmulx4x_cond_copy + +.align 32 +.Lmulx4x_cond_copy: + movdqa 0(%rbx),%xmm2 + movdqa 16(%rbx),%xmm3 + leaq 32(%rbx),%rbx + movdqu 0(%rdi),%xmm4 + movdqu 16(%rdi),%xmm5 + leaq 32(%rdi),%rdi + movdqa %xmm0,-32(%rbx) + movdqa %xmm0,-16(%rbx) + pcmpeqd %xmm1,%xmm0 + pand %xmm1,%xmm2 + pand %xmm1,%xmm3 + pand %xmm0,%xmm4 + pand %xmm0,%xmm5 + pxor %xmm0,%xmm0 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqu %xmm4,-32(%rdi) + movdqu %xmm5,-16(%rdi) + subq $32,%rdx + jnz .Lmulx4x_cond_copy + + movq %rdx,(%rbx) + + movq $1,%rax + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx 
+.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmulx4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_mulx4x_mont,.-bn_mulx4x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 diff --git a/contrib/openssl-cmake/asm/crypto/bn/x86_64-mont5.s b/contrib/openssl-cmake/asm/crypto/bn/x86_64-mont5.s new file mode 100644 index 000000000000..7d76c0e5505d --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/bn/x86_64-mont5.s @@ -0,0 +1,3605 @@ +.text + + + +.globl bn_mul_mont_gather5 +.type bn_mul_mont_gather5,@function +.align 64 +bn_mul_mont_gather5: +.cfi_startproc + movl %r9d,%r9d + movq %rsp,%rax +.cfi_def_cfa_register %rax + testl $7,%r9d + jnz .Lmul_enter + movl OPENSSL_ia32cap_P+8(%rip),%r11d + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + movd 8(%rsp),%xmm5 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + negq %r9 + movq %rsp,%r11 + leaq -280(%rsp,%r9,8),%r10 + negq %r9 + andq $-1024,%r10 + + + + + + + + + + subq %r10,%r11 + andq $-4096,%r11 + leaq (%r10,%r11,1),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk + jmp .Lmul_page_walk_done + +.Lmul_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r11 + cmpq %r10,%rsp + ja .Lmul_page_walk +.Lmul_page_walk_done: + + leaq .Linc(%rip),%r10 + movq %rax,8(%rsp,%r9,8) +.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 +.Lmul_body: + + leaq 128(%rdx),%r12 + movdqa 0(%r10),%xmm0 + movdqa 16(%r10),%xmm1 + leaq 24-112(%rsp,%r9,8),%r10 + andq $-16,%r10 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 
+ pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r9,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + leaq 24+128(%rsp,%r9,8),%rdx + andq $-16,%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 + + movq (%rsi),%rax +.byte 102,72,15,126,195 + + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 
+ movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r9,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r9,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + movq $-1,%rbx + xorq %rax,%rbx + xorq %r14,%r14 + movq %r9,%r15 + +.Lcopy: + movq (%rdi,%r14,8),%rcx + movq (%rsp,%r14,8),%rdx + andq %rbx,%rcx + andq %rax,%rdx + movq %r14,(%rsp,%r14,8) + orq %rcx,%rdx + movq %rdx,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.type bn_mul4x_mont_gather5,@function +.align 32 +bn_mul4x_mont_gather5: +.cfi_startproc +.byte 0x67 + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lmul4x_enter: + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je .Lmulx4x_enter + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lmul4x_prologue: + +.byte 0x67 + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmul4xsp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lmul4xsp_done + +.align 32 +.Lmul4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lmul4xsp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk + jmp .Lmul4x_page_walk_done + +.Lmul4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmul4x_page_walk +.Lmul4x_page_walk_done: + + negq %r9 + + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lmul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 + +.type mul4x_internal,@function +.align 32 +mul4x_internal: +.cfi_startproc + shlq $5,%r9 + movd 8(%rax),%xmm5 + leaq .Linc(%rip),%rax + leaq 128(%rdx,%r9,1),%r13 + shrq $5,%r9 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r9,1),%r10 + leaq 128(%rdx),%r12 + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67,0x67 + movdqa %xmm1,%xmm2 + 
paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 +.byte 0x67 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 +.byte 0x67 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + pand 64(%r12),%xmm0 + + pand 80(%r12),%xmm1 + pand 96(%r12),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%r12),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%r12),%xmm4 + movdqa -112(%r12),%xmm5 + movdqa -96(%r12),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%r12),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%r12),%xmm4 + movdqa -48(%r12),%xmm5 + movdqa -32(%r12),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%r12),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%r12),%xmm4 + movdqa 16(%r12),%xmm5 + movdqa 32(%r12),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%r12),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + por %xmm1,%xmm0 + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + leaq 64+8(%rsp),%r14 + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp .L1st4x + +.align 32 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 
0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + leaq (%rcx,%r9,1),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp .Louter4x + +.align 32 +.Louter4x: + leaq 16+128(%r14),%rdx + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r12),%xmm0 + movdqa -112(%r12),%xmm1 + movdqa -96(%r12),%xmm2 + movdqa -80(%r12),%xmm3 + pand -128(%rdx),%xmm0 + pand -112(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -80(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r12),%xmm0 + movdqa -48(%r12),%xmm1 + movdqa -32(%r12),%xmm2 + movdqa -16(%r12),%xmm3 + pand -64(%rdx),%xmm0 + pand -48(%rdx),%xmm1 + por %xmm0,%xmm4 + pand -32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand -16(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r12),%xmm0 + movdqa 16(%r12),%xmm1 + movdqa 32(%r12),%xmm2 + movdqa 48(%r12),%xmm3 + pand 0(%rdx),%xmm0 + pand 16(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 32(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 48(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r12),%xmm0 + movdqa 80(%r12),%xmm1 + movdqa 96(%r12),%xmm2 + movdqa 112(%r12),%xmm3 + pand 64(%rdx),%xmm0 + pand 80(%rdx),%xmm1 + por %xmm0,%xmm4 + pand 96(%rdx),%xmm2 + por %xmm1,%xmm5 + pand 112(%rdx),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%r12),%r12 +.byte 102,72,15,126,195 + + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + movq %rdi,(%r14) + + leaq (%r14,%r9,1),%r14 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp .Linner4x + +.align 32 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq 
%rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -8(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + movq %rdi,-16(%r14) + leaq (%rcx,%r9,1),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb .Louter4x + xorq %rax,%rax + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + subq %rdi,%rax + leaq (%r14,%r9,1),%rbx + movq (%rcx),%r12 + leaq (%rcx),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + decq %r12 + xorq %r10,%r10 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqr4x_sub_entry +.cfi_endproc +.size mul4x_internal,.-mul4x_internal +.globl bn_power5 +.type bn_power5,@function +.align 32 +bn_power5: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax + movl OPENSSL_ia32cap_P+8(%rip),%r11d + andl $0x80108,%r11d + cmpl $0x80108,%r11d + je .Lpowerx5_enter + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lpower5_prologue: + + shll $3,%r9d + leal (%r9,%r9,2),%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwr_sp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lpwr_sp_done + +.align 32 +.Lpwr_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lpwr_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk + jmp .Lpwr_page_walk_done + +.Lpwr_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwr_page_walk +.Lpwr_page_walk_done: + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lpower5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + call __bn_sqr8x_internal + call __bn_post4x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + movq -48(%rsi),%r15 
+.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpower5_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_power5,.-bn_power5 + +.globl bn_sqr8x_internal +.hidden bn_sqr8x_internal +.type bn_sqr8x_internal,@function +.align 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: +.cfi_startproc + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 32 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp .Lsqr4x_outer + +.align 32 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp .Lsqr4x_inner + +.align 32 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq 
%rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp .Lsqr4x_shift_n_add + +.align 32 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq 
%rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +__bn_sqr8x_reduction: + xorq %rax,%rax + leaq (%r9,%rbp,1),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp .L8x_reduction_loop + +.align 32 +.L8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .L8x_reduce + +.align 32 +.L8x_reduce: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_reduce + + leaq 64(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp .L8x_tail + +.align 32 +.L8x_tail: + mulq %rbx + addq %rax,%r8 + movq 8(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq 
%rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_tail + + leaq 64(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp .L8x_tail + +.align 32 +.L8x_tail_done: + xorq %rax,%rax + addq (%rdx),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rax + + negq %rsi +.L8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -8(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb .L8x_reduction_loop + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_sqr8x_internal,.-bn_sqr8x_internal +.type __bn_post4x_internal,@function +.align 32 +__bn_post4x_internal: +.cfi_startproc + movq 0(%rbp),%r12 + leaq (%rdi,%r9,1),%rbx + movq %r9,%rcx +.byte 102,72,15,126,207 + negq %rax +.byte 102,72,15,126,206 + sarq $3+2,%rcx + decq %r12 + xorq %r10,%r10 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqr4x_sub_entry + +.align 16 +.Lsqr4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +.Lsqr4x_sub_entry: + leaq 32(%rbp),%rbp + notq %r12 + notq %r13 + notq %r14 + notq %r15 + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + + negq %r10 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + adcq 16(%rbx),%r14 + adcq 24(%rbx),%r15 + movq %r12,0(%rdi) + leaq 32(%rbx),%rbx + movq %r13,8(%rdi) + sbbq %r10,%r10 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz .Lsqr4x_sub + + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 +.cfi_endproc +.size __bn_post4x_internal,.-__bn_post4x_internal +.type bn_mulx4x_mont_gather5,@function +.align 32 +bn_mulx4x_mont_gather5: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lmulx4x_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lmulx4x_prologue: + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmulx4xsp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lmulx4xsp_done + +.Lmulx4xsp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lmulx4xsp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk + jmp .Lmulx4x_page_walk_done + +.Lmulx4x_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lmulx4x_page_walk +.Lmulx4x_page_walk_done: + + + + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) 
+.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lmulx4x_body: + call mulx4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lmulx4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 + +.type mulx4x_internal,@function +.align 32 +mulx4x_internal: +.cfi_startproc + movq %r9,8(%rsp) + movq %r9,%r10 + negq %r9 + shlq $5,%r9 + negq %r10 + leaq 128(%rdx,%r9,1),%r13 + shrq $5+5,%r9 + movd 8(%rax),%xmm5 + subq $1,%r9 + leaq .Linc(%rip),%rax + movq %r13,16+8(%rsp) + movq %r9,24+8(%rsp) + movq %rdi,56+8(%rsp) + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 88-112(%rsp,%r10,1),%r10 + leaq 128(%rdx),%rdi + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 +.byte 0x67 + movdqa %xmm1,%xmm2 +.byte 0x67 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,112(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,128(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,144(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,160(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,176(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,192(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,208(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,224(%r10) + movdqa %xmm4,%xmm3 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,240(%r10) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,256(%r10) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,272(%r10) + movdqa %xmm4,%xmm2 + + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,288(%r10) + movdqa %xmm4,%xmm3 +.byte 0x67 + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,304(%r10) + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,320(%r10) + + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,336(%r10) + + pand 64(%rdi),%xmm0 + pand 80(%rdi),%xmm1 + pand 96(%rdi),%xmm2 + movdqa %xmm3,352(%r10) + pand 112(%rdi),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -128(%rdi),%xmm4 + movdqa -112(%rdi),%xmm5 + movdqa -96(%rdi),%xmm2 + pand 112(%r10),%xmm4 + movdqa -80(%rdi),%xmm3 + pand 128(%r10),%xmm5 + por %xmm4,%xmm0 + pand 144(%r10),%xmm2 + por %xmm5,%xmm1 + pand 160(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa -64(%rdi),%xmm4 + movdqa -48(%rdi),%xmm5 + movdqa -32(%rdi),%xmm2 + pand 176(%r10),%xmm4 + movdqa -16(%rdi),%xmm3 + pand 192(%r10),%xmm5 + por %xmm4,%xmm0 + pand 208(%r10),%xmm2 + por %xmm5,%xmm1 + pand 224(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + movdqa 0(%rdi),%xmm4 + movdqa 16(%rdi),%xmm5 + movdqa 32(%rdi),%xmm2 + pand 240(%r10),%xmm4 + movdqa 48(%rdi),%xmm3 + pand 256(%r10),%xmm5 + por %xmm4,%xmm0 + pand 272(%r10),%xmm2 + por %xmm5,%xmm1 + pand 288(%r10),%xmm3 + por %xmm2,%xmm0 + por %xmm3,%xmm1 + pxor %xmm1,%xmm0 + pshufd $0x4e,%xmm0,%xmm1 + por %xmm1,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + leaq 64+32+8(%rsp),%rbx + + movq %rdx,%r9 + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r12 + addq 
%rax,%r11 + mulxq 16(%rsi),%rax,%r13 + adcq %rax,%r12 + adcq $0,%r13 + mulxq 24(%rsi),%rax,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + xorq %rbp,%rbp + movq %r8,%rdx + + movq %rdi,8+8(%rsp) + + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r12,-16(%rbx) + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 8(%rsp),%rax + adcq %rbp,%r15 + leaq (%rsi,%rax,1),%rsi + addq %r15,%r14 + movq 8+8(%rsp),%rdi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + leaq 16-256(%rbx),%r10 + pxor %xmm4,%xmm4 +.byte 0x67,0x67 + pxor %xmm5,%xmm5 + movdqa -128(%rdi),%xmm0 + movdqa -112(%rdi),%xmm1 + movdqa -96(%rdi),%xmm2 + pand 256(%r10),%xmm0 + movdqa -80(%rdi),%xmm3 + pand 272(%r10),%xmm1 + por %xmm0,%xmm4 + pand 288(%r10),%xmm2 + por %xmm1,%xmm5 + pand 304(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%rdi),%xmm0 + movdqa -48(%rdi),%xmm1 + movdqa -32(%rdi),%xmm2 + pand 320(%r10),%xmm0 + movdqa -16(%rdi),%xmm3 + pand 336(%r10),%xmm1 + por %xmm0,%xmm4 + pand 352(%r10),%xmm2 + por %xmm1,%xmm5 + pand 368(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%rdi),%xmm0 + movdqa 16(%rdi),%xmm1 + movdqa 32(%rdi),%xmm2 + pand 384(%r10),%xmm0 + movdqa 48(%rdi),%xmm3 + pand 400(%r10),%xmm1 + por %xmm0,%xmm4 + pand 416(%r10),%xmm2 + por %xmm1,%xmm5 + pand 432(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%rdi),%xmm0 + movdqa 80(%rdi),%xmm1 + movdqa 96(%rdi),%xmm2 + pand 448(%r10),%xmm0 + movdqa 112(%rdi),%xmm3 + pand 464(%r10),%xmm1 + por %xmm0,%xmm4 + pand 480(%r10),%xmm2 + por %xmm1,%xmm5 + pand 496(%r10),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + leaq 256(%rdi),%rdi +.byte 102,72,15,126,194 + + movq %rbp,(%rbx) + leaq 32(%rbx,%rax,1),%rbx + mulxq 0(%rsi),%r8,%r11 + xorq %rbp,%rbp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + mulxq 24(%rsi),%rdx,%r14 + adoxq -16(%rbx),%r12 + adcxq %rdx,%r13 + leaq (%rcx,%rax,1),%rcx + leaq 32(%rsi),%rsi + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + adoxq %rbp,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + + movq %r8,%rdx + xorq %rbp,%rbp + movq %rdi,8+8(%rsp) + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 
24(%rcx),%rax,%r15 + movq %r9,%rdx + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r12 + movq %r11,-24(%rbx) + adoxq %rbp,%r15 + movq %r12,-16(%rbx) + leaq 32(%rcx),%rcx + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + movq %r11,-32(%rbx) + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + leaq 32(%rcx),%rcx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0+8(%rsp),%rax + adcq %rbp,%r15 + subq 0(%rbx),%rdi + movq 8+8(%rsp),%rdi + movq 16+8(%rsp),%r10 + adcq %r15,%r14 + leaq (%rsi,%rax,1),%rsi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + + cmpq %r10,%rdi + jb .Lmulx4x_outer + + movq -8(%rcx),%r10 + movq %rbp,%r8 + movq (%rcx,%rax,1),%r12 + leaq (%rcx,%rax,1),%rbp + movq %rax,%rcx + leaq (%rbx,%rax,1),%rdi + xorl %eax,%eax + xorq %r15,%r15 + subq %r14,%r10 + adcq %r15,%r15 + orq %r15,%r8 + sarq $3+2,%rcx + subq %r8,%rax + movq 56+8(%rsp),%rdx + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqrx4x_sub_entry +.cfi_endproc +.size mulx4x_internal,.-mulx4x_internal +.type bn_powerx5,@function +.align 32 +bn_powerx5: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax +.Lpowerx5_enter: + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 +.Lpowerx5_prologue: + + shll $3,%r9d + leaq (%r9,%r9,2),%r10 + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -320(%rsp,%r9,2),%r11 + movq %rsp,%rbp + subq %rdi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwrx_sp_alt + subq %r11,%rbp + leaq -320(%rbp,%r9,2),%rbp + jmp .Lpwrx_sp_done + +.align 32 +.Lpwrx_sp_alt: + leaq 4096-320(,%r9,2),%r10 + leaq -320(%rbp,%r9,2),%rbp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rbp +.Lpwrx_sp_done: + andq $-64,%rbp + movq %rsp,%r11 + subq %rbp,%r11 + andq $-4096,%r11 + leaq (%r11,%rbp,1),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwrx_page_walk + jmp .Lpwrx_page_walk_done + +.Lpwrx_page_walk: + leaq -4096(%rsp),%rsp + movq (%rsp),%r10 + cmpq %rbp,%rsp + ja .Lpwrx_page_walk +.Lpwrx_page_walk_done: + + movq %r9,%r10 + negq %r9 + + + + + + + + + + + + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +.Lpowerx5_body: + + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + call __bn_sqrx8x_internal + call __bn_postx4x_internal + + movq %r10,%r9 + movq %rsi,%rdi +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq 40(%rsp),%rax + + call mulx4x_internal + + movq 40(%rsp),%rsi +.cfi_def_cfa 
%rsi,8 + movq $1,%rax + + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpowerx5_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_powerx5,.-bn_powerx5 + +.globl bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.type bn_sqrx8x_internal,@function +.align 32 +bn_sqrx8x_internal: +__bn_sqrx8x_internal: +.cfi_startproc + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 48+8(%rsp),%rdi + leaq (%rsi,%r9,1),%rbp + movq %r9,0+8(%rsp) + movq %rbp,8+8(%rsp) + jmp .Lsqr8x_zero_start + +.align 32 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +.Lsqrx8x_zero: +.byte 0x3e + movdqa %xmm0,0(%rdi) + movdqa %xmm0,16(%rdi) + movdqa %xmm0,32(%rdi) + movdqa %xmm0,48(%rdi) +.Lsqr8x_zero_start: + movdqa %xmm0,64(%rdi) + movdqa %xmm0,80(%rdi) + movdqa %xmm0,96(%rdi) + movdqa %xmm0,112(%rdi) + leaq 128(%rdi),%rdi + subq $64,%r9 + jnz .Lsqrx8x_zero + + movq 0(%rsi),%rdx + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + leaq 48+8(%rsp),%rdi + xorq %rbp,%rbp + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_loop: + mulxq 8(%rsi),%r8,%rax + adcxq %r9,%r8 + adoxq %rax,%r10 + mulxq 16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 +.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcxq %r11,%r10 + adoxq %rax,%r12 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcxq %r12,%r11 + adoxq %rax,%r13 + mulxq 40(%rsi),%r12,%rax + adcxq %r13,%r12 + adoxq %rax,%r14 + mulxq 48(%rsi),%r13,%rax + adcxq %r14,%r13 + adoxq %r15,%rax + mulxq 56(%rsi),%r14,%r15 + movq 8(%rsi),%rdx + adcxq %rax,%r14 + adoxq %rbp,%r15 + adcq 64(%rdi),%r15 + movq %r8,8(%rdi) + movq %r9,16(%rdi) + sbbq %rcx,%rcx + xorq %rbp,%rbp + + + mulxq 16(%rsi),%r8,%rbx + mulxq 24(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 32(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %rbx,%r11 +.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcxq %r13,%r11 + adoxq %r14,%r12 +.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + movq 16(%rsi),%rdx + adcxq %rax,%r12 + adoxq %rbx,%r13 + adcxq %r15,%r13 + adoxq %rbp,%r14 + adcxq %rbp,%r14 + + movq %r8,24(%rdi) + movq %r9,32(%rdi) + + mulxq 24(%rsi),%r8,%rbx + mulxq 32(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 40(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %r13,%r11 +.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +.byte 0x3e + movq 24(%rsi),%rdx + adcxq %rbx,%r11 + adoxq %rax,%r12 + adcxq %r14,%r12 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + mulxq 32(%rsi),%r8,%rax + adoxq %rbp,%r13 + adcxq %rbp,%r13 + + mulxq 40(%rsi),%r9,%rbx + adcxq %r10,%r8 + adoxq %rax,%r9 + mulxq 48(%rsi),%r10,%rax + adcxq %r11,%r9 + adoxq %r12,%r10 + mulxq 56(%rsi),%r11,%r12 + movq 32(%rsi),%rdx + movq 40(%rsi),%r14 + adcxq %rbx,%r10 + adoxq %rax,%r11 + movq 48(%rsi),%r15 + adcxq %r13,%r11 + adoxq %rbp,%r12 + adcxq %rbp,%r12 + + movq %r8,56(%rdi) + movq %r9,64(%rdi) + + mulxq %r14,%r9,%rax + movq 56(%rsi),%r8 + adcxq %r10,%r9 + mulxq %r15,%r10,%rbx + adoxq %rax,%r10 + adcxq %r11,%r10 + mulxq %r8,%r11,%rax + movq %r14,%rdx + adoxq %rbx,%r11 + adcxq %r12,%r11 + + adcxq %rbp,%rax + + mulxq 
%r15,%r14,%rbx + mulxq %r8,%r12,%r13 + movq %r15,%rdx + leaq 64(%rsi),%rsi + adcxq %r14,%r11 + adoxq %rbx,%r12 + adcxq %rax,%r12 + adoxq %rbp,%r13 + +.byte 0x67,0x67 + mulxq %r8,%r8,%r14 + adcxq %r8,%r13 + adcxq %rbp,%r14 + + cmpq 8+8(%rsp),%rsi + je .Lsqrx8x_outer_break + + negq %rcx + movq $-8,%rcx + movq %rbp,%r15 + movq 64(%rdi),%r8 + adcxq 72(%rdi),%r9 + adcxq 80(%rdi),%r10 + adcxq 88(%rdi),%r11 + adcq 96(%rdi),%r12 + adcq 104(%rdi),%r13 + adcq 112(%rdi),%r14 + adcq 120(%rdi),%r15 + leaq (%rsi),%rbp + leaq 128(%rdi),%rdi + sbbq %rax,%rax + + movq -64(%rsi),%rdx + movq %rax,16+8(%rsp) + movq %rdi,24+8(%rsp) + + + xorl %eax,%eax + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_loop: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + movq %rbx,(%rdi,%rcx,8) + movl $0,%ebx + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + movq 8(%rsi,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rbx,%r15 + adcxq %rbx,%r15 + +.byte 0x67 + incq %rcx + jnz .Lsqrx8x_loop + + leaq 64(%rbp),%rbp + movq $-8,%rcx + cmpq 8+8(%rsp),%rbp + je .Lsqrx8x_break + + subq 16+8(%rsp),%rbx +.byte 0x66 + movq -64(%rsi),%rdx + adcxq 0(%rdi),%r8 + adcxq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi +.byte 0x67 + sbbq %rax,%rax + xorl %ebx,%ebx + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_break: + xorq %rbp,%rbp + subq 16+8(%rsp),%rbx + adcxq %rbp,%r8 + movq 24+8(%rsp),%rcx + adcxq %rbp,%r9 + movq 0(%rsi),%rdx + adcq $0,%r10 + movq %r8,0(%rdi) + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + cmpq %rcx,%rdi + je .Lsqrx8x_outer_loop + + movq %r9,8(%rdi) + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + movq 40(%rcx),%r13 + movq %r14,48(%rdi) + movq 48(%rcx),%r14 + movq %r15,56(%rdi) + movq 56(%rcx),%r15 + movq %rcx,%rdi + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_break: + movq %r9,72(%rdi) +.byte 102,72,15,126,217 + movq %r10,80(%rdi) + movq %r11,88(%rdi) + movq %r12,96(%rdi) + movq %r13,104(%rdi) + movq %r14,112(%rdi) + leaq 48+8(%rsp),%rdi + movq (%rsi,%rcx,1),%rdx + + movq 8(%rdi),%r11 + xorq %r10,%r10 + movq 0+8(%rsp),%r9 + adoxq %r11,%r11 + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + + +.align 32 +.Lsqrx4x_shift_n_add: + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax +.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 40(%rdi),%r11 + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + movq 16(%rsi,%rcx,1),%rdx + movq 48(%rdi),%r12 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 56(%rdi),%r13 + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax + movq 24(%rsi,%rcx,1),%rdx + leaq 32(%rcx),%rcx + movq 64(%rdi),%r10 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 72(%rdi),%r11 + movq %rax,32(%rdi) + movq %rbx,40(%rdi) + + mulxq %rdx,%rax,%rbx + 
adoxq %r10,%r10 + adcxq %r12,%rax + jrcxz .Lsqrx4x_shift_n_add_break +.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 80(%rdi),%r12 + movq 88(%rdi),%r13 + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi + nop + jmp .Lsqrx4x_shift_n_add + +.align 32 +.Lsqrx4x_shift_n_add_break: + adcxq %r13,%rbx + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi +.byte 102,72,15,126,213 +__bn_sqrx8x_reduction: + xorl %eax,%eax + movq 32+8(%rsp),%rbx + movq 48+8(%rsp),%rdx + leaq -64(%rbp,%r9,1),%rcx + + movq %rcx,0+8(%rsp) + movq %rdi,8+8(%rsp) + + leaq 48+8(%rsp),%rdi + jmp .Lsqrx8x_reduction_loop + +.align 32 +.Lsqrx8x_reduction_loop: + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq %rdx,%r8 + imulq %rbx,%rdx + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,24+8(%rsp) + + leaq 64(%rdi),%rdi + xorq %rsi,%rsi + movq $-8,%rcx + jmp .Lsqrx8x_reduce + +.align 32 +.Lsqrx8x_reduce: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rbx,%r9 + adcxq %rbx,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 32+8(%rsp),%rbx,%rdx + movq %rax,%rdx + movq %rax,64+48+8(%rsp,%rcx,8) + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + +.byte 0x67,0x67,0x67 + incq %rcx + jnz .Lsqrx8x_reduce + + movq %rsi,%rax + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_no_tail + + movq 48+8(%rsp),%rdx + addq 0(%rdi),%r8 + leaq 64(%rbp),%rbp + movq $-8,%rcx + adcxq 8(%rdi),%r9 + adcxq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq 72+48+8(%rsp,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + movq %rbx,(%rdi,%rcx,8) + movq %r8,%rbx + adcxq %rsi,%r15 + + incq %rcx + jnz .Lsqrx8x_tail + + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_tail_done + + subq 16+8(%rsp),%rsi + movq 48+8(%rsp),%rdx + leaq 64(%rbp),%rbp + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + subq $8,%rcx + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail_done: + xorq %rax,%rax + addq 24+8(%rsp),%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + adcq $0,%r12 + adcq $0,%r13 + adcq $0,%r14 + adcq $0,%r15 + adcq $0,%rax + + subq 16+8(%rsp),%rsi +.Lsqrx8x_no_tail: + adcq 0(%rdi),%r8 +.byte 102,72,15,126,217 + adcq 8(%rdi),%r9 
+ movq 56(%rbp),%rsi +.byte 102,72,15,126,213 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + + movq 32+8(%rsp),%rbx + movq 64(%rdi,%rcx,1),%rdx + + movq %r8,0(%rdi) + leaq 64(%rdi),%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 64(%rdi,%rcx,1),%rdi + cmpq 8+8(%rsp),%r8 + jb .Lsqrx8x_reduction_loop + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +.align 32 +__bn_postx4x_internal: +.cfi_startproc + movq 0(%rbp),%r12 + movq %rcx,%r10 + movq %rcx,%r9 + negq %rax + sarq $3+2,%rcx + +.byte 102,72,15,126,202 +.byte 102,72,15,126,206 + decq %r12 + movq 8(%rbp),%r13 + xorq %r8,%r8 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 + jmp .Lsqrx4x_sub_entry + +.align 16 +.Lsqrx4x_sub: + movq 0(%rbp),%r12 + movq 8(%rbp),%r13 + movq 16(%rbp),%r14 + movq 24(%rbp),%r15 +.Lsqrx4x_sub_entry: + andnq %rax,%r12,%r12 + leaq 32(%rbp),%rbp + andnq %rax,%r13,%r13 + andnq %rax,%r14,%r14 + andnq %rax,%r15,%r15 + + negq %r8 + adcq 0(%rdi),%r12 + adcq 8(%rdi),%r13 + adcq 16(%rdi),%r14 + adcq 24(%rdi),%r15 + movq %r12,0(%rdx) + leaq 32(%rdi),%rdi + movq %r13,8(%rdx) + sbbq %r8,%r8 + movq %r14,16(%rdx) + movq %r15,24(%rdx) + leaq 32(%rdx),%rdx + + incq %rcx + jnz .Lsqrx4x_sub + + negq %r9 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __bn_postx4x_internal,.-__bn_postx4x_internal +.globl bn_get_bits5 +.type bn_get_bits5,@function +.align 16 +bn_get_bits5: +.cfi_startproc + leaq 0(%rdi),%r10 + leaq 1(%rdi),%r11 + movl %esi,%ecx + shrl $4,%esi + andl $15,%ecx + leal -8(%rcx),%eax + cmpl $11,%ecx + cmovaq %r11,%r10 + cmoval %eax,%ecx + movzwl (%r10,%rsi,2),%eax + shrl %cl,%eax + andl $31,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_get_bits5,.-bn_get_bits5 + +.globl bn_scatter5 +.type bn_scatter5,@function +.align 16 +bn_scatter5: +.cfi_startproc + cmpl $0,%esi + jz .Lscatter_epilogue + leaq (%rdx,%rcx,8),%rdx +.Lscatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz .Lscatter +.Lscatter_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size bn_scatter5,.-bn_scatter5 + +.globl bn_gather5 +.type bn_gather5,@function +.align 32 +bn_gather5: +.LSEH_begin_bn_gather5: +.cfi_startproc + +.byte 0x4c,0x8d,0x14,0x24 +.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 + leaq .Linc(%rip),%rax + andq $-16,%rsp + + movd %ecx,%xmm5 + movdqa 0(%rax),%xmm0 + movdqa 16(%rax),%xmm1 + leaq 128(%rdx),%r11 + leaq 128(%rsp),%rax + + pshufd $0,%xmm5,%xmm5 + movdqa %xmm1,%xmm4 + movdqa %xmm1,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-128(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-112(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-96(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-80(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,-64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,-48(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,-32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,-16(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,0(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd 
%xmm5,%xmm2 + movdqa %xmm1,16(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,32(%rax) + movdqa %xmm4,%xmm2 + paddd %xmm0,%xmm1 + pcmpeqd %xmm5,%xmm0 + movdqa %xmm3,48(%rax) + movdqa %xmm4,%xmm3 + + paddd %xmm1,%xmm2 + pcmpeqd %xmm5,%xmm1 + movdqa %xmm0,64(%rax) + movdqa %xmm4,%xmm0 + + paddd %xmm2,%xmm3 + pcmpeqd %xmm5,%xmm2 + movdqa %xmm1,80(%rax) + movdqa %xmm4,%xmm1 + + paddd %xmm3,%xmm0 + pcmpeqd %xmm5,%xmm3 + movdqa %xmm2,96(%rax) + movdqa %xmm4,%xmm2 + movdqa %xmm3,112(%rax) + jmp .Lgather + +.align 32 +.Lgather: + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + movdqa -128(%r11),%xmm0 + movdqa -112(%r11),%xmm1 + movdqa -96(%r11),%xmm2 + pand -128(%rax),%xmm0 + movdqa -80(%r11),%xmm3 + pand -112(%rax),%xmm1 + por %xmm0,%xmm4 + pand -96(%rax),%xmm2 + por %xmm1,%xmm5 + pand -80(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa -64(%r11),%xmm0 + movdqa -48(%r11),%xmm1 + movdqa -32(%r11),%xmm2 + pand -64(%rax),%xmm0 + movdqa -16(%r11),%xmm3 + pand -48(%rax),%xmm1 + por %xmm0,%xmm4 + pand -32(%rax),%xmm2 + por %xmm1,%xmm5 + pand -16(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + pand 0(%rax),%xmm0 + movdqa 48(%r11),%xmm3 + pand 16(%rax),%xmm1 + por %xmm0,%xmm4 + pand 32(%rax),%xmm2 + por %xmm1,%xmm5 + pand 48(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + movdqa 64(%r11),%xmm0 + movdqa 80(%r11),%xmm1 + movdqa 96(%r11),%xmm2 + pand 64(%rax),%xmm0 + movdqa 112(%r11),%xmm3 + pand 80(%rax),%xmm1 + por %xmm0,%xmm4 + pand 96(%rax),%xmm2 + por %xmm1,%xmm5 + pand 112(%rax),%xmm3 + por %xmm2,%xmm4 + por %xmm3,%xmm5 + por %xmm5,%xmm4 + leaq 256(%r11),%r11 + pshufd $0x4e,%xmm4,%xmm0 + por %xmm4,%xmm0 + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz .Lgather + + leaq (%r10),%rsp + .byte 0xf3,0xc3 +.LSEH_end_bn_gather5: +.cfi_endproc +.size bn_gather5,.-bn_gather5 +.section .rodata +.align 64 +.Linc: +.long 0,0, 1,1 +.long 2,2, 2,2 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/camellia/cmll-x86_64.s b/contrib/openssl-cmake/asm/crypto/camellia/cmll-x86_64.s new file mode 100644 index 000000000000..c845f97c7b0e --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/camellia/cmll-x86_64.s @@ -0,0 +1,1927 @@ +.text + + +.globl Camellia_EncryptBlock +.type Camellia_EncryptBlock,@function +.align 16 +Camellia_EncryptBlock: +.cfi_startproc + movl $128,%eax + subl %edi,%eax + movl $3,%edi + adcl $0,%edi + jmp .Lenc_rounds +.cfi_endproc +.size Camellia_EncryptBlock,.-Camellia_EncryptBlock + +.globl Camellia_EncryptBlock_Rounds +.type Camellia_EncryptBlock_Rounds,@function +.align 16 +.Lenc_rounds: +Camellia_EncryptBlock_Rounds: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-32 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-40 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-48 +.Lenc_prologue: + + + movq %rcx,%r13 + movq %rdx,%r14 + + shll $6,%edi + leaq .LCamellia_SBOX(%rip),%rbp + leaq (%r14,%rdi,1),%r15 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + bswapl %r8d + 
movl 12(%rsi),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_encrypt + + bswapl %r8d + bswapl %r9d + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%rbp +.cfi_restore %rbp + movq 32(%rsp),%rbx +.cfi_restore %rbx + leaq 40(%rsp),%rsp +.cfi_adjust_cfa_offset -40 +.Lenc_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds + +.type _x86_64_Camellia_encrypt,@function +.align 16 +_x86_64_Camellia_encrypt: +.cfi_startproc + xorl 0(%r14),%r9d + xorl 4(%r14),%r8d + xorl 8(%r14),%r11d + xorl 12(%r14),%r10d +.align 16 +.Leloop: + movl 16(%r14),%ebx + movl 20(%r14),%eax + + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 24(%r14),%ebx + movl 28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 32(%r14),%ebx + movl 36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 40(%r14),%ebx + movl 44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 48(%r14),%ebx + movl 52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + 
movl 56(%r14),%ebx + movl 60(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 64(%r14),%ebx + movl 68(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + leaq 64(%r14),%r14 + cmpq %r15,%r14 + movl 8(%r14),%edx + movl 12(%r14),%ecx + je .Ledone + + andl %r8d,%eax + orl %r11d,%edx + roll $1,%eax + xorl %edx,%r10d + xorl %eax,%r9d + andl %r10d,%ecx + orl %r9d,%ebx + roll $1,%ecx + xorl %ebx,%r8d + xorl %ecx,%r11d + jmp .Leloop + +.align 16 +.Ledone: + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r8d,%ecx + xorl %r9d,%edx + + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r10d + movl %edx,%r11d + +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt + + +.globl Camellia_DecryptBlock +.type Camellia_DecryptBlock,@function +.align 16 +Camellia_DecryptBlock: +.cfi_startproc + movl $128,%eax + subl %edi,%eax + movl $3,%edi + adcl $0,%edi + jmp .Ldec_rounds +.cfi_endproc +.size Camellia_DecryptBlock,.-Camellia_DecryptBlock + +.globl Camellia_DecryptBlock_Rounds +.type Camellia_DecryptBlock_Rounds,@function +.align 16 +.Ldec_rounds: +Camellia_DecryptBlock_Rounds: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-32 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-40 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-48 +.Ldec_prologue: + + + movq %rcx,%r13 + movq %rdx,%r15 + + shll $6,%edi + leaq .LCamellia_SBOX(%rip),%rbp + leaq (%r15,%rdi,1),%r14 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + bswapl %r8d + movl 12(%rsi),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_decrypt + + bswapl %r8d + bswapl %r9d + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%rbp +.cfi_restore %rbp + movq 32(%rsp),%rbx +.cfi_restore %rbx + leaq 40(%rsp),%rsp +.cfi_adjust_cfa_offset -40 +.Ldec_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds + +.type _x86_64_Camellia_decrypt,@function +.align 16 +_x86_64_Camellia_decrypt: +.cfi_startproc + xorl 0(%r14),%r9d + xorl 4(%r14),%r8d + xorl 8(%r14),%r11d + xorl 12(%r14),%r10d +.align 16 +.Ldloop: + movl -8(%r14),%ebx + movl -4(%r14),%eax + + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -16(%r14),%ebx + movl -12(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 
%ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -24(%r14),%ebx + movl -20(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -32(%r14),%ebx + movl -28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -40(%r14),%ebx + movl -36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -48(%r14),%ebx + movl -44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -56(%r14),%ebx + movl -52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + leaq -64(%r14),%r14 + cmpq %r15,%r14 + movl 0(%r14),%edx + movl 4(%r14),%ecx + je .Lddone + + andl %r8d,%eax + orl %r11d,%edx + roll $1,%eax + xorl %edx,%r10d + xorl %eax,%r9d + andl %r10d,%ecx + orl %r9d,%ebx + roll $1,%ecx + xorl %ebx,%r8d + xorl %ecx,%r11d + + jmp .Ldloop + +.align 16 +.Lddone: + xorl %r10d,%ecx + xorl %r11d,%edx + xorl %r8d,%eax + xorl %r9d,%ebx + + movl %ecx,%r8d + movl %edx,%r9d + movl %eax,%r10d + movl %ebx,%r11d + +.byte 0xf3,0xc3 +.cfi_endproc +.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt +.globl Camellia_Ekeygen +.type Camellia_Ekeygen,@function +.align 16 +Camellia_Ekeygen: +.cfi_startproc + pushq %rbx 
+.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-32 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-40 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-48 +.Lkey_prologue: + + movl %edi,%r15d + movq %rdx,%r13 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + movl 12(%rsi),%r11d + + bswapl %r8d + bswapl %r9d + bswapl %r10d + bswapl %r11d + movl %r9d,0(%r13) + movl %r8d,4(%r13) + movl %r11d,8(%r13) + movl %r10d,12(%r13) + cmpq $128,%r15 + je .L1st128 + + movl 16(%rsi),%r8d + movl 20(%rsi),%r9d + cmpq $192,%r15 + je .L1st192 + movl 24(%rsi),%r10d + movl 28(%rsi),%r11d + jmp .L1st256 +.L1st192: + movl %r8d,%r10d + movl %r9d,%r11d + notl %r10d + notl %r11d +.L1st256: + bswapl %r8d + bswapl %r9d + bswapl %r10d + bswapl %r11d + movl %r9d,32(%r13) + movl %r8d,36(%r13) + movl %r11d,40(%r13) + movl %r10d,44(%r13) + xorl 0(%r13),%r9d + xorl 4(%r13),%r8d + xorl 8(%r13),%r11d + xorl 12(%r13),%r10d + +.L1st128: + leaq .LCamellia_SIGMA(%rip),%r14 + leaq .LCamellia_SBOX(%rip),%rbp + + movl 0(%r14),%ebx + movl 4(%r14),%eax + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 8(%r14),%ebx + movl 12(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 16(%r14),%ebx + movl 20(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl 0(%r13),%r9d + xorl 4(%r13),%r8d + xorl 8(%r13),%r11d + xorl 12(%r13),%r10d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 24(%r14),%ebx + movl 28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 32(%r14),%ebx + movl 36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + cmpq $128,%r15 + jne .L2nd256 + + 
leaq 128(%r13),%r13 + shlq $32,%r8 + shlq $32,%r10 + orq %r9,%r8 + orq %r11,%r10 + movq -128(%r13),%rax + movq -120(%r13),%rbx + movq %r8,-112(%r13) + movq %r10,-104(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rax,-96(%r13) + movq %rbx,-88(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-80(%r13) + movq %r10,-72(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-64(%r13) + movq %r10,-56(%r13) + movq %rax,%r11 + shlq $30,%rax + movq %rbx,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%rax + shlq $30,%rbx + orq %r11,%rbx + movq %rax,-48(%r13) + movq %rbx,-40(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-32(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rbx,-24(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-16(%r13) + movq %r10,-8(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,0(%r13) + movq %rbx,8(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,16(%r13) + movq %rbx,24(%r13) + movq %r8,%r11 + shlq $34,%r8 + movq %r10,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%r8 + shlq $34,%r10 + orq %r11,%r10 + movq %r8,32(%r13) + movq %r10,40(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,48(%r13) + movq %rbx,56(%r13) + movq %r8,%r11 + shlq $17,%r8 + movq %r10,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%r8 + shlq $17,%r10 + orq %r11,%r10 + movq %r8,64(%r13) + movq %r10,72(%r13) + movl $3,%eax + jmp .Ldone +.align 16 +.L2nd256: + movl %r9d,48(%r13) + movl %r8d,52(%r13) + movl %r11d,56(%r13) + movl %r10d,60(%r13) + xorl 32(%r13),%r9d + xorl 36(%r13),%r8d + xorl 40(%r13),%r11d + xorl 44(%r13),%r10d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 40(%r14),%ebx + movl 44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 48(%r14),%ebx + movl 52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + movq 0(%r13),%rax + movq 8(%r13),%rbx + movq 
32(%r13),%rcx + movq 40(%r13),%rdx + movq 48(%r13),%r14 + movq 56(%r13),%r15 + leaq 128(%r13),%r13 + shlq $32,%r8 + shlq $32,%r10 + orq %r9,%r8 + orq %r11,%r10 + movq %r8,-112(%r13) + movq %r10,-104(%r13) + movq %rcx,%r11 + shlq $15,%rcx + movq %rdx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rcx + shlq $15,%rdx + orq %r11,%rdx + movq %rcx,-96(%r13) + movq %rdx,-88(%r13) + movq %r14,%r11 + shlq $15,%r14 + movq %r15,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r14 + shlq $15,%r15 + orq %r11,%r15 + movq %r14,-80(%r13) + movq %r15,-72(%r13) + movq %rcx,%r11 + shlq $15,%rcx + movq %rdx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rcx + shlq $15,%rdx + orq %r11,%rdx + movq %rcx,-64(%r13) + movq %rdx,-56(%r13) + movq %r8,%r11 + shlq $30,%r8 + movq %r10,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r8 + shlq $30,%r10 + orq %r11,%r10 + movq %r8,-48(%r13) + movq %r10,-40(%r13) + movq %rax,%r11 + shlq $45,%rax + movq %rbx,%r9 + shrq $19,%r9 + shrq $19,%r11 + orq %r9,%rax + shlq $45,%rbx + orq %r11,%rbx + movq %rax,-32(%r13) + movq %rbx,-24(%r13) + movq %r14,%r11 + shlq $30,%r14 + movq %r15,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r14 + shlq $30,%r15 + orq %r11,%r15 + movq %r14,-16(%r13) + movq %r15,-8(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rax,0(%r13) + movq %rbx,8(%r13) + movq %rcx,%r11 + shlq $30,%rcx + movq %rdx,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%rcx + shlq $30,%rdx + orq %r11,%rdx + movq %rcx,16(%r13) + movq %rdx,24(%r13) + movq %r8,%r11 + shlq $30,%r8 + movq %r10,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r8 + shlq $30,%r10 + orq %r11,%r10 + movq %r8,32(%r13) + movq %r10,40(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,48(%r13) + movq %rbx,56(%r13) + movq %r14,%r11 + shlq $32,%r14 + movq %r15,%r9 + shrq $32,%r9 + shrq $32,%r11 + orq %r9,%r14 + shlq $32,%r15 + orq %r11,%r15 + movq %r14,64(%r13) + movq %r15,72(%r13) + movq %rcx,%r11 + shlq $34,%rcx + movq %rdx,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%rcx + shlq $34,%rdx + orq %r11,%rdx + movq %rcx,80(%r13) + movq %rdx,88(%r13) + movq %r14,%r11 + shlq $17,%r14 + movq %r15,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%r14 + shlq $17,%r15 + orq %r11,%r15 + movq %r14,96(%r13) + movq %r15,104(%r13) + movq %rax,%r11 + shlq $34,%rax + movq %rbx,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%rax + shlq $34,%rbx + orq %r11,%rbx + movq %rax,112(%r13) + movq %rbx,120(%r13) + movq %r8,%r11 + shlq $51,%r8 + movq %r10,%r9 + shrq $13,%r9 + shrq $13,%r11 + orq %r9,%r8 + shlq $51,%r10 + orq %r11,%r10 + movq %r8,128(%r13) + movq %r10,136(%r13) + movl $4,%eax +.Ldone: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%rbp +.cfi_restore %rbp + movq 32(%rsp),%rbx +.cfi_restore %rbx + leaq 40(%rsp),%rsp +.cfi_adjust_cfa_offset -40 +.Lkey_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size Camellia_Ekeygen,.-Camellia_Ekeygen +.section .rodata +.align 64 +.LCamellia_SIGMA: +.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 +.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 +.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 +.long 0, 0, 0, 0 +.LCamellia_SBOX: +.long 0x70707000,0x70700070 +.long 0x82828200,0x2c2c002c +.long 0x2c2c2c00,0xb3b300b3 +.long 0xececec00,0xc0c000c0 +.long 0xb3b3b300,0xe4e400e4 +.long 0x27272700,0x57570057 +.long 0xc0c0c000,0xeaea00ea 
+.long 0xe5e5e500,0xaeae00ae +.long 0xe4e4e400,0x23230023 +.long 0x85858500,0x6b6b006b +.long 0x57575700,0x45450045 +.long 0x35353500,0xa5a500a5 +.long 0xeaeaea00,0xeded00ed +.long 0x0c0c0c00,0x4f4f004f +.long 0xaeaeae00,0x1d1d001d +.long 0x41414100,0x92920092 +.long 0x23232300,0x86860086 +.long 0xefefef00,0xafaf00af +.long 0x6b6b6b00,0x7c7c007c +.long 0x93939300,0x1f1f001f +.long 0x45454500,0x3e3e003e +.long 0x19191900,0xdcdc00dc +.long 0xa5a5a500,0x5e5e005e +.long 0x21212100,0x0b0b000b +.long 0xededed00,0xa6a600a6 +.long 0x0e0e0e00,0x39390039 +.long 0x4f4f4f00,0xd5d500d5 +.long 0x4e4e4e00,0x5d5d005d +.long 0x1d1d1d00,0xd9d900d9 +.long 0x65656500,0x5a5a005a +.long 0x92929200,0x51510051 +.long 0xbdbdbd00,0x6c6c006c +.long 0x86868600,0x8b8b008b +.long 0xb8b8b800,0x9a9a009a +.long 0xafafaf00,0xfbfb00fb +.long 0x8f8f8f00,0xb0b000b0 +.long 0x7c7c7c00,0x74740074 +.long 0xebebeb00,0x2b2b002b +.long 0x1f1f1f00,0xf0f000f0 +.long 0xcecece00,0x84840084 +.long 0x3e3e3e00,0xdfdf00df +.long 0x30303000,0xcbcb00cb +.long 0xdcdcdc00,0x34340034 +.long 0x5f5f5f00,0x76760076 +.long 0x5e5e5e00,0x6d6d006d +.long 0xc5c5c500,0xa9a900a9 +.long 0x0b0b0b00,0xd1d100d1 +.long 0x1a1a1a00,0x04040004 +.long 0xa6a6a600,0x14140014 +.long 0xe1e1e100,0x3a3a003a +.long 0x39393900,0xdede00de +.long 0xcacaca00,0x11110011 +.long 0xd5d5d500,0x32320032 +.long 0x47474700,0x9c9c009c +.long 0x5d5d5d00,0x53530053 +.long 0x3d3d3d00,0xf2f200f2 +.long 0xd9d9d900,0xfefe00fe +.long 0x01010100,0xcfcf00cf +.long 0x5a5a5a00,0xc3c300c3 +.long 0xd6d6d600,0x7a7a007a +.long 0x51515100,0x24240024 +.long 0x56565600,0xe8e800e8 +.long 0x6c6c6c00,0x60600060 +.long 0x4d4d4d00,0x69690069 +.long 0x8b8b8b00,0xaaaa00aa +.long 0x0d0d0d00,0xa0a000a0 +.long 0x9a9a9a00,0xa1a100a1 +.long 0x66666600,0x62620062 +.long 0xfbfbfb00,0x54540054 +.long 0xcccccc00,0x1e1e001e +.long 0xb0b0b000,0xe0e000e0 +.long 0x2d2d2d00,0x64640064 +.long 0x74747400,0x10100010 +.long 0x12121200,0x00000000 +.long 0x2b2b2b00,0xa3a300a3 +.long 0x20202000,0x75750075 +.long 0xf0f0f000,0x8a8a008a +.long 0xb1b1b100,0xe6e600e6 +.long 0x84848400,0x09090009 +.long 0x99999900,0xdddd00dd +.long 0xdfdfdf00,0x87870087 +.long 0x4c4c4c00,0x83830083 +.long 0xcbcbcb00,0xcdcd00cd +.long 0xc2c2c200,0x90900090 +.long 0x34343400,0x73730073 +.long 0x7e7e7e00,0xf6f600f6 +.long 0x76767600,0x9d9d009d +.long 0x05050500,0xbfbf00bf +.long 0x6d6d6d00,0x52520052 +.long 0xb7b7b700,0xd8d800d8 +.long 0xa9a9a900,0xc8c800c8 +.long 0x31313100,0xc6c600c6 +.long 0xd1d1d100,0x81810081 +.long 0x17171700,0x6f6f006f +.long 0x04040400,0x13130013 +.long 0xd7d7d700,0x63630063 +.long 0x14141400,0xe9e900e9 +.long 0x58585800,0xa7a700a7 +.long 0x3a3a3a00,0x9f9f009f +.long 0x61616100,0xbcbc00bc +.long 0xdedede00,0x29290029 +.long 0x1b1b1b00,0xf9f900f9 +.long 0x11111100,0x2f2f002f +.long 0x1c1c1c00,0xb4b400b4 +.long 0x32323200,0x78780078 +.long 0x0f0f0f00,0x06060006 +.long 0x9c9c9c00,0xe7e700e7 +.long 0x16161600,0x71710071 +.long 0x53535300,0xd4d400d4 +.long 0x18181800,0xabab00ab +.long 0xf2f2f200,0x88880088 +.long 0x22222200,0x8d8d008d +.long 0xfefefe00,0x72720072 +.long 0x44444400,0xb9b900b9 +.long 0xcfcfcf00,0xf8f800f8 +.long 0xb2b2b200,0xacac00ac +.long 0xc3c3c300,0x36360036 +.long 0xb5b5b500,0x2a2a002a +.long 0x7a7a7a00,0x3c3c003c +.long 0x91919100,0xf1f100f1 +.long 0x24242400,0x40400040 +.long 0x08080800,0xd3d300d3 +.long 0xe8e8e800,0xbbbb00bb +.long 0xa8a8a800,0x43430043 +.long 0x60606000,0x15150015 +.long 0xfcfcfc00,0xadad00ad +.long 0x69696900,0x77770077 +.long 0x50505000,0x80800080 +.long 0xaaaaaa00,0x82820082 +.long 
0xd0d0d000,0xecec00ec +.long 0xa0a0a000,0x27270027 +.long 0x7d7d7d00,0xe5e500e5 +.long 0xa1a1a100,0x85850085 +.long 0x89898900,0x35350035 +.long 0x62626200,0x0c0c000c +.long 0x97979700,0x41410041 +.long 0x54545400,0xefef00ef +.long 0x5b5b5b00,0x93930093 +.long 0x1e1e1e00,0x19190019 +.long 0x95959500,0x21210021 +.long 0xe0e0e000,0x0e0e000e +.long 0xffffff00,0x4e4e004e +.long 0x64646400,0x65650065 +.long 0xd2d2d200,0xbdbd00bd +.long 0x10101000,0xb8b800b8 +.long 0xc4c4c400,0x8f8f008f +.long 0x00000000,0xebeb00eb +.long 0x48484800,0xcece00ce +.long 0xa3a3a300,0x30300030 +.long 0xf7f7f700,0x5f5f005f +.long 0x75757500,0xc5c500c5 +.long 0xdbdbdb00,0x1a1a001a +.long 0x8a8a8a00,0xe1e100e1 +.long 0x03030300,0xcaca00ca +.long 0xe6e6e600,0x47470047 +.long 0xdadada00,0x3d3d003d +.long 0x09090900,0x01010001 +.long 0x3f3f3f00,0xd6d600d6 +.long 0xdddddd00,0x56560056 +.long 0x94949400,0x4d4d004d +.long 0x87878700,0x0d0d000d +.long 0x5c5c5c00,0x66660066 +.long 0x83838300,0xcccc00cc +.long 0x02020200,0x2d2d002d +.long 0xcdcdcd00,0x12120012 +.long 0x4a4a4a00,0x20200020 +.long 0x90909000,0xb1b100b1 +.long 0x33333300,0x99990099 +.long 0x73737300,0x4c4c004c +.long 0x67676700,0xc2c200c2 +.long 0xf6f6f600,0x7e7e007e +.long 0xf3f3f300,0x05050005 +.long 0x9d9d9d00,0xb7b700b7 +.long 0x7f7f7f00,0x31310031 +.long 0xbfbfbf00,0x17170017 +.long 0xe2e2e200,0xd7d700d7 +.long 0x52525200,0x58580058 +.long 0x9b9b9b00,0x61610061 +.long 0xd8d8d800,0x1b1b001b +.long 0x26262600,0x1c1c001c +.long 0xc8c8c800,0x0f0f000f +.long 0x37373700,0x16160016 +.long 0xc6c6c600,0x18180018 +.long 0x3b3b3b00,0x22220022 +.long 0x81818100,0x44440044 +.long 0x96969600,0xb2b200b2 +.long 0x6f6f6f00,0xb5b500b5 +.long 0x4b4b4b00,0x91910091 +.long 0x13131300,0x08080008 +.long 0xbebebe00,0xa8a800a8 +.long 0x63636300,0xfcfc00fc +.long 0x2e2e2e00,0x50500050 +.long 0xe9e9e900,0xd0d000d0 +.long 0x79797900,0x7d7d007d +.long 0xa7a7a700,0x89890089 +.long 0x8c8c8c00,0x97970097 +.long 0x9f9f9f00,0x5b5b005b +.long 0x6e6e6e00,0x95950095 +.long 0xbcbcbc00,0xffff00ff +.long 0x8e8e8e00,0xd2d200d2 +.long 0x29292900,0xc4c400c4 +.long 0xf5f5f500,0x48480048 +.long 0xf9f9f900,0xf7f700f7 +.long 0xb6b6b600,0xdbdb00db +.long 0x2f2f2f00,0x03030003 +.long 0xfdfdfd00,0xdada00da +.long 0xb4b4b400,0x3f3f003f +.long 0x59595900,0x94940094 +.long 0x78787800,0x5c5c005c +.long 0x98989800,0x02020002 +.long 0x06060600,0x4a4a004a +.long 0x6a6a6a00,0x33330033 +.long 0xe7e7e700,0x67670067 +.long 0x46464600,0xf3f300f3 +.long 0x71717100,0x7f7f007f +.long 0xbababa00,0xe2e200e2 +.long 0xd4d4d400,0x9b9b009b +.long 0x25252500,0x26260026 +.long 0xababab00,0x37370037 +.long 0x42424200,0x3b3b003b +.long 0x88888800,0x96960096 +.long 0xa2a2a200,0x4b4b004b +.long 0x8d8d8d00,0xbebe00be +.long 0xfafafa00,0x2e2e002e +.long 0x72727200,0x79790079 +.long 0x07070700,0x8c8c008c +.long 0xb9b9b900,0x6e6e006e +.long 0x55555500,0x8e8e008e +.long 0xf8f8f800,0xf5f500f5 +.long 0xeeeeee00,0xb6b600b6 +.long 0xacacac00,0xfdfd00fd +.long 0x0a0a0a00,0x59590059 +.long 0x36363600,0x98980098 +.long 0x49494900,0x6a6a006a +.long 0x2a2a2a00,0x46460046 +.long 0x68686800,0xbaba00ba +.long 0x3c3c3c00,0x25250025 +.long 0x38383800,0x42420042 +.long 0xf1f1f100,0xa2a200a2 +.long 0xa4a4a400,0xfafa00fa +.long 0x40404000,0x07070007 +.long 0x28282800,0x55550055 +.long 0xd3d3d300,0xeeee00ee +.long 0x7b7b7b00,0x0a0a000a +.long 0xbbbbbb00,0x49490049 +.long 0xc9c9c900,0x68680068 +.long 0x43434300,0x38380038 +.long 0xc1c1c100,0xa4a400a4 +.long 0x15151500,0x28280028 +.long 0xe3e3e300,0x7b7b007b +.long 0xadadad00,0xc9c900c9 +.long 
0xf4f4f400,0xc1c100c1 +.long 0x77777700,0xe3e300e3 +.long 0xc7c7c700,0xf4f400f4 +.long 0x80808000,0xc7c700c7 +.long 0x9e9e9e00,0x9e9e009e +.long 0x00e0e0e0,0x38003838 +.long 0x00050505,0x41004141 +.long 0x00585858,0x16001616 +.long 0x00d9d9d9,0x76007676 +.long 0x00676767,0xd900d9d9 +.long 0x004e4e4e,0x93009393 +.long 0x00818181,0x60006060 +.long 0x00cbcbcb,0xf200f2f2 +.long 0x00c9c9c9,0x72007272 +.long 0x000b0b0b,0xc200c2c2 +.long 0x00aeaeae,0xab00abab +.long 0x006a6a6a,0x9a009a9a +.long 0x00d5d5d5,0x75007575 +.long 0x00181818,0x06000606 +.long 0x005d5d5d,0x57005757 +.long 0x00828282,0xa000a0a0 +.long 0x00464646,0x91009191 +.long 0x00dfdfdf,0xf700f7f7 +.long 0x00d6d6d6,0xb500b5b5 +.long 0x00272727,0xc900c9c9 +.long 0x008a8a8a,0xa200a2a2 +.long 0x00323232,0x8c008c8c +.long 0x004b4b4b,0xd200d2d2 +.long 0x00424242,0x90009090 +.long 0x00dbdbdb,0xf600f6f6 +.long 0x001c1c1c,0x07000707 +.long 0x009e9e9e,0xa700a7a7 +.long 0x009c9c9c,0x27002727 +.long 0x003a3a3a,0x8e008e8e +.long 0x00cacaca,0xb200b2b2 +.long 0x00252525,0x49004949 +.long 0x007b7b7b,0xde00dede +.long 0x000d0d0d,0x43004343 +.long 0x00717171,0x5c005c5c +.long 0x005f5f5f,0xd700d7d7 +.long 0x001f1f1f,0xc700c7c7 +.long 0x00f8f8f8,0x3e003e3e +.long 0x00d7d7d7,0xf500f5f5 +.long 0x003e3e3e,0x8f008f8f +.long 0x009d9d9d,0x67006767 +.long 0x007c7c7c,0x1f001f1f +.long 0x00606060,0x18001818 +.long 0x00b9b9b9,0x6e006e6e +.long 0x00bebebe,0xaf00afaf +.long 0x00bcbcbc,0x2f002f2f +.long 0x008b8b8b,0xe200e2e2 +.long 0x00161616,0x85008585 +.long 0x00343434,0x0d000d0d +.long 0x004d4d4d,0x53005353 +.long 0x00c3c3c3,0xf000f0f0 +.long 0x00727272,0x9c009c9c +.long 0x00959595,0x65006565 +.long 0x00ababab,0xea00eaea +.long 0x008e8e8e,0xa300a3a3 +.long 0x00bababa,0xae00aeae +.long 0x007a7a7a,0x9e009e9e +.long 0x00b3b3b3,0xec00ecec +.long 0x00020202,0x80008080 +.long 0x00b4b4b4,0x2d002d2d +.long 0x00adadad,0x6b006b6b +.long 0x00a2a2a2,0xa800a8a8 +.long 0x00acacac,0x2b002b2b +.long 0x00d8d8d8,0x36003636 +.long 0x009a9a9a,0xa600a6a6 +.long 0x00171717,0xc500c5c5 +.long 0x001a1a1a,0x86008686 +.long 0x00353535,0x4d004d4d +.long 0x00cccccc,0x33003333 +.long 0x00f7f7f7,0xfd00fdfd +.long 0x00999999,0x66006666 +.long 0x00616161,0x58005858 +.long 0x005a5a5a,0x96009696 +.long 0x00e8e8e8,0x3a003a3a +.long 0x00242424,0x09000909 +.long 0x00565656,0x95009595 +.long 0x00404040,0x10001010 +.long 0x00e1e1e1,0x78007878 +.long 0x00636363,0xd800d8d8 +.long 0x00090909,0x42004242 +.long 0x00333333,0xcc00cccc +.long 0x00bfbfbf,0xef00efef +.long 0x00989898,0x26002626 +.long 0x00979797,0xe500e5e5 +.long 0x00858585,0x61006161 +.long 0x00686868,0x1a001a1a +.long 0x00fcfcfc,0x3f003f3f +.long 0x00ececec,0x3b003b3b +.long 0x000a0a0a,0x82008282 +.long 0x00dadada,0xb600b6b6 +.long 0x006f6f6f,0xdb00dbdb +.long 0x00535353,0xd400d4d4 +.long 0x00626262,0x98009898 +.long 0x00a3a3a3,0xe800e8e8 +.long 0x002e2e2e,0x8b008b8b +.long 0x00080808,0x02000202 +.long 0x00afafaf,0xeb00ebeb +.long 0x00282828,0x0a000a0a +.long 0x00b0b0b0,0x2c002c2c +.long 0x00747474,0x1d001d1d +.long 0x00c2c2c2,0xb000b0b0 +.long 0x00bdbdbd,0x6f006f6f +.long 0x00363636,0x8d008d8d +.long 0x00222222,0x88008888 +.long 0x00383838,0x0e000e0e +.long 0x00646464,0x19001919 +.long 0x001e1e1e,0x87008787 +.long 0x00393939,0x4e004e4e +.long 0x002c2c2c,0x0b000b0b +.long 0x00a6a6a6,0xa900a9a9 +.long 0x00303030,0x0c000c0c +.long 0x00e5e5e5,0x79007979 +.long 0x00444444,0x11001111 +.long 0x00fdfdfd,0x7f007f7f +.long 0x00888888,0x22002222 +.long 0x009f9f9f,0xe700e7e7 +.long 0x00656565,0x59005959 +.long 0x00878787,0xe100e1e1 +.long 
0x006b6b6b,0xda00dada +.long 0x00f4f4f4,0x3d003d3d +.long 0x00232323,0xc800c8c8 +.long 0x00484848,0x12001212 +.long 0x00101010,0x04000404 +.long 0x00d1d1d1,0x74007474 +.long 0x00515151,0x54005454 +.long 0x00c0c0c0,0x30003030 +.long 0x00f9f9f9,0x7e007e7e +.long 0x00d2d2d2,0xb400b4b4 +.long 0x00a0a0a0,0x28002828 +.long 0x00555555,0x55005555 +.long 0x00a1a1a1,0x68006868 +.long 0x00414141,0x50005050 +.long 0x00fafafa,0xbe00bebe +.long 0x00434343,0xd000d0d0 +.long 0x00131313,0xc400c4c4 +.long 0x00c4c4c4,0x31003131 +.long 0x002f2f2f,0xcb00cbcb +.long 0x00a8a8a8,0x2a002a2a +.long 0x00b6b6b6,0xad00adad +.long 0x003c3c3c,0x0f000f0f +.long 0x002b2b2b,0xca00caca +.long 0x00c1c1c1,0x70007070 +.long 0x00ffffff,0xff00ffff +.long 0x00c8c8c8,0x32003232 +.long 0x00a5a5a5,0x69006969 +.long 0x00202020,0x08000808 +.long 0x00898989,0x62006262 +.long 0x00000000,0x00000000 +.long 0x00909090,0x24002424 +.long 0x00474747,0xd100d1d1 +.long 0x00efefef,0xfb00fbfb +.long 0x00eaeaea,0xba00baba +.long 0x00b7b7b7,0xed00eded +.long 0x00151515,0x45004545 +.long 0x00060606,0x81008181 +.long 0x00cdcdcd,0x73007373 +.long 0x00b5b5b5,0x6d006d6d +.long 0x00121212,0x84008484 +.long 0x007e7e7e,0x9f009f9f +.long 0x00bbbbbb,0xee00eeee +.long 0x00292929,0x4a004a4a +.long 0x000f0f0f,0xc300c3c3 +.long 0x00b8b8b8,0x2e002e2e +.long 0x00070707,0xc100c1c1 +.long 0x00040404,0x01000101 +.long 0x009b9b9b,0xe600e6e6 +.long 0x00949494,0x25002525 +.long 0x00212121,0x48004848 +.long 0x00666666,0x99009999 +.long 0x00e6e6e6,0xb900b9b9 +.long 0x00cecece,0xb300b3b3 +.long 0x00ededed,0x7b007b7b +.long 0x00e7e7e7,0xf900f9f9 +.long 0x003b3b3b,0xce00cece +.long 0x00fefefe,0xbf00bfbf +.long 0x007f7f7f,0xdf00dfdf +.long 0x00c5c5c5,0x71007171 +.long 0x00a4a4a4,0x29002929 +.long 0x00373737,0xcd00cdcd +.long 0x00b1b1b1,0x6c006c6c +.long 0x004c4c4c,0x13001313 +.long 0x00919191,0x64006464 +.long 0x006e6e6e,0x9b009b9b +.long 0x008d8d8d,0x63006363 +.long 0x00767676,0x9d009d9d +.long 0x00030303,0xc000c0c0 +.long 0x002d2d2d,0x4b004b4b +.long 0x00dedede,0xb700b7b7 +.long 0x00969696,0xa500a5a5 +.long 0x00262626,0x89008989 +.long 0x007d7d7d,0x5f005f5f +.long 0x00c6c6c6,0xb100b1b1 +.long 0x005c5c5c,0x17001717 +.long 0x00d3d3d3,0xf400f4f4 +.long 0x00f2f2f2,0xbc00bcbc +.long 0x004f4f4f,0xd300d3d3 +.long 0x00191919,0x46004646 +.long 0x003f3f3f,0xcf00cfcf +.long 0x00dcdcdc,0x37003737 +.long 0x00797979,0x5e005e5e +.long 0x001d1d1d,0x47004747 +.long 0x00525252,0x94009494 +.long 0x00ebebeb,0xfa00fafa +.long 0x00f3f3f3,0xfc00fcfc +.long 0x006d6d6d,0x5b005b5b +.long 0x005e5e5e,0x97009797 +.long 0x00fbfbfb,0xfe00fefe +.long 0x00696969,0x5a005a5a +.long 0x00b2b2b2,0xac00acac +.long 0x00f0f0f0,0x3c003c3c +.long 0x00313131,0x4c004c4c +.long 0x000c0c0c,0x03000303 +.long 0x00d4d4d4,0x35003535 +.long 0x00cfcfcf,0xf300f3f3 +.long 0x008c8c8c,0x23002323 +.long 0x00e2e2e2,0xb800b8b8 +.long 0x00757575,0x5d005d5d +.long 0x00a9a9a9,0x6a006a6a +.long 0x004a4a4a,0x92009292 +.long 0x00575757,0xd500d5d5 +.long 0x00848484,0x21002121 +.long 0x00111111,0x44004444 +.long 0x00454545,0x51005151 +.long 0x001b1b1b,0xc600c6c6 +.long 0x00f5f5f5,0x7d007d7d +.long 0x00e4e4e4,0x39003939 +.long 0x000e0e0e,0x83008383 +.long 0x00737373,0xdc00dcdc +.long 0x00aaaaaa,0xaa00aaaa +.long 0x00f1f1f1,0x7c007c7c +.long 0x00dddddd,0x77007777 +.long 0x00595959,0x56005656 +.long 0x00141414,0x05000505 +.long 0x006c6c6c,0x1b001b1b +.long 0x00929292,0xa400a4a4 +.long 0x00545454,0x15001515 +.long 0x00d0d0d0,0x34003434 +.long 0x00787878,0x1e001e1e +.long 0x00707070,0x1c001c1c +.long 0x00e3e3e3,0xf800f8f8 +.long 
0x00494949,0x52005252 +.long 0x00808080,0x20002020 +.long 0x00505050,0x14001414 +.long 0x00a7a7a7,0xe900e9e9 +.long 0x00f6f6f6,0xbd00bdbd +.long 0x00777777,0xdd00dddd +.long 0x00939393,0xe400e4e4 +.long 0x00868686,0xa100a1a1 +.long 0x00838383,0xe000e0e0 +.long 0x002a2a2a,0x8a008a8a +.long 0x00c7c7c7,0xf100f1f1 +.long 0x005b5b5b,0xd600d6d6 +.long 0x00e9e9e9,0x7a007a7a +.long 0x00eeeeee,0xbb00bbbb +.long 0x008f8f8f,0xe300e3e3 +.long 0x00010101,0x40004040 +.long 0x003d3d3d,0x4f004f4f +.text +.globl Camellia_cbc_encrypt +.type Camellia_cbc_encrypt,@function +.align 16 +Camellia_cbc_encrypt: +.cfi_startproc +.byte 243,15,30,250 + cmpq $0,%rdx + je .Lcbc_abort + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lcbc_prologue: + + movq %rsp,%rbp +.cfi_def_cfa_register %rbp + subq $64,%rsp + andq $-64,%rsp + + + + leaq -64-63(%rcx),%r10 + subq %rsp,%r10 + negq %r10 + andq $0x3C0,%r10 + subq %r10,%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %r8,%rbx + movq %rcx,%r14 + movl 272(%rcx),%r15d + + movq %r8,40(%rsp) + movq %rbp,48(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x30,0x06,0x23,0x38 + +.Lcbc_body: + leaq .LCamellia_SBOX(%rip),%rbp + + movl $32,%ecx +.align 4 +.Lcbc_prefetch_sbox: + movq 0(%rbp),%rax + movq 32(%rbp),%rsi + movq 64(%rbp),%rdi + movq 96(%rbp),%r11 + leaq 128(%rbp),%rbp + loop .Lcbc_prefetch_sbox + subq $4096,%rbp + shlq $6,%r15 + movq %rdx,%rcx + leaq (%r14,%r15,1),%r15 + + cmpl $0,%r9d + je .LCBC_DECRYPT + + andq $-16,%rdx + andq $15,%rcx + leaq (%r12,%rdx,1),%rdx + movq %r14,0(%rsp) + movq %rdx,8(%rsp) + movq %rcx,16(%rsp) + + cmpq %r12,%rdx + movl 0(%rbx),%r8d + movl 4(%rbx),%r9d + movl 8(%rbx),%r10d + movl 12(%rbx),%r11d + je .Lcbc_enc_tail + jmp .Lcbc_eloop + +.align 16 +.Lcbc_eloop: + xorl 0(%r12),%r8d + xorl 4(%r12),%r9d + xorl 8(%r12),%r10d + bswapl %r8d + xorl 12(%r12),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_encrypt + + movq 0(%rsp),%r14 + bswapl %r8d + movq 8(%rsp),%rdx + bswapl %r9d + movq 16(%rsp),%rcx + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + leaq 16(%r12),%r12 + movl %r11d,12(%r13) + cmpq %rdx,%r12 + leaq 16(%r13),%r13 + jne .Lcbc_eloop + + cmpq $0,%rcx + jne .Lcbc_enc_tail + + movq 40(%rsp),%r13 + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + jmp .Lcbc_done + +.align 16 +.Lcbc_enc_tail: + xorq %rax,%rax + movq %rax,0+24(%rsp) + movq %rax,8+24(%rsp) + movq %rax,16(%rsp) + +.Lcbc_enc_pushf: + pushfq + cld + movq %r12,%rsi + leaq 8+24(%rsp),%rdi +.long 0x9066A4F3 + popfq +.Lcbc_enc_popf: + + leaq 24(%rsp),%r12 + leaq 16+24(%rsp),%rax + movq %rax,8(%rsp) + jmp .Lcbc_eloop + +.align 16 +.LCBC_DECRYPT: + xchgq %r14,%r15 + addq $15,%rdx + andq $15,%rcx + andq $-16,%rdx + movq %r14,0(%rsp) + leaq (%r12,%rdx,1),%rdx + movq %rdx,8(%rsp) + movq %rcx,16(%rsp) + + movq (%rbx),%rax + movq 8(%rbx),%rbx + jmp .Lcbc_dloop +.align 16 +.Lcbc_dloop: + movl 0(%r12),%r8d + movl 4(%r12),%r9d + movl 8(%r12),%r10d + bswapl %r8d + movl 12(%r12),%r11d + bswapl %r9d + movq %rax,0+24(%rsp) + bswapl %r10d + movq %rbx,8+24(%rsp) + bswapl %r11d + + call _x86_64_Camellia_decrypt + + movq 0(%rsp),%r14 + movq 8(%rsp),%rdx + movq 16(%rsp),%rcx + + bswapl %r8d + movq 
(%r12),%rax + bswapl %r9d + movq 8(%r12),%rbx + bswapl %r10d + xorl 0+24(%rsp),%r8d + bswapl %r11d + xorl 4+24(%rsp),%r9d + xorl 8+24(%rsp),%r10d + leaq 16(%r12),%r12 + xorl 12+24(%rsp),%r11d + cmpq %rdx,%r12 + je .Lcbc_ddone + + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + leaq 16(%r13),%r13 + jmp .Lcbc_dloop + +.align 16 +.Lcbc_ddone: + movq 40(%rsp),%rdx + cmpq $0,%rcx + jne .Lcbc_dec_tail + + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq %rax,(%rdx) + movq %rbx,8(%rdx) + jmp .Lcbc_done +.align 16 +.Lcbc_dec_tail: + movl %r8d,0+24(%rsp) + movl %r9d,4+24(%rsp) + movl %r10d,8+24(%rsp) + movl %r11d,12+24(%rsp) + +.Lcbc_dec_pushf: + pushfq + cld + leaq 8+24(%rsp),%rsi + leaq (%r13),%rdi +.long 0x9066A4F3 + popfq +.Lcbc_dec_popf: + + movq %rax,(%rdx) + movq %rbx,8(%rdx) + jmp .Lcbc_done + +.align 16 +.Lcbc_done: + movq 48(%rsp),%rcx +.cfi_def_cfa %rcx,56 + movq 0(%rcx),%r15 +.cfi_restore %r15 + movq 8(%rcx),%r14 +.cfi_restore %r14 + movq 16(%rcx),%r13 +.cfi_restore %r13 + movq 24(%rcx),%r12 +.cfi_restore %r12 + movq 32(%rcx),%rbp +.cfi_restore %rbp + movq 40(%rcx),%rbx +.cfi_restore %rbx + leaq 48(%rcx),%rsp +.cfi_def_cfa %rsp,8 +.Lcbc_abort: + .byte 0xf3,0xc3 +.cfi_endproc +.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt + +.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/contrib/openssl-cmake/asm/crypto/chacha/chacha-armv8-sve.S b/contrib/openssl-cmake/asm/crypto/chacha/chacha-armv8-sve.S new file mode 100644 index 000000000000..e64c2544d1b4 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/chacha/chacha-armv8-sve.S @@ -0,0 +1,3558 @@ +// Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// +// ChaCha20 for ARMv8 via SVE +// +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" + +.arch armv8-a + + +.hidden OPENSSL_armcap_P + +.text + +.section .rodata +.align 5 +.type _chacha_sve_consts,%object +_chacha_sve_consts: +.Lchacha20_consts: +.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral +.Lrot8: +.word 0x02010003,0x04040404,0x02010003,0x04040404 +.size _chacha_sve_consts,.-_chacha_sve_consts + +.previous + +.globl ChaCha20_ctr32_sve +.type ChaCha20_ctr32_sve,%function +.align 5 +ChaCha20_ctr32_sve: + AARCH64_VALID_CALL_TARGET +.inst 0x04a0e3e5 //cntw x5, ALL, MUL #1 + cmp x2,x5,lsl #6 + b.lt .Lreturn + mov x7,0 + adrp x6,OPENSSL_armcap_P + ldr w6,[x6,#:lo12:OPENSSL_armcap_P] + tst w6,#ARMV8_SVE2 + b.eq 1f + mov x7,1 + b 2f +1: + cmp x5,4 + b.le .Lreturn + adrp x6,.Lrot8 + add x6,x6,#:lo12:.Lrot8 + ldp w9,w10,[x6] +.inst 0x04aa4d3f //index z31.s,w9,w10 +2: + AARCH64_SIGN_LINK_REGISTER + stp d8,d9,[sp,-192]! 
+ stp d10,d11,[sp,16] + stp d12,d13,[sp,32] + stp d14,d15,[sp,48] + stp x16,x17,[sp,64] + stp x18,x19,[sp,80] + stp x20,x21,[sp,96] + stp x22,x23,[sp,112] + stp x24,x25,[sp,128] + stp x26,x27,[sp,144] + stp x28,x29,[sp,160] + str x30,[sp,176] + + adrp x6,.Lchacha20_consts + add x6,x6,#:lo12:.Lchacha20_consts + ldp x23,x24,[x6] + ldp x25,x26,[x3] + ldp x27,x28,[x3, 16] + ldp x29,x30,[x4] +.inst 0x2599e3e0 //ptrues p0.s,ALL +#ifdef __AARCH64EB__ + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x29,x29,#32 + ror x30,x30,#32 +#endif + cbz x7, 1f +.align 5 +100: + subs x7,x2,x5,lsl #6 + b.lt 110f + mov x2,x7 + b.eq 101f + cmp x2,64 + b.lt 101f + mixin=1 + lsr x8,x23,#32 +.inst 0x05a03ae0 //dup z0.s,w23 +.inst 0x05a03af9 //dup z25.s,w23 +.if mixin == 1 + mov w7,w23 +.endif +.inst 0x05a03904 //dup z4.s,w8 +.inst 0x05a0391a //dup z26.s,w8 + lsr x10,x24,#32 +.inst 0x05a03b08 //dup z8.s,w24 +.inst 0x05a03b1b //dup z27.s,w24 +.if mixin == 1 + mov w9,w24 +.endif +.inst 0x05a0394c //dup z12.s,w10 +.inst 0x05a0395c //dup z28.s,w10 + lsr x12,x25,#32 +.inst 0x05a03b21 //dup z1.s,w25 +.inst 0x05a03b3d //dup z29.s,w25 +.if mixin == 1 + mov w11,w25 +.endif +.inst 0x05a03985 //dup z5.s,w12 +.inst 0x05a0399e //dup z30.s,w12 + lsr x14,x26,#32 +.inst 0x05a03b49 //dup z9.s,w26 +.inst 0x05a03b55 //dup z21.s,w26 +.if mixin == 1 + mov w13,w26 +.endif +.inst 0x05a039cd //dup z13.s,w14 +.inst 0x05a039d6 //dup z22.s,w14 + lsr x16,x27,#32 +.inst 0x05a03b62 //dup z2.s,w27 +.inst 0x05a03b77 //dup z23.s,w27 +.if mixin == 1 + mov w15,w27 +.endif +.inst 0x05a03a06 //dup z6.s,w16 +.inst 0x05a03a18 //dup z24.s,w16 + lsr x18,x28,#32 +.inst 0x05a03b8a //dup z10.s,w28 +.inst 0x05a03b91 //dup z17.s,w28 +.if mixin == 1 + mov w17,w28 +.endif +.inst 0x05a03a4e //dup z14.s,w18 +.inst 0x05a03a52 //dup z18.s,w18 + lsr x22,x30,#32 +.inst 0x05a03bcb //dup z11.s,w30 +.inst 0x05a03bd4 //dup z20.s,w30 +.if mixin == 1 + mov w21,w30 +.endif +.inst 0x05a03acf //dup z15.s,w22 +.inst 0x05a03adf //dup z31.s,w22 +.if mixin == 1 + add w20,w29,#1 + mov w19,w29 +.inst 0x04a14690 //index z16.s,w20,1 +.inst 0x04a14683 //index z3.s,w20,1 +.else +.inst 0x04a147b0 //index z16.s,w29,1 +.inst 0x04a147a3 //index z3.s,w29,1 +.endif + lsr x20,x29,#32 +.inst 0x05a03a87 //dup z7.s,w20 +.inst 0x05a03a93 //dup z19.s,w20 + mov x6,#10 +10: +.align 5 +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04703403 //xar z3.s,z3.s,z0.s,16 +.if mixin == 1 + ror w19,w19,16 +.endif +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04703487 //xar z7.s,z7.s,z4.s,16 +.if mixin == 1 + ror w20,w20,16 +.endif +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x0470350b //xar z11.s,z11.s,z8.s,16 +.if mixin == 1 + ror w21,w21,16 +.endif +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x0470358f //xar z15.s,z15.s,z12.s,16 +.if mixin == 1 + ror w22,w22,16 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x046c3441 
//xar z1.s,z1.s,z2.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x046c34c5 //xar z5.s,z5.s,z6.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x046c3549 //xar z9.s,z9.s,z10.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x046c35cd //xar z13.s,z13.s,z14.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04683403 //xar z3.s,z3.s,z0.s,24 +.if mixin == 1 + ror w19,w19,24 +.endif +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04683487 //xar z7.s,z7.s,z4.s,24 +.if mixin == 1 + ror w20,w20,24 +.endif +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x0468350b //xar z11.s,z11.s,z8.s,24 +.if mixin == 1 + ror w21,w21,24 +.endif +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x0468358f //xar z15.s,z15.s,z12.s,24 +.if mixin == 1 + ror w22,w22,24 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x04673441 //xar z1.s,z1.s,z2.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x046734c5 //xar z5.s,z5.s,z6.s,25 +.if mixin == 1 + ror w12,w12,25 +.endif +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x04673549 //xar z9.s,z9.s,z10.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x046735cd //xar z13.s,z13.s,z14.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x0470340f //xar z15.s,z15.s,z0.s,16 +.if mixin == 1 + ror w22,w22,16 +.endif +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04703483 //xar z3.s,z3.s,z4.s,16 +.if mixin == 1 + ror w19,w19,16 +.endif +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04703507 //xar z7.s,z7.s,z8.s,16 +.if mixin == 1 + ror w20,w20,16 +.endif +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x0470358b //xar z11.s,z11.s,z12.s,16 +.if mixin == 1 + ror w21,w21,16 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x046c3545 //xar z5.s,z5.s,z10.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x046c35c9 //xar z9.s,z9.s,z14.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 
0x046c344d //xar z13.s,z13.s,z2.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x046c34c1 //xar z1.s,z1.s,z6.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x0468340f //xar z15.s,z15.s,z0.s,24 +.if mixin == 1 + ror w22,w22,24 +.endif +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04683483 //xar z3.s,z3.s,z4.s,24 +.if mixin == 1 + ror w19,w19,24 +.endif +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04683507 //xar z7.s,z7.s,z8.s,24 +.if mixin == 1 + ror w20,w20,24 +.endif +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x0468358b //xar z11.s,z11.s,z12.s,24 +.if mixin == 1 + ror w21,w21,24 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x04673545 //xar z5.s,z5.s,z10.s,25 +.if mixin == 1 + ror w12,w12,25 +.endif +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x046735c9 //xar z9.s,z9.s,z14.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x0467344d //xar z13.s,z13.s,z2.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x046734c1 //xar z1.s,z1.s,z6.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif + sub x6,x6,1 + cbnz x6,10b +.if mixin == 1 + add w7,w7,w23 +.endif +.inst 0x04b90000 //add z0.s,z0.s,z25.s +.if mixin == 1 + add x8,x8,x23,lsr #32 +.endif +.inst 0x04ba0084 //add z4.s,z4.s,z26.s +.if mixin == 1 + add x7,x7,x8,lsl #32 // pack +.endif +.if mixin == 1 + add w9,w9,w24 +.endif +.inst 0x04bb0108 //add z8.s,z8.s,z27.s +.if mixin == 1 + add x10,x10,x24,lsr #32 +.endif +.inst 0x04bc018c //add z12.s,z12.s,z28.s +.if mixin == 1 + add x9,x9,x10,lsl #32 // pack +.endif +.if mixin == 1 + ldp x8,x10,[x1],#16 +.endif +.if mixin == 1 + add w11,w11,w25 +.endif +.inst 0x04bd0021 //add z1.s,z1.s,z29.s +.if mixin == 1 + add x12,x12,x25,lsr #32 +.endif +.inst 0x04be00a5 //add z5.s,z5.s,z30.s +.if mixin == 1 + add x11,x11,x12,lsl #32 // pack +.endif +.if mixin == 1 + add w13,w13,w26 +.endif +.inst 0x04b50129 //add z9.s,z9.s,z21.s +.if mixin == 1 + add x14,x14,x26,lsr #32 +.endif +.inst 0x04b601ad //add z13.s,z13.s,z22.s +.if mixin == 1 + add x13,x13,x14,lsl #32 // pack +.endif +.if mixin == 1 + ldp x12,x14,[x1],#16 +.endif +.if mixin == 1 + add w15,w15,w27 +.endif +.inst 0x04b70042 //add z2.s,z2.s,z23.s +.if mixin == 1 + add x16,x16,x27,lsr #32 +.endif +.inst 0x04b800c6 //add z6.s,z6.s,z24.s +.if mixin == 1 + add x15,x15,x16,lsl #32 // pack +.endif +.if mixin == 1 + add w17,w17,w28 +.endif +.inst 0x04b1014a //add z10.s,z10.s,z17.s +.if mixin == 1 + add x18,x18,x28,lsr #32 +.endif +.inst 0x04b201ce //add z14.s,z14.s,z18.s +.if mixin == 1 + add x17,x17,x18,lsl #32 // pack +.endif +.if mixin == 1 + ldp x16,x18,[x1],#16 +.endif +.if mixin == 1 + add w19,w19,w29 +.endif +.inst 0x04b00063 //add z3.s,z3.s,z16.s +.if mixin == 1 + add x20,x20,x29,lsr #32 +.endif +.inst 0x04b300e7 
//add z7.s,z7.s,z19.s +.if mixin == 1 + add x19,x19,x20,lsl #32 // pack +.endif +.if mixin == 1 + add w21,w21,w30 +.endif +.inst 0x04b4016b //add z11.s,z11.s,z20.s +.if mixin == 1 + add x22,x22,x30,lsr #32 +.endif +.inst 0x04bf01ef //add z15.s,z15.s,z31.s +.if mixin == 1 + add x21,x21,x22,lsl #32 // pack +.endif +.if mixin == 1 + ldp x20,x22,[x1],#16 +.endif +#ifdef __AARCH64EB__ + rev x7,x7 +.inst 0x05a48000 //revb z0.s,p0/m,z0.s +.inst 0x05a48084 //revb z4.s,p0/m,z4.s + rev x9,x9 +.inst 0x05a48108 //revb z8.s,p0/m,z8.s +.inst 0x05a4818c //revb z12.s,p0/m,z12.s + rev x11,x11 +.inst 0x05a48021 //revb z1.s,p0/m,z1.s +.inst 0x05a480a5 //revb z5.s,p0/m,z5.s + rev x13,x13 +.inst 0x05a48129 //revb z9.s,p0/m,z9.s +.inst 0x05a481ad //revb z13.s,p0/m,z13.s + rev x15,x15 +.inst 0x05a48042 //revb z2.s,p0/m,z2.s +.inst 0x05a480c6 //revb z6.s,p0/m,z6.s + rev x17,x17 +.inst 0x05a4814a //revb z10.s,p0/m,z10.s +.inst 0x05a481ce //revb z14.s,p0/m,z14.s + rev x19,x19 +.inst 0x05a48063 //revb z3.s,p0/m,z3.s +.inst 0x05a480e7 //revb z7.s,p0/m,z7.s + rev x21,x21 +.inst 0x05a4816b //revb z11.s,p0/m,z11.s +.inst 0x05a481ef //revb z15.s,p0/m,z15.s +#endif +.if mixin == 1 + add x29,x29,#1 +.endif + cmp x5,4 + b.ne 200f +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.if mixin == 1 + eor x11,x11,x12 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x13,x13,x14 +.endif +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23021 //eor z1.d,z1.d,z18.d +.inst 0x04b33042 //eor z2.d,z2.d,z19.d +.inst 0x04b43063 //eor z3.d,z3.d,z20.d +.inst 0x04b53084 //eor z4.d,z4.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b730c6 //eor z6.d,z6.d,z23.d +.inst 0x04b830e7 //eor z7.d,z7.d,z24.d + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13108 //eor z8.d,z8.d,z17.d +.inst 0x04b23129 //eor z9.d,z9.d,z18.d +.if mixin == 1 + stp 
x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b4316b //eor z11.d,z11.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b5318c //eor z12.d,z12.d,z21.d +.inst 0x04b631ad //eor z13.d,z13.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b731ce //eor z14.d,z14.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + b 210f +200: +.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s +.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s +.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s +.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s + +.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s +.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s +.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s +.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d +.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d +.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d + +.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d +.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s +.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s +.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s +.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s + +.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s +.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s +.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s +.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s + +.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d +.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d + +.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d +.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d +.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x11,x11,x12 +.endif +.if mixin == 1 + eor x13,x13,x14 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 
+.endif +.if mixin == 1 + eor x21,x21,x22 +.endif +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23084 //eor z4.d,z4.d,z18.d +.inst 0x04b33108 //eor z8.d,z8.d,z19.d +.inst 0x04b4318c //eor z12.d,z12.d,z20.d +.inst 0x04b53021 //eor z1.d,z1.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b73129 //eor z9.d,z9.d,z23.d +.inst 0x04b831ad //eor z13.d,z13.d,z24.d +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13042 //eor z2.d,z2.d,z17.d +.inst 0x04b230c6 //eor z6.d,z6.d,z18.d +.if mixin == 1 + stp x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b431ce //eor z14.d,z14.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b53063 //eor z3.d,z3.d,z21.d +.inst 0x04b630e7 //eor z7.d,z7.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b7316b //eor z11.d,z11.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d +.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] +.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] +.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] +.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] +.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] +.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] +.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] +.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] +.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] +.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] +.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +210: +.inst 0x04b0e3fd //incw x29, ALL, MUL #1 + subs x2,x2,64 + b.gt 100b + b 110f +101: + mixin=0 + lsr x8,x23,#32 +.inst 0x05a03ae0 //dup z0.s,w23 +.inst 0x05a03af9 //dup z25.s,w23 +.if mixin == 1 + mov w7,w23 +.endif +.inst 0x05a03904 //dup z4.s,w8 +.inst 0x05a0391a //dup z26.s,w8 + lsr x10,x24,#32 +.inst 0x05a03b08 //dup z8.s,w24 +.inst 0x05a03b1b //dup z27.s,w24 +.if mixin == 1 + mov w9,w24 +.endif +.inst 0x05a0394c //dup z12.s,w10 +.inst 0x05a0395c //dup z28.s,w10 + lsr x12,x25,#32 +.inst 0x05a03b21 //dup z1.s,w25 +.inst 0x05a03b3d //dup z29.s,w25 +.if mixin == 1 + mov w11,w25 +.endif +.inst 0x05a03985 //dup z5.s,w12 +.inst 0x05a0399e //dup z30.s,w12 + lsr x14,x26,#32 +.inst 0x05a03b49 //dup z9.s,w26 +.inst 0x05a03b55 //dup z21.s,w26 +.if mixin == 1 + mov w13,w26 +.endif +.inst 0x05a039cd //dup z13.s,w14 +.inst 0x05a039d6 //dup z22.s,w14 + lsr x16,x27,#32 
+.inst 0x05a03b62 //dup z2.s,w27 +.inst 0x05a03b77 //dup z23.s,w27 +.if mixin == 1 + mov w15,w27 +.endif +.inst 0x05a03a06 //dup z6.s,w16 +.inst 0x05a03a18 //dup z24.s,w16 + lsr x18,x28,#32 +.inst 0x05a03b8a //dup z10.s,w28 +.inst 0x05a03b91 //dup z17.s,w28 +.if mixin == 1 + mov w17,w28 +.endif +.inst 0x05a03a4e //dup z14.s,w18 +.inst 0x05a03a52 //dup z18.s,w18 + lsr x22,x30,#32 +.inst 0x05a03bcb //dup z11.s,w30 +.inst 0x05a03bd4 //dup z20.s,w30 +.if mixin == 1 + mov w21,w30 +.endif +.inst 0x05a03acf //dup z15.s,w22 +.inst 0x05a03adf //dup z31.s,w22 +.if mixin == 1 + add w20,w29,#1 + mov w19,w29 +.inst 0x04a14690 //index z16.s,w20,1 +.inst 0x04a14683 //index z3.s,w20,1 +.else +.inst 0x04a147b0 //index z16.s,w29,1 +.inst 0x04a147a3 //index z3.s,w29,1 +.endif + lsr x20,x29,#32 +.inst 0x05a03a87 //dup z7.s,w20 +.inst 0x05a03a93 //dup z19.s,w20 + mov x6,#10 +10: +.align 5 +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04703403 //xar z3.s,z3.s,z0.s,16 +.if mixin == 1 + ror w19,w19,16 +.endif +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04703487 //xar z7.s,z7.s,z4.s,16 +.if mixin == 1 + ror w20,w20,16 +.endif +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x0470350b //xar z11.s,z11.s,z8.s,16 +.if mixin == 1 + ror w21,w21,16 +.endif +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x0470358f //xar z15.s,z15.s,z12.s,16 +.if mixin == 1 + ror w22,w22,16 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x046c3441 //xar z1.s,z1.s,z2.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x046c34c5 //xar z5.s,z5.s,z6.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x046c3549 //xar z9.s,z9.s,z10.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x046c35cd //xar z13.s,z13.s,z14.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04683403 //xar z3.s,z3.s,z0.s,24 +.if mixin == 1 + ror w19,w19,24 +.endif +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04683487 //xar z7.s,z7.s,z4.s,24 +.if mixin == 1 + ror w20,w20,24 +.endif +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x0468350b //xar z11.s,z11.s,z8.s,24 +.if mixin == 1 + ror w21,w21,24 +.endif +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x0468358f //xar z15.s,z15.s,z12.s,24 +.if mixin == 1 + ror w22,w22,24 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add 
z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x04673441 //xar z1.s,z1.s,z2.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x046734c5 //xar z5.s,z5.s,z6.s,25 +.if mixin == 1 + ror w12,w12,25 +.endif +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x04673549 //xar z9.s,z9.s,z10.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x046735cd //xar z13.s,z13.s,z14.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x0470340f //xar z15.s,z15.s,z0.s,16 +.if mixin == 1 + ror w22,w22,16 +.endif +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04703483 //xar z3.s,z3.s,z4.s,16 +.if mixin == 1 + ror w19,w19,16 +.endif +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04703507 //xar z7.s,z7.s,z8.s,16 +.if mixin == 1 + ror w20,w20,16 +.endif +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x0470358b //xar z11.s,z11.s,z12.s,16 +.if mixin == 1 + ror w21,w21,16 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x046c3545 //xar z5.s,z5.s,z10.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x046c35c9 //xar z9.s,z9.s,z14.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x046c344d //xar z13.s,z13.s,z2.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x046c34c1 //xar z1.s,z1.s,z6.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x0468340f //xar z15.s,z15.s,z0.s,24 +.if mixin == 1 + ror w22,w22,24 +.endif +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04683483 //xar z3.s,z3.s,z4.s,24 +.if mixin == 1 + ror w19,w19,24 +.endif +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04683507 //xar z7.s,z7.s,z8.s,24 +.if mixin == 1 + ror w20,w20,24 +.endif +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x0468358b //xar z11.s,z11.s,z12.s,24 +.if mixin == 1 + ror w21,w21,24 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x04673545 //xar z5.s,z5.s,z10.s,25 +.if mixin 
== 1 + ror w12,w12,25 +.endif +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x046735c9 //xar z9.s,z9.s,z14.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x0467344d //xar z13.s,z13.s,z2.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x046734c1 //xar z1.s,z1.s,z6.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif + sub x6,x6,1 + cbnz x6,10b +.if mixin == 1 + add w7,w7,w23 +.endif +.inst 0x04b90000 //add z0.s,z0.s,z25.s +.if mixin == 1 + add x8,x8,x23,lsr #32 +.endif +.inst 0x04ba0084 //add z4.s,z4.s,z26.s +.if mixin == 1 + add x7,x7,x8,lsl #32 // pack +.endif +.if mixin == 1 + add w9,w9,w24 +.endif +.inst 0x04bb0108 //add z8.s,z8.s,z27.s +.if mixin == 1 + add x10,x10,x24,lsr #32 +.endif +.inst 0x04bc018c //add z12.s,z12.s,z28.s +.if mixin == 1 + add x9,x9,x10,lsl #32 // pack +.endif +.if mixin == 1 + ldp x8,x10,[x1],#16 +.endif +.if mixin == 1 + add w11,w11,w25 +.endif +.inst 0x04bd0021 //add z1.s,z1.s,z29.s +.if mixin == 1 + add x12,x12,x25,lsr #32 +.endif +.inst 0x04be00a5 //add z5.s,z5.s,z30.s +.if mixin == 1 + add x11,x11,x12,lsl #32 // pack +.endif +.if mixin == 1 + add w13,w13,w26 +.endif +.inst 0x04b50129 //add z9.s,z9.s,z21.s +.if mixin == 1 + add x14,x14,x26,lsr #32 +.endif +.inst 0x04b601ad //add z13.s,z13.s,z22.s +.if mixin == 1 + add x13,x13,x14,lsl #32 // pack +.endif +.if mixin == 1 + ldp x12,x14,[x1],#16 +.endif +.if mixin == 1 + add w15,w15,w27 +.endif +.inst 0x04b70042 //add z2.s,z2.s,z23.s +.if mixin == 1 + add x16,x16,x27,lsr #32 +.endif +.inst 0x04b800c6 //add z6.s,z6.s,z24.s +.if mixin == 1 + add x15,x15,x16,lsl #32 // pack +.endif +.if mixin == 1 + add w17,w17,w28 +.endif +.inst 0x04b1014a //add z10.s,z10.s,z17.s +.if mixin == 1 + add x18,x18,x28,lsr #32 +.endif +.inst 0x04b201ce //add z14.s,z14.s,z18.s +.if mixin == 1 + add x17,x17,x18,lsl #32 // pack +.endif +.if mixin == 1 + ldp x16,x18,[x1],#16 +.endif +.if mixin == 1 + add w19,w19,w29 +.endif +.inst 0x04b00063 //add z3.s,z3.s,z16.s +.if mixin == 1 + add x20,x20,x29,lsr #32 +.endif +.inst 0x04b300e7 //add z7.s,z7.s,z19.s +.if mixin == 1 + add x19,x19,x20,lsl #32 // pack +.endif +.if mixin == 1 + add w21,w21,w30 +.endif +.inst 0x04b4016b //add z11.s,z11.s,z20.s +.if mixin == 1 + add x22,x22,x30,lsr #32 +.endif +.inst 0x04bf01ef //add z15.s,z15.s,z31.s +.if mixin == 1 + add x21,x21,x22,lsl #32 // pack +.endif +.if mixin == 1 + ldp x20,x22,[x1],#16 +.endif +#ifdef __AARCH64EB__ + rev x7,x7 +.inst 0x05a48000 //revb z0.s,p0/m,z0.s +.inst 0x05a48084 //revb z4.s,p0/m,z4.s + rev x9,x9 +.inst 0x05a48108 //revb z8.s,p0/m,z8.s +.inst 0x05a4818c //revb z12.s,p0/m,z12.s + rev x11,x11 +.inst 0x05a48021 //revb z1.s,p0/m,z1.s +.inst 0x05a480a5 //revb z5.s,p0/m,z5.s + rev x13,x13 +.inst 0x05a48129 //revb z9.s,p0/m,z9.s +.inst 0x05a481ad //revb z13.s,p0/m,z13.s + rev x15,x15 +.inst 0x05a48042 //revb z2.s,p0/m,z2.s +.inst 0x05a480c6 //revb z6.s,p0/m,z6.s + rev x17,x17 +.inst 0x05a4814a //revb z10.s,p0/m,z10.s +.inst 0x05a481ce //revb z14.s,p0/m,z14.s + rev x19,x19 +.inst 0x05a48063 //revb z3.s,p0/m,z3.s +.inst 0x05a480e7 //revb z7.s,p0/m,z7.s + rev x21,x21 +.inst 0x05a4816b //revb z11.s,p0/m,z11.s +.inst 0x05a481ef //revb z15.s,p0/m,z15.s +#endif +.if mixin == 1 + add x29,x29,#1 +.endif + cmp x5,4 + b.ne 200f +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.if mixin == 1 + eor x11,x11,x12 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 
z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x13,x13,x14 +.endif +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23021 //eor z1.d,z1.d,z18.d +.inst 0x04b33042 //eor z2.d,z2.d,z19.d +.inst 0x04b43063 //eor z3.d,z3.d,z20.d +.inst 0x04b53084 //eor z4.d,z4.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b730c6 //eor z6.d,z6.d,z23.d +.inst 0x04b830e7 //eor z7.d,z7.d,z24.d + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13108 //eor z8.d,z8.d,z17.d +.inst 0x04b23129 //eor z9.d,z9.d,z18.d +.if mixin == 1 + stp x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b4316b //eor z11.d,z11.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b5318c //eor z12.d,z12.d,z21.d +.inst 0x04b631ad //eor z13.d,z13.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b731ce //eor z14.d,z14.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + b 210f +200: +.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s +.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s +.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s +.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s + +.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s +.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s +.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s +.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d +.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d +.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d + +.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d +.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s +.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s +.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s +.inst 
0x05ab6554 //zip2 z20.s,z10.s,z11.s + +.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s +.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s +.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s +.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s + +.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d +.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d + +.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d +.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d +.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x11,x11,x12 +.endif +.if mixin == 1 + eor x13,x13,x14 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23084 //eor z4.d,z4.d,z18.d +.inst 0x04b33108 //eor z8.d,z8.d,z19.d +.inst 0x04b4318c //eor z12.d,z12.d,z20.d +.inst 0x04b53021 //eor z1.d,z1.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b73129 //eor z9.d,z9.d,z23.d +.inst 0x04b831ad //eor z13.d,z13.d,z24.d +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13042 //eor z2.d,z2.d,z17.d +.inst 0x04b230c6 //eor z6.d,z6.d,z18.d +.if mixin == 1 + stp 
x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b431ce //eor z14.d,z14.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b53063 //eor z3.d,z3.d,z21.d +.inst 0x04b630e7 //eor z7.d,z7.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b7316b //eor z11.d,z11.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d +.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] +.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] +.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] +.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] +.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] +.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] +.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] +.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] +.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] +.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] +.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +210: +.inst 0x04b0e3fd //incw x29, ALL, MUL #1 +110: + b 2f +1: +.align 5 +100: + subs x7,x2,x5,lsl #6 + b.lt 110f + mov x2,x7 + b.eq 101f + cmp x2,64 + b.lt 101f + mixin=1 + lsr x8,x23,#32 +.inst 0x05a03ae0 //dup z0.s,w23 +.inst 0x05a03af9 //dup z25.s,w23 +.if mixin == 1 + mov w7,w23 +.endif +.inst 0x05a03904 //dup z4.s,w8 +.inst 0x05a0391a //dup z26.s,w8 + lsr x10,x24,#32 +.inst 0x05a03b08 //dup z8.s,w24 +.inst 0x05a03b1b //dup z27.s,w24 +.if mixin == 1 + mov w9,w24 +.endif +.inst 0x05a0394c //dup z12.s,w10 +.inst 0x05a0395c //dup z28.s,w10 + lsr x12,x25,#32 +.inst 0x05a03b21 //dup z1.s,w25 +.inst 0x05a03b3d //dup z29.s,w25 +.if mixin == 1 + mov w11,w25 +.endif +.inst 0x05a03985 //dup z5.s,w12 +.inst 0x05a0399e //dup z30.s,w12 + lsr x14,x26,#32 +.inst 0x05a03b49 //dup z9.s,w26 +.inst 0x05a03b55 //dup z21.s,w26 +.if mixin == 1 + mov w13,w26 +.endif +.inst 0x05a039cd //dup z13.s,w14 +.inst 0x05a039d6 //dup z22.s,w14 + lsr x16,x27,#32 +.inst 0x05a03b62 //dup z2.s,w27 +.inst 0x05a03b77 //dup z23.s,w27 +.if mixin == 1 + mov w15,w27 +.endif +.inst 0x05a03a06 //dup z6.s,w16 +.inst 0x05a03a18 //dup z24.s,w16 + lsr x18,x28,#32 +.inst 0x05a03b8a //dup z10.s,w28 +.if mixin == 1 + mov w17,w28 +.endif +.inst 0x05a03a4e //dup z14.s,w18 + lsr x22,x30,#32 +.inst 0x05a03bcb //dup z11.s,w30 +.if mixin == 1 + mov w21,w30 +.endif +.inst 0x05a03acf //dup z15.s,w22 +.if mixin == 1 + add w20,w29,#1 + mov w19,w29 +.inst 0x04a14690 //index z16.s,w20,1 +.inst 0x04a14683 //index z3.s,w20,1 +.else +.inst 0x04a147b0 //index z16.s,w29,1 +.inst 0x04a147a3 //index z3.s,w29,1 +.endif + lsr x20,x29,#32 +.inst 0x05a03a87 //dup z7.s,w20 + mov x6,#10 +10: +.align 5 +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.inst 0x04a03063 //eor z3.d,z3.d,z0.d +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04a430e7 //eor z7.d,z7.d,z4.d +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04a8316b //eor z11.d,z11.d,z8.d +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x04ac31ef //eor z15.d,z15.d,z12.d +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 
0x05a58063 //revh z3.s,p0/m,z3.s +.if mixin == 1 + ror w19,w19,#16 +.endif +.inst 0x05a580e7 //revh z7.s,p0/m,z7.s +.if mixin == 1 + ror w20,w20,#16 +.endif +.inst 0x05a5816b //revh z11.s,p0/m,z11.s +.if mixin == 1 + ror w21,w21,#16 +.endif +.inst 0x05a581ef //revh z15.s,p0/m,z15.s +.if mixin == 1 + ror w22,w22,#16 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.inst 0x04a23021 //eor z1.d,z1.d,z2.d +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x04a630a5 //eor z5.d,z5.d,z6.d +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x04aa3129 //eor z9.d,z9.d,z10.d +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x04ae31ad //eor z13.d,z13.d,z14.d +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x046c9c31 //lsl z17.s,z1.s,12 +.inst 0x046c9cb2 //lsl z18.s,z5.s,12 +.inst 0x046c9d33 //lsl z19.s,z9.s,12 +.inst 0x046c9db4 //lsl z20.s,z13.s,12 +.inst 0x046c9421 //lsr z1.s,z1.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.inst 0x046c94a5 //lsr z5.s,z5.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.inst 0x046c9529 //lsr z9.s,z9.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.inst 0x046c95ad //lsr z13.s,z13.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.inst 0x04713021 //orr z1.d,z1.d,z17.d +.inst 0x047230a5 //orr z5.d,z5.d,z18.d +.inst 0x04733129 //orr z9.d,z9.d,z19.d +.inst 0x047431ad //orr z13.d,z13.d,z20.d +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.inst 0x04a03063 //eor z3.d,z3.d,z0.d +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04a430e7 //eor z7.d,z7.d,z4.d +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04a8316b //eor z11.d,z11.d,z8.d +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x04ac31ef //eor z15.d,z15.d,z12.d +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b +.if mixin == 1 + ror w19,w19,#24 +.endif +.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b +.if mixin == 1 + ror w20,w20,#24 +.endif +.inst 0x053f316b //tbl z11.b,{z11.b},z31.b +.if mixin == 1 + ror w21,w21,#24 +.endif +.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b +.if mixin == 1 + ror w22,w22,#24 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.inst 0x04a23021 //eor z1.d,z1.d,z2.d +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x04a630a5 //eor z5.d,z5.d,z6.d +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x04aa3129 //eor z9.d,z9.d,z10.d +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x04ae31ad //eor z13.d,z13.d,z14.d +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x04679c31 //lsl z17.s,z1.s,7 +.inst 0x04679cb2 //lsl z18.s,z5.s,7 +.inst 0x04679d33 //lsl z19.s,z9.s,7 +.inst 0x04679db4 //lsl z20.s,z13.s,7 +.inst 0x04679421 //lsr z1.s,z1.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif +.inst 0x046794a5 //lsr z5.s,z5.s,25 +.if mixin == 1 + ror w12,w12,25 
+.endif +.inst 0x04679529 //lsr z9.s,z9.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.inst 0x046795ad //lsr z13.s,z13.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.inst 0x04713021 //orr z1.d,z1.d,z17.d +.inst 0x047230a5 //orr z5.d,z5.d,z18.d +.inst 0x04733129 //orr z9.d,z9.d,z19.d +.inst 0x047431ad //orr z13.d,z13.d,z20.d +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.inst 0x04a031ef //eor z15.d,z15.d,z0.d +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x04a43063 //eor z3.d,z3.d,z4.d +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04a830e7 //eor z7.d,z7.d,z8.d +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04ac316b //eor z11.d,z11.d,z12.d +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x05a581ef //revh z15.s,p0/m,z15.s +.if mixin == 1 + ror w22,w22,#16 +.endif +.inst 0x05a58063 //revh z3.s,p0/m,z3.s +.if mixin == 1 + ror w19,w19,#16 +.endif +.inst 0x05a580e7 //revh z7.s,p0/m,z7.s +.if mixin == 1 + ror w20,w20,#16 +.endif +.inst 0x05a5816b //revh z11.s,p0/m,z11.s +.if mixin == 1 + ror w21,w21,#16 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x04ae3129 //eor z9.d,z9.d,z14.d +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x04a231ad //eor z13.d,z13.d,z2.d +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x04a63021 //eor z1.d,z1.d,z6.d +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x046c9cb1 //lsl z17.s,z5.s,12 +.inst 0x046c9d32 //lsl z18.s,z9.s,12 +.inst 0x046c9db3 //lsl z19.s,z13.s,12 +.inst 0x046c9c34 //lsl z20.s,z1.s,12 +.inst 0x046c94a5 //lsr z5.s,z5.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.inst 0x046c9529 //lsr z9.s,z9.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.inst 0x046c95ad //lsr z13.s,z13.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.inst 0x046c9421 //lsr z1.s,z1.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.inst 0x047130a5 //orr z5.d,z5.d,z17.d +.inst 0x04723129 //orr z9.d,z9.d,z18.d +.inst 0x047331ad //orr z13.d,z13.d,z19.d +.inst 0x04743021 //orr z1.d,z1.d,z20.d +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.inst 0x04a031ef //eor z15.d,z15.d,z0.d +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x04a43063 //eor z3.d,z3.d,z4.d +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04a830e7 //eor z7.d,z7.d,z8.d +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04ac316b //eor z11.d,z11.d,z12.d +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b +.if mixin == 1 + ror w22,w22,#24 +.endif +.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b +.if mixin == 1 + ror w19,w19,#24 +.endif +.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b +.if mixin == 1 + ror w20,w20,#24 +.endif +.inst 0x053f316b //tbl z11.b,{z11.b},z31.b +.if mixin == 1 
+ ror w21,w21,#24 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x04ae3129 //eor z9.d,z9.d,z14.d +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x04a231ad //eor z13.d,z13.d,z2.d +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x04a63021 //eor z1.d,z1.d,z6.d +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x04679cb1 //lsl z17.s,z5.s,7 +.inst 0x04679d32 //lsl z18.s,z9.s,7 +.inst 0x04679db3 //lsl z19.s,z13.s,7 +.inst 0x04679c34 //lsl z20.s,z1.s,7 +.inst 0x046794a5 //lsr z5.s,z5.s,25 +.if mixin == 1 + ror w12,w12,25 +.endif +.inst 0x04679529 //lsr z9.s,z9.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.inst 0x046795ad //lsr z13.s,z13.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.inst 0x04679421 //lsr z1.s,z1.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif +.inst 0x047130a5 //orr z5.d,z5.d,z17.d +.inst 0x04723129 //orr z9.d,z9.d,z18.d +.inst 0x047331ad //orr z13.d,z13.d,z19.d +.inst 0x04743021 //orr z1.d,z1.d,z20.d + sub x6,x6,1 + cbnz x6,10b + lsr x6,x28,#32 +.inst 0x05a03b91 //dup z17.s,w28 +.inst 0x05a038d2 //dup z18.s,w6 + lsr x6,x29,#32 +.inst 0x05a038d3 //dup z19.s,w6 + lsr x6,x30,#32 +.if mixin == 1 + add w7,w7,w23 +.endif +.inst 0x04b90000 //add z0.s,z0.s,z25.s +.if mixin == 1 + add x8,x8,x23,lsr #32 +.endif +.inst 0x04ba0084 //add z4.s,z4.s,z26.s +.if mixin == 1 + add x7,x7,x8,lsl #32 // pack +.endif +.if mixin == 1 + add w9,w9,w24 +.endif +.inst 0x04bb0108 //add z8.s,z8.s,z27.s +.if mixin == 1 + add x10,x10,x24,lsr #32 +.endif +.inst 0x04bc018c //add z12.s,z12.s,z28.s +.if mixin == 1 + add x9,x9,x10,lsl #32 // pack +.endif +.if mixin == 1 + ldp x8,x10,[x1],#16 +.endif +.if mixin == 1 + add w11,w11,w25 +.endif +.inst 0x04bd0021 //add z1.s,z1.s,z29.s +.if mixin == 1 + add x12,x12,x25,lsr #32 +.endif +.inst 0x04be00a5 //add z5.s,z5.s,z30.s +.if mixin == 1 + add x11,x11,x12,lsl #32 // pack +.endif +.if mixin == 1 + add w13,w13,w26 +.endif +.inst 0x04b50129 //add z9.s,z9.s,z21.s +.if mixin == 1 + add x14,x14,x26,lsr #32 +.endif +.inst 0x04b601ad //add z13.s,z13.s,z22.s +.if mixin == 1 + add x13,x13,x14,lsl #32 // pack +.endif +.if mixin == 1 + ldp x12,x14,[x1],#16 +.endif +.if mixin == 1 + add w15,w15,w27 +.endif +.inst 0x04b70042 //add z2.s,z2.s,z23.s +.if mixin == 1 + add x16,x16,x27,lsr #32 +.endif +.inst 0x04b800c6 //add z6.s,z6.s,z24.s +.if mixin == 1 + add x15,x15,x16,lsl #32 // pack +.endif +.if mixin == 1 + add w17,w17,w28 +.endif +.inst 0x04b1014a //add z10.s,z10.s,z17.s +.if mixin == 1 + add x18,x18,x28,lsr #32 +.endif +.inst 0x04b201ce //add z14.s,z14.s,z18.s +.if mixin == 1 + add x17,x17,x18,lsl #32 // pack +.endif +.if mixin == 1 + ldp x16,x18,[x1],#16 +.endif +.inst 0x05a03bd4 //dup z20.s,w30 +.inst 0x05a038d9 //dup z25.s,w6 // bak[15] not available for SVE +.if mixin == 1 + add w19,w19,w29 +.endif +.inst 0x04b00063 //add z3.s,z3.s,z16.s +.if mixin == 1 + add x20,x20,x29,lsr #32 +.endif +.inst 0x04b300e7 //add z7.s,z7.s,z19.s +.if mixin == 1 + add x19,x19,x20,lsl #32 // pack +.endif +.if mixin == 1 + add w21,w21,w30 +.endif +.inst 0x04b4016b //add z11.s,z11.s,z20.s +.if mixin == 1 + add x22,x22,x30,lsr #32 +.endif +.inst 0x04b901ef //add z15.s,z15.s,z25.s +.if mixin == 1 + add x21,x21,x22,lsl #32 
// pack +.endif +.if mixin == 1 + ldp x20,x22,[x1],#16 +.endif +#ifdef __AARCH64EB__ + rev x7,x7 +.inst 0x05a48000 //revb z0.s,p0/m,z0.s +.inst 0x05a48084 //revb z4.s,p0/m,z4.s + rev x9,x9 +.inst 0x05a48108 //revb z8.s,p0/m,z8.s +.inst 0x05a4818c //revb z12.s,p0/m,z12.s + rev x11,x11 +.inst 0x05a48021 //revb z1.s,p0/m,z1.s +.inst 0x05a480a5 //revb z5.s,p0/m,z5.s + rev x13,x13 +.inst 0x05a48129 //revb z9.s,p0/m,z9.s +.inst 0x05a481ad //revb z13.s,p0/m,z13.s + rev x15,x15 +.inst 0x05a48042 //revb z2.s,p0/m,z2.s +.inst 0x05a480c6 //revb z6.s,p0/m,z6.s + rev x17,x17 +.inst 0x05a4814a //revb z10.s,p0/m,z10.s +.inst 0x05a481ce //revb z14.s,p0/m,z14.s + rev x19,x19 +.inst 0x05a48063 //revb z3.s,p0/m,z3.s +.inst 0x05a480e7 //revb z7.s,p0/m,z7.s + rev x21,x21 +.inst 0x05a4816b //revb z11.s,p0/m,z11.s +.inst 0x05a481ef //revb z15.s,p0/m,z15.s +#endif +.if mixin == 1 + add x29,x29,#1 +.endif + cmp x5,4 + b.ne 200f +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.if mixin == 1 + eor x11,x11,x12 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x13,x13,x14 +.endif +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23021 //eor z1.d,z1.d,z18.d +.inst 0x04b33042 //eor z2.d,z2.d,z19.d +.inst 0x04b43063 //eor z3.d,z3.d,z20.d +.inst 0x04b53084 //eor z4.d,z4.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b730c6 //eor z6.d,z6.d,z23.d +.inst 0x04b830e7 //eor z7.d,z7.d,z24.d + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13108 //eor z8.d,z8.d,z17.d +.inst 0x04b23129 //eor z9.d,z9.d,z18.d +.if mixin == 1 + stp x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b4316b //eor z11.d,z11.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b5318c //eor z12.d,z12.d,z21.d +.inst 0x04b631ad //eor z13.d,z13.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 
0x04b731ce //eor z14.d,z14.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + b 210f +200: +.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s +.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s +.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s +.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s + +.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s +.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s +.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s +.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d +.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d +.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d + +.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d +.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s +.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s +.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s +.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s + +.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s +.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s +.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s +.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s + +.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d +.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d + +.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d +.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d +.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x11,x11,x12 +.endif +.if mixin == 1 + eor x13,x13,x14 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w 
{z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23084 //eor z4.d,z4.d,z18.d +.inst 0x04b33108 //eor z8.d,z8.d,z19.d +.inst 0x04b4318c //eor z12.d,z12.d,z20.d +.inst 0x04b53021 //eor z1.d,z1.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b73129 //eor z9.d,z9.d,z23.d +.inst 0x04b831ad //eor z13.d,z13.d,z24.d +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13042 //eor z2.d,z2.d,z17.d +.inst 0x04b230c6 //eor z6.d,z6.d,z18.d +.if mixin == 1 + stp x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b431ce //eor z14.d,z14.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b53063 //eor z3.d,z3.d,z21.d +.inst 0x04b630e7 //eor z7.d,z7.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b7316b //eor z11.d,z11.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d +.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] +.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] +.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] +.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] +.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] +.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] +.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] +.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] +.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] +.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] +.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +210: +.inst 0x04b0e3fd //incw x29, ALL, MUL #1 + subs x2,x2,64 + b.gt 100b + b 110f +101: + mixin=0 + lsr x8,x23,#32 +.inst 0x05a03ae0 //dup z0.s,w23 +.inst 0x05a03af9 //dup z25.s,w23 +.if mixin == 1 + mov w7,w23 +.endif +.inst 0x05a03904 //dup z4.s,w8 +.inst 0x05a0391a //dup z26.s,w8 + lsr x10,x24,#32 +.inst 0x05a03b08 //dup z8.s,w24 +.inst 0x05a03b1b //dup z27.s,w24 +.if mixin == 1 + mov w9,w24 +.endif +.inst 0x05a0394c //dup z12.s,w10 +.inst 0x05a0395c //dup z28.s,w10 + lsr x12,x25,#32 +.inst 0x05a03b21 //dup z1.s,w25 +.inst 0x05a03b3d //dup z29.s,w25 +.if mixin == 1 + mov w11,w25 +.endif +.inst 0x05a03985 //dup z5.s,w12 +.inst 0x05a0399e //dup z30.s,w12 + lsr x14,x26,#32 +.inst 0x05a03b49 //dup z9.s,w26 +.inst 0x05a03b55 //dup z21.s,w26 +.if mixin == 1 + mov w13,w26 +.endif +.inst 0x05a039cd //dup z13.s,w14 +.inst 0x05a039d6 //dup z22.s,w14 + lsr x16,x27,#32 +.inst 0x05a03b62 //dup z2.s,w27 +.inst 0x05a03b77 //dup z23.s,w27 +.if mixin == 1 + mov w15,w27 +.endif +.inst 0x05a03a06 //dup z6.s,w16 +.inst 0x05a03a18 //dup z24.s,w16 + lsr x18,x28,#32 +.inst 0x05a03b8a //dup z10.s,w28 +.if mixin == 1 + mov w17,w28 +.endif +.inst 0x05a03a4e //dup 
z14.s,w18 + lsr x22,x30,#32 +.inst 0x05a03bcb //dup z11.s,w30 +.if mixin == 1 + mov w21,w30 +.endif +.inst 0x05a03acf //dup z15.s,w22 +.if mixin == 1 + add w20,w29,#1 + mov w19,w29 +.inst 0x04a14690 //index z16.s,w20,1 +.inst 0x04a14683 //index z3.s,w20,1 +.else +.inst 0x04a147b0 //index z16.s,w29,1 +.inst 0x04a147a3 //index z3.s,w29,1 +.endif + lsr x20,x29,#32 +.inst 0x05a03a87 //dup z7.s,w20 + mov x6,#10 +10: +.align 5 +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.inst 0x04a03063 //eor z3.d,z3.d,z0.d +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04a430e7 //eor z7.d,z7.d,z4.d +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04a8316b //eor z11.d,z11.d,z8.d +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x04ac31ef //eor z15.d,z15.d,z12.d +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x05a58063 //revh z3.s,p0/m,z3.s +.if mixin == 1 + ror w19,w19,#16 +.endif +.inst 0x05a580e7 //revh z7.s,p0/m,z7.s +.if mixin == 1 + ror w20,w20,#16 +.endif +.inst 0x05a5816b //revh z11.s,p0/m,z11.s +.if mixin == 1 + ror w21,w21,#16 +.endif +.inst 0x05a581ef //revh z15.s,p0/m,z15.s +.if mixin == 1 + ror w22,w22,#16 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.inst 0x04a23021 //eor z1.d,z1.d,z2.d +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x04a630a5 //eor z5.d,z5.d,z6.d +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x04aa3129 //eor z9.d,z9.d,z10.d +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x04ae31ad //eor z13.d,z13.d,z14.d +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x046c9c31 //lsl z17.s,z1.s,12 +.inst 0x046c9cb2 //lsl z18.s,z5.s,12 +.inst 0x046c9d33 //lsl z19.s,z9.s,12 +.inst 0x046c9db4 //lsl z20.s,z13.s,12 +.inst 0x046c9421 //lsr z1.s,z1.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.inst 0x046c94a5 //lsr z5.s,z5.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.inst 0x046c9529 //lsr z9.s,z9.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.inst 0x046c95ad //lsr z13.s,z13.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.inst 0x04713021 //orr z1.d,z1.d,z17.d +.inst 0x047230a5 //orr z5.d,z5.d,z18.d +.inst 0x04733129 //orr z9.d,z9.d,z19.d +.inst 0x047431ad //orr z13.d,z13.d,z20.d +.inst 0x04a10000 //add z0.s,z0.s,z1.s +.if mixin == 1 + add w7,w7,w11 +.endif +.inst 0x04a50084 //add z4.s,z4.s,z5.s +.if mixin == 1 + add w8,w8,w12 +.endif +.inst 0x04a90108 //add z8.s,z8.s,z9.s +.if mixin == 1 + add w9,w9,w13 +.endif +.inst 0x04ad018c //add z12.s,z12.s,z13.s +.if mixin == 1 + add w10,w10,w14 +.endif +.inst 0x04a03063 //eor z3.d,z3.d,z0.d +.if mixin == 1 + eor w19,w19,w7 +.endif +.inst 0x04a430e7 //eor z7.d,z7.d,z4.d +.if mixin == 1 + eor w20,w20,w8 +.endif +.inst 0x04a8316b //eor z11.d,z11.d,z8.d +.if mixin == 1 + eor w21,w21,w9 +.endif +.inst 0x04ac31ef //eor z15.d,z15.d,z12.d +.if mixin == 1 + eor w22,w22,w10 +.endif +.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b +.if mixin == 1 + ror w19,w19,#24 +.endif +.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b +.if mixin == 1 + ror w20,w20,#24 +.endif +.inst 0x053f316b //tbl z11.b,{z11.b},z31.b +.if 
mixin == 1 + ror w21,w21,#24 +.endif +.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b +.if mixin == 1 + ror w22,w22,#24 +.endif +.inst 0x04a30042 //add z2.s,z2.s,z3.s +.if mixin == 1 + add w15,w15,w19 +.endif +.inst 0x04a700c6 //add z6.s,z6.s,z7.s +.if mixin == 1 + add w16,w16,w20 +.endif +.inst 0x04ab014a //add z10.s,z10.s,z11.s +.if mixin == 1 + add w17,w17,w21 +.endif +.inst 0x04af01ce //add z14.s,z14.s,z15.s +.if mixin == 1 + add w18,w18,w22 +.endif +.inst 0x04a23021 //eor z1.d,z1.d,z2.d +.if mixin == 1 + eor w11,w11,w15 +.endif +.inst 0x04a630a5 //eor z5.d,z5.d,z6.d +.if mixin == 1 + eor w12,w12,w16 +.endif +.inst 0x04aa3129 //eor z9.d,z9.d,z10.d +.if mixin == 1 + eor w13,w13,w17 +.endif +.inst 0x04ae31ad //eor z13.d,z13.d,z14.d +.if mixin == 1 + eor w14,w14,w18 +.endif +.inst 0x04679c31 //lsl z17.s,z1.s,7 +.inst 0x04679cb2 //lsl z18.s,z5.s,7 +.inst 0x04679d33 //lsl z19.s,z9.s,7 +.inst 0x04679db4 //lsl z20.s,z13.s,7 +.inst 0x04679421 //lsr z1.s,z1.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif +.inst 0x046794a5 //lsr z5.s,z5.s,25 +.if mixin == 1 + ror w12,w12,25 +.endif +.inst 0x04679529 //lsr z9.s,z9.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.inst 0x046795ad //lsr z13.s,z13.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.inst 0x04713021 //orr z1.d,z1.d,z17.d +.inst 0x047230a5 //orr z5.d,z5.d,z18.d +.inst 0x04733129 //orr z9.d,z9.d,z19.d +.inst 0x047431ad //orr z13.d,z13.d,z20.d +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.inst 0x04a031ef //eor z15.d,z15.d,z0.d +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x04a43063 //eor z3.d,z3.d,z4.d +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04a830e7 //eor z7.d,z7.d,z8.d +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04ac316b //eor z11.d,z11.d,z12.d +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x05a581ef //revh z15.s,p0/m,z15.s +.if mixin == 1 + ror w22,w22,#16 +.endif +.inst 0x05a58063 //revh z3.s,p0/m,z3.s +.if mixin == 1 + ror w19,w19,#16 +.endif +.inst 0x05a580e7 //revh z7.s,p0/m,z7.s +.if mixin == 1 + ror w20,w20,#16 +.endif +.inst 0x05a5816b //revh z11.s,p0/m,z11.s +.if mixin == 1 + ror w21,w21,#16 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x04ae3129 //eor z9.d,z9.d,z14.d +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x04a231ad //eor z13.d,z13.d,z2.d +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x04a63021 //eor z1.d,z1.d,z6.d +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x046c9cb1 //lsl z17.s,z5.s,12 +.inst 0x046c9d32 //lsl z18.s,z9.s,12 +.inst 0x046c9db3 //lsl z19.s,z13.s,12 +.inst 0x046c9c34 //lsl z20.s,z1.s,12 +.inst 0x046c94a5 //lsr z5.s,z5.s,20 +.if mixin == 1 + ror w12,w12,20 +.endif +.inst 0x046c9529 //lsr z9.s,z9.s,20 +.if mixin == 1 + ror w13,w13,20 +.endif +.inst 0x046c95ad //lsr z13.s,z13.s,20 +.if mixin == 1 + ror w14,w14,20 +.endif +.inst 0x046c9421 //lsr z1.s,z1.s,20 +.if mixin == 1 + ror w11,w11,20 +.endif +.inst 0x047130a5 //orr z5.d,z5.d,z17.d 
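+// The .inst words above are pre-encoded SVE instructions; their mnemonics are
+// given in the trailing comments. Each SVE lane, and the scalar twin guarded
+// by ".if mixin == 1", runs the standard ChaCha20 quarter-round: the 16-bit
+// rotate is revh (ror #16 in the scalar copy), the 12- and 7-bit rotates are
+// built from lsl+lsr+orr pairs (ror #20 and #25), and the 8-bit rotate is a
+// byte table lookup, tbl against the shuffle mask in z31 (ror #24). The mixin
+// block carries one extra ChaCha20 state in general-purpose registers
+// alongside the vector lanes.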
+.inst 0x04723129 //orr z9.d,z9.d,z18.d +.inst 0x047331ad //orr z13.d,z13.d,z19.d +.inst 0x04743021 //orr z1.d,z1.d,z20.d +.inst 0x04a50000 //add z0.s,z0.s,z5.s +.if mixin == 1 + add w7,w7,w12 +.endif +.inst 0x04a90084 //add z4.s,z4.s,z9.s +.if mixin == 1 + add w8,w8,w13 +.endif +.inst 0x04ad0108 //add z8.s,z8.s,z13.s +.if mixin == 1 + add w9,w9,w14 +.endif +.inst 0x04a1018c //add z12.s,z12.s,z1.s +.if mixin == 1 + add w10,w10,w11 +.endif +.inst 0x04a031ef //eor z15.d,z15.d,z0.d +.if mixin == 1 + eor w22,w22,w7 +.endif +.inst 0x04a43063 //eor z3.d,z3.d,z4.d +.if mixin == 1 + eor w19,w19,w8 +.endif +.inst 0x04a830e7 //eor z7.d,z7.d,z8.d +.if mixin == 1 + eor w20,w20,w9 +.endif +.inst 0x04ac316b //eor z11.d,z11.d,z12.d +.if mixin == 1 + eor w21,w21,w10 +.endif +.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b +.if mixin == 1 + ror w22,w22,#24 +.endif +.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b +.if mixin == 1 + ror w19,w19,#24 +.endif +.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b +.if mixin == 1 + ror w20,w20,#24 +.endif +.inst 0x053f316b //tbl z11.b,{z11.b},z31.b +.if mixin == 1 + ror w21,w21,#24 +.endif +.inst 0x04af014a //add z10.s,z10.s,z15.s +.if mixin == 1 + add w17,w17,w22 +.endif +.inst 0x04a301ce //add z14.s,z14.s,z3.s +.if mixin == 1 + add w18,w18,w19 +.endif +.inst 0x04a70042 //add z2.s,z2.s,z7.s +.if mixin == 1 + add w15,w15,w20 +.endif +.inst 0x04ab00c6 //add z6.s,z6.s,z11.s +.if mixin == 1 + add w16,w16,w21 +.endif +.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d +.if mixin == 1 + eor w12,w12,w17 +.endif +.inst 0x04ae3129 //eor z9.d,z9.d,z14.d +.if mixin == 1 + eor w13,w13,w18 +.endif +.inst 0x04a231ad //eor z13.d,z13.d,z2.d +.if mixin == 1 + eor w14,w14,w15 +.endif +.inst 0x04a63021 //eor z1.d,z1.d,z6.d +.if mixin == 1 + eor w11,w11,w16 +.endif +.inst 0x04679cb1 //lsl z17.s,z5.s,7 +.inst 0x04679d32 //lsl z18.s,z9.s,7 +.inst 0x04679db3 //lsl z19.s,z13.s,7 +.inst 0x04679c34 //lsl z20.s,z1.s,7 +.inst 0x046794a5 //lsr z5.s,z5.s,25 +.if mixin == 1 + ror w12,w12,25 +.endif +.inst 0x04679529 //lsr z9.s,z9.s,25 +.if mixin == 1 + ror w13,w13,25 +.endif +.inst 0x046795ad //lsr z13.s,z13.s,25 +.if mixin == 1 + ror w14,w14,25 +.endif +.inst 0x04679421 //lsr z1.s,z1.s,25 +.if mixin == 1 + ror w11,w11,25 +.endif +.inst 0x047130a5 //orr z5.d,z5.d,z17.d +.inst 0x04723129 //orr z9.d,z9.d,z18.d +.inst 0x047331ad //orr z13.d,z13.d,z19.d +.inst 0x04743021 //orr z1.d,z1.d,z20.d + sub x6,x6,1 + cbnz x6,10b + lsr x6,x28,#32 +.inst 0x05a03b91 //dup z17.s,w28 +.inst 0x05a038d2 //dup z18.s,w6 + lsr x6,x29,#32 +.inst 0x05a038d3 //dup z19.s,w6 + lsr x6,x30,#32 +.if mixin == 1 + add w7,w7,w23 +.endif +.inst 0x04b90000 //add z0.s,z0.s,z25.s +.if mixin == 1 + add x8,x8,x23,lsr #32 +.endif +.inst 0x04ba0084 //add z4.s,z4.s,z26.s +.if mixin == 1 + add x7,x7,x8,lsl #32 // pack +.endif +.if mixin == 1 + add w9,w9,w24 +.endif +.inst 0x04bb0108 //add z8.s,z8.s,z27.s +.if mixin == 1 + add x10,x10,x24,lsr #32 +.endif +.inst 0x04bc018c //add z12.s,z12.s,z28.s +.if mixin == 1 + add x9,x9,x10,lsl #32 // pack +.endif +.if mixin == 1 + ldp x8,x10,[x1],#16 +.endif +.if mixin == 1 + add w11,w11,w25 +.endif +.inst 0x04bd0021 //add z1.s,z1.s,z29.s +.if mixin == 1 + add x12,x12,x25,lsr #32 +.endif +.inst 0x04be00a5 //add z5.s,z5.s,z30.s +.if mixin == 1 + add x11,x11,x12,lsl #32 // pack +.endif +.if mixin == 1 + add w13,w13,w26 +.endif +.inst 0x04b50129 //add z9.s,z9.s,z21.s +.if mixin == 1 + add x14,x14,x26,lsr #32 +.endif +.inst 0x04b601ad //add z13.s,z13.s,z22.s +.if mixin == 1 + add x13,x13,x14,lsl #32 // pack +.endif +.if mixin == 1 + ldp 
x12,x14,[x1],#16 +.endif +.if mixin == 1 + add w15,w15,w27 +.endif +.inst 0x04b70042 //add z2.s,z2.s,z23.s +.if mixin == 1 + add x16,x16,x27,lsr #32 +.endif +.inst 0x04b800c6 //add z6.s,z6.s,z24.s +.if mixin == 1 + add x15,x15,x16,lsl #32 // pack +.endif +.if mixin == 1 + add w17,w17,w28 +.endif +.inst 0x04b1014a //add z10.s,z10.s,z17.s +.if mixin == 1 + add x18,x18,x28,lsr #32 +.endif +.inst 0x04b201ce //add z14.s,z14.s,z18.s +.if mixin == 1 + add x17,x17,x18,lsl #32 // pack +.endif +.if mixin == 1 + ldp x16,x18,[x1],#16 +.endif +.inst 0x05a03bd4 //dup z20.s,w30 +.inst 0x05a038d9 //dup z25.s,w6 // bak[15] not available for SVE +.if mixin == 1 + add w19,w19,w29 +.endif +.inst 0x04b00063 //add z3.s,z3.s,z16.s +.if mixin == 1 + add x20,x20,x29,lsr #32 +.endif +.inst 0x04b300e7 //add z7.s,z7.s,z19.s +.if mixin == 1 + add x19,x19,x20,lsl #32 // pack +.endif +.if mixin == 1 + add w21,w21,w30 +.endif +.inst 0x04b4016b //add z11.s,z11.s,z20.s +.if mixin == 1 + add x22,x22,x30,lsr #32 +.endif +.inst 0x04b901ef //add z15.s,z15.s,z25.s +.if mixin == 1 + add x21,x21,x22,lsl #32 // pack +.endif +.if mixin == 1 + ldp x20,x22,[x1],#16 +.endif +#ifdef __AARCH64EB__ + rev x7,x7 +.inst 0x05a48000 //revb z0.s,p0/m,z0.s +.inst 0x05a48084 //revb z4.s,p0/m,z4.s + rev x9,x9 +.inst 0x05a48108 //revb z8.s,p0/m,z8.s +.inst 0x05a4818c //revb z12.s,p0/m,z12.s + rev x11,x11 +.inst 0x05a48021 //revb z1.s,p0/m,z1.s +.inst 0x05a480a5 //revb z5.s,p0/m,z5.s + rev x13,x13 +.inst 0x05a48129 //revb z9.s,p0/m,z9.s +.inst 0x05a481ad //revb z13.s,p0/m,z13.s + rev x15,x15 +.inst 0x05a48042 //revb z2.s,p0/m,z2.s +.inst 0x05a480c6 //revb z6.s,p0/m,z6.s + rev x17,x17 +.inst 0x05a4814a //revb z10.s,p0/m,z10.s +.inst 0x05a481ce //revb z14.s,p0/m,z14.s + rev x19,x19 +.inst 0x05a48063 //revb z3.s,p0/m,z3.s +.inst 0x05a480e7 //revb z7.s,p0/m,z7.s + rev x21,x21 +.inst 0x05a4816b //revb z11.s,p0/m,z11.s +.inst 0x05a481ef //revb z15.s,p0/m,z15.s +#endif +.if mixin == 1 + add x29,x29,#1 +.endif + cmp x5,4 + b.ne 200f +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.if mixin == 1 + eor x11,x11,x12 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if mixin == 1 + eor x13,x13,x14 +.endif +.if mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 
z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23021 //eor z1.d,z1.d,z18.d +.inst 0x04b33042 //eor z2.d,z2.d,z19.d +.inst 0x04b43063 //eor z3.d,z3.d,z20.d +.inst 0x04b53084 //eor z4.d,z4.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b730c6 //eor z6.d,z6.d,z23.d +.inst 0x04b830e7 //eor z7.d,z7.d,z24.d + ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 + ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13108 //eor z8.d,z8.d,z17.d +.inst 0x04b23129 //eor z9.d,z9.d,z18.d +.if mixin == 1 + stp x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b4316b //eor z11.d,z11.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b5318c //eor z12.d,z12.d,z21.d +.inst 0x04b631ad //eor z13.d,z13.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b731ce //eor z14.d,z14.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + b 210f +200: +.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s +.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s +.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s +.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s + +.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s +.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s +.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s +.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d +.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d +.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d + +.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d +.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d +.if mixin == 1 + eor x7,x7,x8 +.endif +.if mixin == 1 + eor x9,x9,x10 +.endif +.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s +.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s +.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s +.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s + +.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s +.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s +.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s +.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s + +.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d +.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d + +.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d +.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d +.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x11,x11,x12 +.endif +.if mixin == 1 + eor x13,x13,x14 +.endif +.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s +.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s +.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s +.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s + +.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s +.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s +.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s +.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s + +.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d +.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d +.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d +.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d + +.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d +.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d +.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d +.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d +.if 
mixin == 1 + eor x15,x15,x16 +.endif +.if mixin == 1 + eor x17,x17,x18 +.endif +.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s +.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s +.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s +.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s + +.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s +.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s +.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s +.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s + +.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d +.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d +.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d +.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d + +.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d +.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d +.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d +.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d +.if mixin == 1 + eor x19,x19,x20 +.endif +.if mixin == 1 + eor x21,x21,x22 +.endif +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.inst 0x04b13000 //eor z0.d,z0.d,z17.d +.inst 0x04b23084 //eor z4.d,z4.d,z18.d +.inst 0x04b33108 //eor z8.d,z8.d,z19.d +.inst 0x04b4318c //eor z12.d,z12.d,z20.d +.inst 0x04b53021 //eor z1.d,z1.d,z21.d +.inst 0x04b630a5 //eor z5.d,z5.d,z22.d +.inst 0x04b73129 //eor z9.d,z9.d,z23.d +.inst 0x04b831ad //eor z13.d,z13.d,z24.d +.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] +.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] +.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] +.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] +.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] +.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] +.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] +.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] +.inst 0x04215101 //addvl x1,x1,8 +.if mixin == 1 + stp x7,x9,[x0],#16 +.endif +.inst 0x04b13042 //eor z2.d,z2.d,z17.d +.inst 0x04b230c6 //eor z6.d,z6.d,z18.d +.if mixin == 1 + stp x11,x13,[x0],#16 +.endif +.inst 0x04b3314a //eor z10.d,z10.d,z19.d +.inst 0x04b431ce //eor z14.d,z14.d,z20.d +.if mixin == 1 + stp x15,x17,[x0],#16 +.endif +.inst 0x04b53063 //eor z3.d,z3.d,z21.d +.inst 0x04b630e7 //eor z7.d,z7.d,z22.d +.if mixin == 1 + stp x19,x21,[x0],#16 +.endif +.inst 0x04b7316b //eor z11.d,z11.d,z23.d +.inst 0x04b831ef //eor z15.d,z15.d,z24.d +.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] +.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] +.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] +.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] +.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] +.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] +.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] +.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] +.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] +.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] +.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] +.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] +.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] +.inst 0x04205100 //addvl x0,x0,8 +210: +.inst 0x04b0e3fd //incw x29, ALL, MUL #1 +110: +2: + str w29,[x4] + ldp 
d10,d11,[sp,16] + ldp d12,d13,[sp,32] + ldp d14,d15,[sp,48] + ldp x16,x17,[sp,64] + ldp x18,x19,[sp,80] + ldp x20,x21,[sp,96] + ldp x22,x23,[sp,112] + ldp x24,x25,[sp,128] + ldp x26,x27,[sp,144] + ldp x28,x29,[sp,160] + ldr x30,[sp,176] + ldp d8,d9,[sp],192 + AARCH64_VALIDATE_LINK_REGISTER +.Lreturn: + ret +.size ChaCha20_ctr32_sve,.-ChaCha20_ctr32_sve diff --git a/contrib/openssl-cmake/asm/crypto/chacha/chacha-armv8.S b/contrib/openssl-cmake/asm/crypto/chacha/chacha-armv8.S new file mode 100644 index 000000000000..ca9d709ed670 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/chacha/chacha-armv8.S @@ -0,0 +1,2075 @@ +#include "arm_arch.h" +#ifndef __KERNEL__ + +.hidden OPENSSL_armcap_P + + +#endif + +.section .rodata + +.align 5 +.Lsigma: +.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral +.Lone: +.long 1,2,3,4 +.Lrot24: +.long 0x02010003,0x06050407,0x0a09080b,0x0e0d0c0f +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0 +.align 2 + +.text + +.globl ChaCha20_ctr32_dflt +.type ChaCha20_ctr32_dflt,%function +.align 5 +ChaCha20_ctr32_dflt: + AARCH64_SIGN_LINK_REGISTER + cmp x2,#192 + b.lo .Lshort +#ifndef __KERNEL__ + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] +.Lcheck_neon: + tst w17,#ARMV7_NEON + b.ne .LChaCha20_neon +#endif + +.Lshort: + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,.Lsigma + add x5,x5,#:lo12:.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ldp x28,x30,[x4] // load counter +#ifdef __AARCH64EB__ + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + +.Loop_outer: + mov w5,w22 // unpack key block + lsr x6,x22,#32 + mov w7,w23 + lsr x8,x23,#32 + mov w9,w24 + lsr x10,x24,#32 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#10 + subs x2,x2,#64 +.Loop: + sub x4,x4,#1 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + ror w21,w21,#16 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#20 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + add w5,w5,w9 + add w6,w6,w10 + add w7,w7,w11 + add w8,w8,w12 + eor w17,w17,w5 + eor w19,w19,w6 + eor w20,w20,w7 + eor w21,w21,w8 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + ror w21,w21,#24 + add w13,w13,w17 + add w14,w14,w19 + add w15,w15,w20 + add w16,w16,w21 + eor w9,w9,w13 + eor w10,w10,w14 + eor w11,w11,w15 + eor w12,w12,w16 + ror w9,w9,#25 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#16 + ror w17,w17,#16 + ror w19,w19,#16 + ror w20,w20,#16 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#20 + ror w11,w11,#20 + ror w12,w12,#20 + ror w9,w9,#20 + add w5,w5,w10 + add w6,w6,w11 + add w7,w7,w12 + add w8,w8,w9 + eor w21,w21,w5 + eor 
w17,w17,w6 + eor w19,w19,w7 + eor w20,w20,w8 + ror w21,w21,#24 + ror w17,w17,#24 + ror w19,w19,#24 + ror w20,w20,#24 + add w15,w15,w21 + add w16,w16,w17 + add w13,w13,w19 + add w14,w14,w20 + eor w10,w10,w15 + eor w11,w11,w16 + eor w12,w12,w13 + eor w9,w9,w14 + ror w10,w10,#25 + ror w11,w11,#25 + ror w12,w12,#25 + ror w9,w9,#25 + cbnz x4,.Loop + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + b.lo .Ltail + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + + b.hi .Loop_outer + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 +.Labort: + AARCH64_VALIDATE_LINK_REGISTER + ret + +.align 4 +.Ltail: + add x2,x2,#64 +.Less_than_64: + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + stp x5,x7,[sp,#0] + stp x9,x11,[sp,#16] + stp x13,x15,[sp,#32] + stp x17,x20,[sp,#48] + +.Loop_tail: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,.Loop_tail + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ChaCha20_ctr32_dflt,.-ChaCha20_ctr32_dflt + +.globl ChaCha20_ctr32 +.type ChaCha20_ctr32,%function +.align 5 +ChaCha20_ctr32: + AARCH64_SIGN_LINK_REGISTER + cbz x2,.Labort + cmp x2,#192 + b.lo .Lshort +#ifndef __KERNEL__ + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] + tst w17,#ARMV8_SVE + b.eq .Lcheck_neon + stp x29,x30,[sp,#-16]! 
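The SVE branch entered here copies the caller's counter to the stack (the buffer behind x4 is read-only per the API contract), lets ChaCha20_ctr32_sve consume as much of the input as it can, and hands any remaining tail to ChaCha20_ctr32_dflt with the advanced counter. A minimal C sketch of that control flow follows; chacha20_ctr32_sketch, sve_pass, dflt_pass and have_sve are invented placeholder names rather than the patch's own symbols, and the placeholder kernels only model the length bookkeeping, not the cipher.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int have_sve;   /* stand-in for the ARMV8_SVE bit of OPENSSL_armcap_P */

/* Placeholders standing in for the ChaCha20_ctr32_sve and ChaCha20_ctr32_dflt
 * assembly routines in this file; the real ones pass the remaining length and
 * the counter through registers and a stack copy. */
static size_t sve_pass(uint8_t *out, const uint8_t *in, size_t len,
                       const uint32_t key[8], uint32_t ctr[4])
{
    (void)out; (void)in; (void)key; (void)ctr;
    return len - len % 64;                 /* pretend: whole blocks consumed */
}

static void dflt_pass(uint8_t *out, const uint8_t *in, size_t len,
                      const uint32_t key[8], const uint32_t ctr[4])
{
    (void)out; (void)in; (void)len; (void)key; (void)ctr;
}

void chacha20_ctr32_sketch(uint8_t *out, const uint8_t *in, size_t len,
                           const uint32_t key[8], const uint32_t counter[4])
{
    if (len == 0)
        return;                                    /* cbz x2,.Labort        */
    if (len >= 192 && have_sve) {                  /* cmp x2,#192 + SVE bit */
        uint32_t ctr_copy[4];                      /* counter is read-only per
                                                      the API contract, so SVE
                                                      works on a stack copy  */
        memcpy(ctr_copy, counter, sizeof ctr_copy);
        size_t done = sve_pass(out, in, len, key, ctr_copy);
        if (done < len)                            /* tail goes to the NEON or
                                                      scalar path with the
                                                      advanced counter       */
            dflt_pass(out + done, in + done, len - done, key, ctr_copy);
        return;
    }
    dflt_pass(out, in, len, key, counter);         /* short or no-SVE input */
}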
+ sub sp,sp,#16 + // SVE handling will inevitably increment the counter + // Neon/Scalar code that follows to process tail data needs to + // use new counter, unfortunately the input counter buffer + // pointed to by ctr is meant to be read-only per API contract + // we have to copy the buffer to stack to be writable by SVE + ldp x5,x6,[x4] + stp x5,x6,[sp] + mov x4,sp + bl ChaCha20_ctr32_sve + cbz x2,1f + bl ChaCha20_ctr32_dflt +1: + add sp,sp,#16 + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +#endif + b .Lshort +.size ChaCha20_ctr32,.-ChaCha20_ctr32 + +#ifdef __KERNEL__ +.globl ChaCha20_neon +#endif +.type ChaCha20_neon,%function +.align 5 +ChaCha20_neon: + AARCH64_SIGN_LINK_REGISTER +.LChaCha20_neon: + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + + adrp x5,.Lsigma + add x5,x5,#:lo12:.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + cmp x2,#512 + b.hs .L512_or_more_neon + + sub sp,sp,#64 + + ldp x22,x23,[x5] // load sigma + ld1 {v0.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v1.4s,v2.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v3.4s},[x4] + stp d8,d9,[sp] // meet ABI requirements + ld1 {v8.4s,v9.4s},[x5] +#ifdef __AARCH64EB__ + rev64 v0.4s,v0.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + +.Loop_outer_neon: + dup v16.4s,v0.s[0] // unpack key block + mov w5,w22 + dup v20.4s,v0.s[1] + lsr x6,x22,#32 + dup v24.4s,v0.s[2] + mov w7,w23 + dup v28.4s,v0.s[3] + lsr x8,x23,#32 + dup v17.4s,v1.s[0] + mov w9,w24 + dup v21.4s,v1.s[1] + lsr x10,x24,#32 + dup v25.4s,v1.s[2] + mov w11,w25 + dup v29.4s,v1.s[3] + lsr x12,x25,#32 + dup v19.4s,v3.s[0] + mov w13,w26 + dup v23.4s,v3.s[1] + lsr x14,x26,#32 + dup v27.4s,v3.s[2] + mov w15,w27 + dup v31.4s,v3.s[3] + lsr x16,x27,#32 + add v19.4s,v19.4s,v8.4s + mov w17,w28 + dup v18.4s,v2.s[0] + lsr x19,x28,#32 + dup v22.4s,v2.s[1] + mov w20,w30 + dup v26.4s,v2.s[2] + lsr x21,x30,#32 + dup v30.4s,v2.s[3] + + mov x4,#10 + subs x2,x2,#320 +.Loop_neon: + sub x4,x4,#1 + add v16.4s,v16.4s,v17.4s + add w5,w5,w9 + add v20.4s,v20.4s,v21.4s + add w6,w6,w10 + add v24.4s,v24.4s,v25.4s + add w7,w7,w11 + add v28.4s,v28.4s,v29.4s + add w8,w8,w12 + eor v19.16b,v19.16b,v16.16b + eor w17,w17,w5 + eor v23.16b,v23.16b,v20.16b + eor w19,w19,w6 + eor v27.16b,v27.16b,v24.16b + eor w20,w20,w7 + eor v31.16b,v31.16b,v28.16b + eor w21,w21,w8 + rev32 v19.8h,v19.8h + ror w17,w17,#16 + rev32 v23.8h,v23.8h + ror w19,w19,#16 + rev32 v27.8h,v27.8h + ror w20,w20,#16 + rev32 v31.8h,v31.8h + ror w21,w21,#16 + add v18.4s,v18.4s,v19.4s + add w13,w13,w17 + add v22.4s,v22.4s,v23.4s + add w14,w14,w19 + add v26.4s,v26.4s,v27.4s + add w15,w15,w20 + add v30.4s,v30.4s,v31.4s + add w16,w16,w21 + eor v4.16b,v17.16b,v18.16b + eor w9,w9,w13 + eor v5.16b,v21.16b,v22.16b + eor w10,w10,w14 + eor v6.16b,v25.16b,v26.16b + eor w11,w11,w15 + eor v7.16b,v29.16b,v30.16b + eor w12,w12,w16 + ushr v17.4s,v4.4s,#20 + ror w9,w9,#20 + ushr v21.4s,v5.4s,#20 + ror w10,w10,#20 + ushr v25.4s,v6.4s,#20 + ror w11,w11,#20 + ushr v29.4s,v7.4s,#20 + ror w12,w12,#20 + sli v17.4s,v4.4s,#12 + add w5,w5,w9 + sli v21.4s,v5.4s,#12 + add w6,w6,w10 + sli v25.4s,v6.4s,#12 + add w7,w7,w11 + sli v29.4s,v7.4s,#12 + add w8,w8,w12 + add v16.4s,v16.4s,v17.4s + eor w17,w17,w5 + add v20.4s,v20.4s,v21.4s + eor w19,w19,w6 + add v24.4s,v24.4s,v25.4s + eor w20,w20,w7 + add v28.4s,v28.4s,v29.4s + eor w21,w21,w8 + eor v4.16b,v19.16b,v16.16b + ror w17,w17,#24 + 
eor v5.16b,v23.16b,v20.16b + ror w19,w19,#24 + eor v6.16b,v27.16b,v24.16b + ror w20,w20,#24 + eor v7.16b,v31.16b,v28.16b + ror w21,w21,#24 + tbl v19.16b,{v4.16b},v9.16b + add w13,w13,w17 + tbl v23.16b,{v5.16b},v9.16b + add w14,w14,w19 + tbl v27.16b,{v6.16b},v9.16b + add w15,w15,w20 + tbl v31.16b,{v7.16b},v9.16b + add w16,w16,w21 + add v18.4s,v18.4s,v19.4s + eor w9,w9,w13 + add v22.4s,v22.4s,v23.4s + eor w10,w10,w14 + add v26.4s,v26.4s,v27.4s + eor w11,w11,w15 + add v30.4s,v30.4s,v31.4s + eor w12,w12,w16 + eor v4.16b,v17.16b,v18.16b + ror w9,w9,#25 + eor v5.16b,v21.16b,v22.16b + ror w10,w10,#25 + eor v6.16b,v25.16b,v26.16b + ror w11,w11,#25 + eor v7.16b,v29.16b,v30.16b + ror w12,w12,#25 + ushr v17.4s,v4.4s,#25 + ushr v21.4s,v5.4s,#25 + ushr v25.4s,v6.4s,#25 + ushr v29.4s,v7.4s,#25 + sli v17.4s,v4.4s,#7 + sli v21.4s,v5.4s,#7 + sli v25.4s,v6.4s,#7 + sli v29.4s,v7.4s,#7 + add v16.4s,v16.4s,v21.4s + add w5,w5,w10 + add v20.4s,v20.4s,v25.4s + add w6,w6,w11 + add v24.4s,v24.4s,v29.4s + add w7,w7,w12 + add v28.4s,v28.4s,v17.4s + add w8,w8,w9 + eor v31.16b,v31.16b,v16.16b + eor w21,w21,w5 + eor v19.16b,v19.16b,v20.16b + eor w17,w17,w6 + eor v23.16b,v23.16b,v24.16b + eor w19,w19,w7 + eor v27.16b,v27.16b,v28.16b + eor w20,w20,w8 + rev32 v31.8h,v31.8h + ror w21,w21,#16 + rev32 v19.8h,v19.8h + ror w17,w17,#16 + rev32 v23.8h,v23.8h + ror w19,w19,#16 + rev32 v27.8h,v27.8h + ror w20,w20,#16 + add v26.4s,v26.4s,v31.4s + add w15,w15,w21 + add v30.4s,v30.4s,v19.4s + add w16,w16,w17 + add v18.4s,v18.4s,v23.4s + add w13,w13,w19 + add v22.4s,v22.4s,v27.4s + add w14,w14,w20 + eor v4.16b,v21.16b,v26.16b + eor w10,w10,w15 + eor v5.16b,v25.16b,v30.16b + eor w11,w11,w16 + eor v6.16b,v29.16b,v18.16b + eor w12,w12,w13 + eor v7.16b,v17.16b,v22.16b + eor w9,w9,w14 + ushr v21.4s,v4.4s,#20 + ror w10,w10,#20 + ushr v25.4s,v5.4s,#20 + ror w11,w11,#20 + ushr v29.4s,v6.4s,#20 + ror w12,w12,#20 + ushr v17.4s,v7.4s,#20 + ror w9,w9,#20 + sli v21.4s,v4.4s,#12 + add w5,w5,w10 + sli v25.4s,v5.4s,#12 + add w6,w6,w11 + sli v29.4s,v6.4s,#12 + add w7,w7,w12 + sli v17.4s,v7.4s,#12 + add w8,w8,w9 + add v16.4s,v16.4s,v21.4s + eor w21,w21,w5 + add v20.4s,v20.4s,v25.4s + eor w17,w17,w6 + add v24.4s,v24.4s,v29.4s + eor w19,w19,w7 + add v28.4s,v28.4s,v17.4s + eor w20,w20,w8 + eor v4.16b,v31.16b,v16.16b + ror w21,w21,#24 + eor v5.16b,v19.16b,v20.16b + ror w17,w17,#24 + eor v6.16b,v23.16b,v24.16b + ror w19,w19,#24 + eor v7.16b,v27.16b,v28.16b + ror w20,w20,#24 + tbl v31.16b,{v4.16b},v9.16b + add w15,w15,w21 + tbl v19.16b,{v5.16b},v9.16b + add w16,w16,w17 + tbl v23.16b,{v6.16b},v9.16b + add w13,w13,w19 + tbl v27.16b,{v7.16b},v9.16b + add w14,w14,w20 + add v26.4s,v26.4s,v31.4s + eor w10,w10,w15 + add v30.4s,v30.4s,v19.4s + eor w11,w11,w16 + add v18.4s,v18.4s,v23.4s + eor w12,w12,w13 + add v22.4s,v22.4s,v27.4s + eor w9,w9,w14 + eor v4.16b,v21.16b,v26.16b + ror w10,w10,#25 + eor v5.16b,v25.16b,v30.16b + ror w11,w11,#25 + eor v6.16b,v29.16b,v18.16b + ror w12,w12,#25 + eor v7.16b,v17.16b,v22.16b + ror w9,w9,#25 + ushr v21.4s,v4.4s,#25 + ushr v25.4s,v5.4s,#25 + ushr v29.4s,v6.4s,#25 + ushr v17.4s,v7.4s,#25 + sli v21.4s,v4.4s,#7 + sli v25.4s,v5.4s,#7 + sli v29.4s,v6.4s,#7 + sli v17.4s,v7.4s,#7 + cbnz x4,.Loop_neon + + add v19.4s,v19.4s,v8.4s + + zip1 v4.4s,v16.4s,v20.4s // transpose data + zip1 v5.4s,v24.4s,v28.4s + zip2 v6.4s,v16.4s,v20.4s + zip2 v7.4s,v24.4s,v28.4s + zip1 v16.2d,v4.2d,v5.2d + zip2 v20.2d,v4.2d,v5.2d + zip1 v24.2d,v6.2d,v7.2d + zip2 v28.2d,v6.2d,v7.2d + + zip1 v4.4s,v17.4s,v21.4s + zip1 v5.4s,v25.4s,v29.4s + zip2 v6.4s,v17.4s,v21.4s + 
zip2 v7.4s,v25.4s,v29.4s + zip1 v17.2d,v4.2d,v5.2d + zip2 v21.2d,v4.2d,v5.2d + zip1 v25.2d,v6.2d,v7.2d + zip2 v29.2d,v6.2d,v7.2d + + zip1 v4.4s,v18.4s,v22.4s + add w5,w5,w22 // accumulate key block + zip1 v5.4s,v26.4s,v30.4s + add x6,x6,x22,lsr#32 + zip2 v6.4s,v18.4s,v22.4s + add w7,w7,w23 + zip2 v7.4s,v26.4s,v30.4s + add x8,x8,x23,lsr#32 + zip1 v18.2d,v4.2d,v5.2d + add w9,w9,w24 + zip2 v22.2d,v4.2d,v5.2d + add x10,x10,x24,lsr#32 + zip1 v26.2d,v6.2d,v7.2d + add w11,w11,w25 + zip2 v30.2d,v6.2d,v7.2d + add x12,x12,x25,lsr#32 + + zip1 v4.4s,v19.4s,v23.4s + add w13,w13,w26 + zip1 v5.4s,v27.4s,v31.4s + add x14,x14,x26,lsr#32 + zip2 v6.4s,v19.4s,v23.4s + add w15,w15,w27 + zip2 v7.4s,v27.4s,v31.4s + add x16,x16,x27,lsr#32 + zip1 v19.2d,v4.2d,v5.2d + add w17,w17,w28 + zip2 v23.2d,v4.2d,v5.2d + add x19,x19,x28,lsr#32 + zip1 v27.2d,v6.2d,v7.2d + add w20,w20,w30 + zip2 v31.2d,v6.2d,v7.2d + add x21,x21,x30,lsr#32 + + b.lo .Ltail_neon + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add v16.4s,v16.4s,v0.4s // accumulate key block + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add v17.4s,v17.4s,v1.4s + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add v18.4s,v18.4s,v2.4s + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add v19.4s,v19.4s,v3.4s + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor x5,x5,x6 + add v20.4s,v20.4s,v0.4s + eor x7,x7,x8 + add v21.4s,v21.4s,v1.4s + eor x9,x9,x10 + add v22.4s,v22.4s,v2.4s + eor x11,x11,x12 + add v23.4s,v23.4s,v3.4s + eor x13,x13,x14 + eor v16.16b,v16.16b,v4.16b + movi v4.4s,#5 + eor x15,x15,x16 + eor v17.16b,v17.16b,v5.16b + eor x17,x17,x19 + eor v18.16b,v18.16b,v6.16b + eor x20,x20,x21 + eor v19.16b,v19.16b,v7.16b + add v8.4s,v8.4s,v4.4s // += 5 + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#5 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + add v24.4s,v24.4s,v0.4s + add v25.4s,v25.4s,v1.4s + add v26.4s,v26.4s,v2.4s + add v27.4s,v27.4s,v3.4s + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 + + eor v20.16b,v20.16b,v4.16b + eor v21.16b,v21.16b,v5.16b + eor v22.16b,v22.16b,v6.16b + eor v23.16b,v23.16b,v7.16b + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + add v28.4s,v28.4s,v0.4s + add v29.4s,v29.4s,v1.4s + add v30.4s,v30.4s,v2.4s + add v31.4s,v31.4s,v3.4s + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + eor v24.16b,v24.16b,v16.16b + eor v25.16b,v25.16b,v17.16b + eor v26.16b,v26.16b,v18.16b + eor v27.16b,v27.16b,v19.16b + st1 {v24.16b,v25.16b,v26.16b,v27.16b},[x0],#64 + + eor v28.16b,v28.16b,v20.16b + eor v29.16b,v29.16b,v21.16b + eor v30.16b,v30.16b,v22.16b + eor v31.16b,v31.16b,v23.16b + st1 {v28.16b,v29.16b,v30.16b,v31.16b},[x0],#64 + + b.hi .Loop_outer_neon + + ldp d8,d9,[sp] // meet ABI requirements + + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret + +.align 4 +.Ltail_neon: + add x2,x2,#320 + ldp d8,d9,[sp] // meet ABI requirements + cmp x2,#64 + b.lo .Less_than_64 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add 
x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add v16.4s,v16.4s,v0.4s // accumulate key block + stp x9,x11,[x0,#16] + add v17.4s,v17.4s,v1.4s + stp x13,x15,[x0,#32] + add v18.4s,v18.4s,v2.4s + stp x17,x20,[x0,#48] + add v19.4s,v19.4s,v3.4s + add x0,x0,#64 + b.eq .Ldone_neon + sub x2,x2,#64 + cmp x2,#64 + b.lo .Last_neon + + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor v16.16b,v16.16b,v4.16b + eor v17.16b,v17.16b,v5.16b + eor v18.16b,v18.16b,v6.16b + eor v19.16b,v19.16b,v7.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + b.eq .Ldone_neon + + add v16.4s,v20.4s,v0.4s + add v17.4s,v21.4s,v1.4s + sub x2,x2,#64 + add v18.4s,v22.4s,v2.4s + cmp x2,#64 + add v19.4s,v23.4s,v3.4s + b.lo .Last_neon + + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor v20.16b,v16.16b,v4.16b + eor v21.16b,v17.16b,v5.16b + eor v22.16b,v18.16b,v6.16b + eor v23.16b,v19.16b,v7.16b + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + b.eq .Ldone_neon + + add v16.4s,v24.4s,v0.4s + add v17.4s,v25.4s,v1.4s + sub x2,x2,#64 + add v18.4s,v26.4s,v2.4s + cmp x2,#64 + add v19.4s,v27.4s,v3.4s + b.lo .Last_neon + + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + eor v24.16b,v16.16b,v4.16b + eor v25.16b,v17.16b,v5.16b + eor v26.16b,v18.16b,v6.16b + eor v27.16b,v19.16b,v7.16b + st1 {v24.16b,v25.16b,v26.16b,v27.16b},[x0],#64 + b.eq .Ldone_neon + + add v16.4s,v28.4s,v0.4s + add v17.4s,v29.4s,v1.4s + add v18.4s,v30.4s,v2.4s + add v19.4s,v31.4s,v3.4s + sub x2,x2,#64 + +.Last_neon: + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[sp] + + sub x0,x0,#1 + add x1,x1,x2 + add x0,x0,x2 + add x4,sp,x2 + neg x2,x2 + +.Loop_tail_neon: + ldrb w10,[x1,x2] + ldrb w11,[x4,x2] + add x2,x2,#1 + eor w10,w10,w11 + strb w10,[x0,x2] + cbnz x2,.Loop_tail_neon + + stp xzr,xzr,[sp,#0] + stp xzr,xzr,[sp,#16] + stp xzr,xzr,[sp,#32] + stp xzr,xzr,[sp,#48] + +.Ldone_neon: + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ChaCha20_neon,.-ChaCha20_neon +.type ChaCha20_512_neon,%function +.align 5 +ChaCha20_512_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! 
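+// ChaCha20_512_neon is the wide NEON path taken when ChaCha20_neon sees 512
+// bytes or more (cmp x2,#512; b.hs .L512_or_more_neon above). Like the SVE
+// routine it interleaves one scalar ChaCha20 state in general-purpose
+// registers with the vector lanes, and it spills the invariant part of the
+// key block to the stack so that all of v0-v31 are free for working state.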
+ add x29,sp,#0 + + adrp x5,.Lsigma + add x5,x5,#:lo12:.Lsigma + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + +.L512_or_more_neon: + sub sp,sp,#128+64 + + eor v7.16b,v7.16b,v7.16b + ldp x22,x23,[x5] // load sigma + ld1 {v0.4s},[x5],#16 + ldp x24,x25,[x3] // load key + ldp x26,x27,[x3,#16] + ld1 {v1.4s,v2.4s},[x3] + ldp x28,x30,[x4] // load counter + ld1 {v3.4s},[x4] + ld1 {v7.s}[0],[x5] + add x3,x5,#16 // .Lrot24 +#ifdef __AARCH64EB__ + rev64 v0.4s,v0.4s + ror x24,x24,#32 + ror x25,x25,#32 + ror x26,x26,#32 + ror x27,x27,#32 + ror x28,x28,#32 + ror x30,x30,#32 +#endif + add v3.4s,v3.4s,v7.4s // += 1 + stp q0,q1,[sp,#0] // off-load key block, invariant part + add v3.4s,v3.4s,v7.4s // not typo + str q2,[sp,#32] + add v4.4s,v3.4s,v7.4s + add v5.4s,v4.4s,v7.4s + add v6.4s,v5.4s,v7.4s + shl v7.4s,v7.4s,#2 // 1 -> 4 + + stp d8,d9,[sp,#128+0] // meet ABI requirements + stp d10,d11,[sp,#128+16] + stp d12,d13,[sp,#128+32] + stp d14,d15,[sp,#128+48] + + sub x2,x2,#512 // not typo + +.Loop_outer_512_neon: + mov v8.16b,v0.16b + mov v12.16b,v0.16b + mov v16.16b,v0.16b + mov v20.16b,v0.16b + mov v24.16b,v0.16b + mov v28.16b,v0.16b + mov v9.16b,v1.16b + mov w5,w22 // unpack key block + mov v13.16b,v1.16b + lsr x6,x22,#32 + mov v17.16b,v1.16b + mov w7,w23 + mov v21.16b,v1.16b + lsr x8,x23,#32 + mov v25.16b,v1.16b + mov w9,w24 + mov v29.16b,v1.16b + lsr x10,x24,#32 + mov v11.16b,v3.16b + mov w11,w25 + mov v15.16b,v4.16b + lsr x12,x25,#32 + mov v19.16b,v5.16b + mov w13,w26 + mov v23.16b,v6.16b + lsr x14,x26,#32 + mov v10.16b,v2.16b + mov w15,w27 + mov v14.16b,v2.16b + lsr x16,x27,#32 + add v27.4s,v11.4s,v7.4s // +4 + mov w17,w28 + add v31.4s,v15.4s,v7.4s // +4 + lsr x19,x28,#32 + mov v18.16b,v2.16b + mov w20,w30 + mov v22.16b,v2.16b + lsr x21,x30,#32 + mov v26.16b,v2.16b + stp q3,q4,[sp,#48] // off-load key block, variable part + mov v30.16b,v2.16b + stp q5,q6,[sp,#80] + + mov x4,#5 + ld1 {v6.4s},[x3] + subs x2,x2,#512 +.Loop_upper_neon: + sub x4,x4,#1 + add v8.4s,v8.4s,v9.4s + add w5,w5,w9 + add v12.4s,v12.4s,v13.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + add v20.4s,v20.4s,v21.4s + add w8,w8,w12 + add v24.4s,v24.4s,v25.4s + eor w17,w17,w5 + add v28.4s,v28.4s,v29.4s + eor w19,w19,w6 + eor v11.16b,v11.16b,v8.16b + eor w20,w20,w7 + eor v15.16b,v15.16b,v12.16b + eor w21,w21,w8 + eor v19.16b,v19.16b,v16.16b + ror w17,w17,#16 + eor v23.16b,v23.16b,v20.16b + ror w19,w19,#16 + eor v27.16b,v27.16b,v24.16b + ror w20,w20,#16 + eor v31.16b,v31.16b,v28.16b + ror w21,w21,#16 + rev32 v11.8h,v11.8h + add w13,w13,w17 + rev32 v15.8h,v15.8h + add w14,w14,w19 + rev32 v19.8h,v19.8h + add w15,w15,w20 + rev32 v23.8h,v23.8h + add w16,w16,w21 + rev32 v27.8h,v27.8h + eor w9,w9,w13 + rev32 v31.8h,v31.8h + eor w10,w10,w14 + add v10.4s,v10.4s,v11.4s + eor w11,w11,w15 + add v14.4s,v14.4s,v15.4s + eor w12,w12,w16 + add v18.4s,v18.4s,v19.4s + ror w9,w9,#20 + add v22.4s,v22.4s,v23.4s + ror w10,w10,#20 + add v26.4s,v26.4s,v27.4s + ror w11,w11,#20 + add v30.4s,v30.4s,v31.4s + ror w12,w12,#20 + eor v0.16b,v9.16b,v10.16b + add w5,w5,w9 + eor v1.16b,v13.16b,v14.16b + add w6,w6,w10 + eor v2.16b,v17.16b,v18.16b + add w7,w7,w11 + eor v3.16b,v21.16b,v22.16b + add w8,w8,w12 + eor v4.16b,v25.16b,v26.16b + eor w17,w17,w5 + eor v5.16b,v29.16b,v30.16b + eor w19,w19,w6 + ushr v9.4s,v0.4s,#20 + eor w20,w20,w7 + ushr v13.4s,v1.4s,#20 + eor w21,w21,w8 + ushr v17.4s,v2.4s,#20 + ror w17,w17,#24 + ushr v21.4s,v3.4s,#20 + ror w19,w19,#24 + ushr v25.4s,v4.4s,#20 + ror 
w20,w20,#24 + ushr v29.4s,v5.4s,#20 + ror w21,w21,#24 + sli v9.4s,v0.4s,#12 + add w13,w13,w17 + sli v13.4s,v1.4s,#12 + add w14,w14,w19 + sli v17.4s,v2.4s,#12 + add w15,w15,w20 + sli v21.4s,v3.4s,#12 + add w16,w16,w21 + sli v25.4s,v4.4s,#12 + eor w9,w9,w13 + sli v29.4s,v5.4s,#12 + eor w10,w10,w14 + add v8.4s,v8.4s,v9.4s + eor w11,w11,w15 + add v12.4s,v12.4s,v13.4s + eor w12,w12,w16 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#25 + add v20.4s,v20.4s,v21.4s + ror w10,w10,#25 + add v24.4s,v24.4s,v25.4s + ror w11,w11,#25 + add v28.4s,v28.4s,v29.4s + ror w12,w12,#25 + eor v11.16b,v11.16b,v8.16b + add w5,w5,w10 + eor v15.16b,v15.16b,v12.16b + add w6,w6,w11 + eor v19.16b,v19.16b,v16.16b + add w7,w7,w12 + eor v23.16b,v23.16b,v20.16b + add w8,w8,w9 + eor v27.16b,v27.16b,v24.16b + eor w21,w21,w5 + eor v31.16b,v31.16b,v28.16b + eor w17,w17,w6 + tbl v11.16b,{v11.16b},v6.16b + eor w19,w19,w7 + tbl v15.16b,{v15.16b},v6.16b + eor w20,w20,w8 + tbl v19.16b,{v19.16b},v6.16b + ror w21,w21,#16 + tbl v23.16b,{v23.16b},v6.16b + ror w17,w17,#16 + tbl v27.16b,{v27.16b},v6.16b + ror w19,w19,#16 + tbl v31.16b,{v31.16b},v6.16b + ror w20,w20,#16 + add v10.4s,v10.4s,v11.4s + add w15,w15,w21 + add v14.4s,v14.4s,v15.4s + add w16,w16,w17 + add v18.4s,v18.4s,v19.4s + add w13,w13,w19 + add v22.4s,v22.4s,v23.4s + add w14,w14,w20 + add v26.4s,v26.4s,v27.4s + eor w10,w10,w15 + add v30.4s,v30.4s,v31.4s + eor w11,w11,w16 + eor v0.16b,v9.16b,v10.16b + eor w12,w12,w13 + eor v1.16b,v13.16b,v14.16b + eor w9,w9,w14 + eor v2.16b,v17.16b,v18.16b + ror w10,w10,#20 + eor v3.16b,v21.16b,v22.16b + ror w11,w11,#20 + eor v4.16b,v25.16b,v26.16b + ror w12,w12,#20 + eor v5.16b,v29.16b,v30.16b + ror w9,w9,#20 + ushr v9.4s,v0.4s,#25 + add w5,w5,w10 + ushr v13.4s,v1.4s,#25 + add w6,w6,w11 + ushr v17.4s,v2.4s,#25 + add w7,w7,w12 + ushr v21.4s,v3.4s,#25 + add w8,w8,w9 + ushr v25.4s,v4.4s,#25 + eor w21,w21,w5 + ushr v29.4s,v5.4s,#25 + eor w17,w17,w6 + sli v9.4s,v0.4s,#7 + eor w19,w19,w7 + sli v13.4s,v1.4s,#7 + eor w20,w20,w8 + sli v17.4s,v2.4s,#7 + ror w21,w21,#24 + sli v21.4s,v3.4s,#7 + ror w17,w17,#24 + sli v25.4s,v4.4s,#7 + ror w19,w19,#24 + sli v29.4s,v5.4s,#7 + ror w20,w20,#24 + ext v10.16b,v10.16b,v10.16b,#8 + add w15,w15,w21 + ext v14.16b,v14.16b,v14.16b,#8 + add w16,w16,w17 + ext v18.16b,v18.16b,v18.16b,#8 + add w13,w13,w19 + ext v22.16b,v22.16b,v22.16b,#8 + add w14,w14,w20 + ext v26.16b,v26.16b,v26.16b,#8 + eor w10,w10,w15 + ext v30.16b,v30.16b,v30.16b,#8 + eor w11,w11,w16 + ext v11.16b,v11.16b,v11.16b,#12 + eor w12,w12,w13 + ext v15.16b,v15.16b,v15.16b,#12 + eor w9,w9,w14 + ext v19.16b,v19.16b,v19.16b,#12 + ror w10,w10,#25 + ext v23.16b,v23.16b,v23.16b,#12 + ror w11,w11,#25 + ext v27.16b,v27.16b,v27.16b,#12 + ror w12,w12,#25 + ext v31.16b,v31.16b,v31.16b,#12 + ror w9,w9,#25 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + ext v25.16b,v25.16b,v25.16b,#4 + ext v29.16b,v29.16b,v29.16b,#4 + add v8.4s,v8.4s,v9.4s + add w5,w5,w9 + add v12.4s,v12.4s,v13.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + add v20.4s,v20.4s,v21.4s + add w8,w8,w12 + add v24.4s,v24.4s,v25.4s + eor w17,w17,w5 + add v28.4s,v28.4s,v29.4s + eor w19,w19,w6 + eor v11.16b,v11.16b,v8.16b + eor w20,w20,w7 + eor v15.16b,v15.16b,v12.16b + eor w21,w21,w8 + eor v19.16b,v19.16b,v16.16b + ror w17,w17,#16 + eor v23.16b,v23.16b,v20.16b + ror w19,w19,#16 + eor v27.16b,v27.16b,v24.16b + ror w20,w20,#16 + eor v31.16b,v31.16b,v28.16b + ror w21,w21,#16 + rev32 v11.8h,v11.8h + add w13,w13,w17 + rev32 
v15.8h,v15.8h + add w14,w14,w19 + rev32 v19.8h,v19.8h + add w15,w15,w20 + rev32 v23.8h,v23.8h + add w16,w16,w21 + rev32 v27.8h,v27.8h + eor w9,w9,w13 + rev32 v31.8h,v31.8h + eor w10,w10,w14 + add v10.4s,v10.4s,v11.4s + eor w11,w11,w15 + add v14.4s,v14.4s,v15.4s + eor w12,w12,w16 + add v18.4s,v18.4s,v19.4s + ror w9,w9,#20 + add v22.4s,v22.4s,v23.4s + ror w10,w10,#20 + add v26.4s,v26.4s,v27.4s + ror w11,w11,#20 + add v30.4s,v30.4s,v31.4s + ror w12,w12,#20 + eor v0.16b,v9.16b,v10.16b + add w5,w5,w9 + eor v1.16b,v13.16b,v14.16b + add w6,w6,w10 + eor v2.16b,v17.16b,v18.16b + add w7,w7,w11 + eor v3.16b,v21.16b,v22.16b + add w8,w8,w12 + eor v4.16b,v25.16b,v26.16b + eor w17,w17,w5 + eor v5.16b,v29.16b,v30.16b + eor w19,w19,w6 + ushr v9.4s,v0.4s,#20 + eor w20,w20,w7 + ushr v13.4s,v1.4s,#20 + eor w21,w21,w8 + ushr v17.4s,v2.4s,#20 + ror w17,w17,#24 + ushr v21.4s,v3.4s,#20 + ror w19,w19,#24 + ushr v25.4s,v4.4s,#20 + ror w20,w20,#24 + ushr v29.4s,v5.4s,#20 + ror w21,w21,#24 + sli v9.4s,v0.4s,#12 + add w13,w13,w17 + sli v13.4s,v1.4s,#12 + add w14,w14,w19 + sli v17.4s,v2.4s,#12 + add w15,w15,w20 + sli v21.4s,v3.4s,#12 + add w16,w16,w21 + sli v25.4s,v4.4s,#12 + eor w9,w9,w13 + sli v29.4s,v5.4s,#12 + eor w10,w10,w14 + add v8.4s,v8.4s,v9.4s + eor w11,w11,w15 + add v12.4s,v12.4s,v13.4s + eor w12,w12,w16 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#25 + add v20.4s,v20.4s,v21.4s + ror w10,w10,#25 + add v24.4s,v24.4s,v25.4s + ror w11,w11,#25 + add v28.4s,v28.4s,v29.4s + ror w12,w12,#25 + eor v11.16b,v11.16b,v8.16b + add w5,w5,w10 + eor v15.16b,v15.16b,v12.16b + add w6,w6,w11 + eor v19.16b,v19.16b,v16.16b + add w7,w7,w12 + eor v23.16b,v23.16b,v20.16b + add w8,w8,w9 + eor v27.16b,v27.16b,v24.16b + eor w21,w21,w5 + eor v31.16b,v31.16b,v28.16b + eor w17,w17,w6 + tbl v11.16b,{v11.16b},v6.16b + eor w19,w19,w7 + tbl v15.16b,{v15.16b},v6.16b + eor w20,w20,w8 + tbl v19.16b,{v19.16b},v6.16b + ror w21,w21,#16 + tbl v23.16b,{v23.16b},v6.16b + ror w17,w17,#16 + tbl v27.16b,{v27.16b},v6.16b + ror w19,w19,#16 + tbl v31.16b,{v31.16b},v6.16b + ror w20,w20,#16 + add v10.4s,v10.4s,v11.4s + add w15,w15,w21 + add v14.4s,v14.4s,v15.4s + add w16,w16,w17 + add v18.4s,v18.4s,v19.4s + add w13,w13,w19 + add v22.4s,v22.4s,v23.4s + add w14,w14,w20 + add v26.4s,v26.4s,v27.4s + eor w10,w10,w15 + add v30.4s,v30.4s,v31.4s + eor w11,w11,w16 + eor v0.16b,v9.16b,v10.16b + eor w12,w12,w13 + eor v1.16b,v13.16b,v14.16b + eor w9,w9,w14 + eor v2.16b,v17.16b,v18.16b + ror w10,w10,#20 + eor v3.16b,v21.16b,v22.16b + ror w11,w11,#20 + eor v4.16b,v25.16b,v26.16b + ror w12,w12,#20 + eor v5.16b,v29.16b,v30.16b + ror w9,w9,#20 + ushr v9.4s,v0.4s,#25 + add w5,w5,w10 + ushr v13.4s,v1.4s,#25 + add w6,w6,w11 + ushr v17.4s,v2.4s,#25 + add w7,w7,w12 + ushr v21.4s,v3.4s,#25 + add w8,w8,w9 + ushr v25.4s,v4.4s,#25 + eor w21,w21,w5 + ushr v29.4s,v5.4s,#25 + eor w17,w17,w6 + sli v9.4s,v0.4s,#7 + eor w19,w19,w7 + sli v13.4s,v1.4s,#7 + eor w20,w20,w8 + sli v17.4s,v2.4s,#7 + ror w21,w21,#24 + sli v21.4s,v3.4s,#7 + ror w17,w17,#24 + sli v25.4s,v4.4s,#7 + ror w19,w19,#24 + sli v29.4s,v5.4s,#7 + ror w20,w20,#24 + ext v10.16b,v10.16b,v10.16b,#8 + add w15,w15,w21 + ext v14.16b,v14.16b,v14.16b,#8 + add w16,w16,w17 + ext v18.16b,v18.16b,v18.16b,#8 + add w13,w13,w19 + ext v22.16b,v22.16b,v22.16b,#8 + add w14,w14,w20 + ext v26.16b,v26.16b,v26.16b,#8 + eor w10,w10,w15 + ext v30.16b,v30.16b,v30.16b,#8 + eor w11,w11,w16 + ext v11.16b,v11.16b,v11.16b,#4 + eor w12,w12,w13 + ext v15.16b,v15.16b,v15.16b,#4 + eor w9,w9,w14 + ext v19.16b,v19.16b,v19.16b,#4 + ror w10,w10,#25 + ext 
v23.16b,v23.16b,v23.16b,#4 + ror w11,w11,#25 + ext v27.16b,v27.16b,v27.16b,#4 + ror w12,w12,#25 + ext v31.16b,v31.16b,v31.16b,#4 + ror w9,w9,#25 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + ext v25.16b,v25.16b,v25.16b,#12 + ext v29.16b,v29.16b,v29.16b,#12 + cbnz x4,.Loop_upper_neon + + add w5,w5,w22 // accumulate key block + add x6,x6,x22,lsr#32 + add w7,w7,w23 + add x8,x8,x23,lsr#32 + add w9,w9,w24 + add x10,x10,x24,lsr#32 + add w11,w11,w25 + add x12,x12,x25,lsr#32 + add w13,w13,w26 + add x14,x14,x26,lsr#32 + add w15,w15,w27 + add x16,x16,x27,lsr#32 + add w17,w17,w28 + add x19,x19,x28,lsr#32 + add w20,w20,w30 + add x21,x21,x30,lsr#32 + + add x5,x5,x6,lsl#32 // pack + add x7,x7,x8,lsl#32 + ldp x6,x8,[x1,#0] // load input + add x9,x9,x10,lsl#32 + add x11,x11,x12,lsl#32 + ldp x10,x12,[x1,#16] + add x13,x13,x14,lsl#32 + add x15,x15,x16,lsl#32 + ldp x14,x16,[x1,#32] + add x17,x17,x19,lsl#32 + add x20,x20,x21,lsl#32 + ldp x19,x21,[x1,#48] + add x1,x1,#64 +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor x15,x15,x16 + eor x17,x17,x19 + eor x20,x20,x21 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#1 // increment counter + mov w5,w22 // unpack key block + lsr x6,x22,#32 + stp x9,x11,[x0,#16] + mov w7,w23 + lsr x8,x23,#32 + stp x13,x15,[x0,#32] + mov w9,w24 + lsr x10,x24,#32 + stp x17,x20,[x0,#48] + add x0,x0,#64 + mov w11,w25 + lsr x12,x25,#32 + mov w13,w26 + lsr x14,x26,#32 + mov w15,w27 + lsr x16,x27,#32 + mov w17,w28 + lsr x19,x28,#32 + mov w20,w30 + lsr x21,x30,#32 + + mov x4,#5 +.Loop_lower_neon: + sub x4,x4,#1 + add v8.4s,v8.4s,v9.4s + add w5,w5,w9 + add v12.4s,v12.4s,v13.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + add v20.4s,v20.4s,v21.4s + add w8,w8,w12 + add v24.4s,v24.4s,v25.4s + eor w17,w17,w5 + add v28.4s,v28.4s,v29.4s + eor w19,w19,w6 + eor v11.16b,v11.16b,v8.16b + eor w20,w20,w7 + eor v15.16b,v15.16b,v12.16b + eor w21,w21,w8 + eor v19.16b,v19.16b,v16.16b + ror w17,w17,#16 + eor v23.16b,v23.16b,v20.16b + ror w19,w19,#16 + eor v27.16b,v27.16b,v24.16b + ror w20,w20,#16 + eor v31.16b,v31.16b,v28.16b + ror w21,w21,#16 + rev32 v11.8h,v11.8h + add w13,w13,w17 + rev32 v15.8h,v15.8h + add w14,w14,w19 + rev32 v19.8h,v19.8h + add w15,w15,w20 + rev32 v23.8h,v23.8h + add w16,w16,w21 + rev32 v27.8h,v27.8h + eor w9,w9,w13 + rev32 v31.8h,v31.8h + eor w10,w10,w14 + add v10.4s,v10.4s,v11.4s + eor w11,w11,w15 + add v14.4s,v14.4s,v15.4s + eor w12,w12,w16 + add v18.4s,v18.4s,v19.4s + ror w9,w9,#20 + add v22.4s,v22.4s,v23.4s + ror w10,w10,#20 + add v26.4s,v26.4s,v27.4s + ror w11,w11,#20 + add v30.4s,v30.4s,v31.4s + ror w12,w12,#20 + eor v0.16b,v9.16b,v10.16b + add w5,w5,w9 + eor v1.16b,v13.16b,v14.16b + add w6,w6,w10 + eor v2.16b,v17.16b,v18.16b + add w7,w7,w11 + eor v3.16b,v21.16b,v22.16b + add w8,w8,w12 + eor v4.16b,v25.16b,v26.16b + eor w17,w17,w5 + eor v5.16b,v29.16b,v30.16b + eor w19,w19,w6 + ushr v9.4s,v0.4s,#20 + eor w20,w20,w7 + ushr v13.4s,v1.4s,#20 + eor w21,w21,w8 + ushr v17.4s,v2.4s,#20 + ror w17,w17,#24 + ushr v21.4s,v3.4s,#20 + ror w19,w19,#24 + ushr v25.4s,v4.4s,#20 + ror w20,w20,#24 + ushr v29.4s,v5.4s,#20 + ror w21,w21,#24 + sli v9.4s,v0.4s,#12 + add w13,w13,w17 + sli v13.4s,v1.4s,#12 + add w14,w14,w19 + sli v17.4s,v2.4s,#12 + add w15,w15,w20 + sli v21.4s,v3.4s,#12 + add w16,w16,w21 + sli 
v25.4s,v4.4s,#12 + eor w9,w9,w13 + sli v29.4s,v5.4s,#12 + eor w10,w10,w14 + add v8.4s,v8.4s,v9.4s + eor w11,w11,w15 + add v12.4s,v12.4s,v13.4s + eor w12,w12,w16 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#25 + add v20.4s,v20.4s,v21.4s + ror w10,w10,#25 + add v24.4s,v24.4s,v25.4s + ror w11,w11,#25 + add v28.4s,v28.4s,v29.4s + ror w12,w12,#25 + eor v11.16b,v11.16b,v8.16b + add w5,w5,w10 + eor v15.16b,v15.16b,v12.16b + add w6,w6,w11 + eor v19.16b,v19.16b,v16.16b + add w7,w7,w12 + eor v23.16b,v23.16b,v20.16b + add w8,w8,w9 + eor v27.16b,v27.16b,v24.16b + eor w21,w21,w5 + eor v31.16b,v31.16b,v28.16b + eor w17,w17,w6 + tbl v11.16b,{v11.16b},v6.16b + eor w19,w19,w7 + tbl v15.16b,{v15.16b},v6.16b + eor w20,w20,w8 + tbl v19.16b,{v19.16b},v6.16b + ror w21,w21,#16 + tbl v23.16b,{v23.16b},v6.16b + ror w17,w17,#16 + tbl v27.16b,{v27.16b},v6.16b + ror w19,w19,#16 + tbl v31.16b,{v31.16b},v6.16b + ror w20,w20,#16 + add v10.4s,v10.4s,v11.4s + add w15,w15,w21 + add v14.4s,v14.4s,v15.4s + add w16,w16,w17 + add v18.4s,v18.4s,v19.4s + add w13,w13,w19 + add v22.4s,v22.4s,v23.4s + add w14,w14,w20 + add v26.4s,v26.4s,v27.4s + eor w10,w10,w15 + add v30.4s,v30.4s,v31.4s + eor w11,w11,w16 + eor v0.16b,v9.16b,v10.16b + eor w12,w12,w13 + eor v1.16b,v13.16b,v14.16b + eor w9,w9,w14 + eor v2.16b,v17.16b,v18.16b + ror w10,w10,#20 + eor v3.16b,v21.16b,v22.16b + ror w11,w11,#20 + eor v4.16b,v25.16b,v26.16b + ror w12,w12,#20 + eor v5.16b,v29.16b,v30.16b + ror w9,w9,#20 + ushr v9.4s,v0.4s,#25 + add w5,w5,w10 + ushr v13.4s,v1.4s,#25 + add w6,w6,w11 + ushr v17.4s,v2.4s,#25 + add w7,w7,w12 + ushr v21.4s,v3.4s,#25 + add w8,w8,w9 + ushr v25.4s,v4.4s,#25 + eor w21,w21,w5 + ushr v29.4s,v5.4s,#25 + eor w17,w17,w6 + sli v9.4s,v0.4s,#7 + eor w19,w19,w7 + sli v13.4s,v1.4s,#7 + eor w20,w20,w8 + sli v17.4s,v2.4s,#7 + ror w21,w21,#24 + sli v21.4s,v3.4s,#7 + ror w17,w17,#24 + sli v25.4s,v4.4s,#7 + ror w19,w19,#24 + sli v29.4s,v5.4s,#7 + ror w20,w20,#24 + ext v10.16b,v10.16b,v10.16b,#8 + add w15,w15,w21 + ext v14.16b,v14.16b,v14.16b,#8 + add w16,w16,w17 + ext v18.16b,v18.16b,v18.16b,#8 + add w13,w13,w19 + ext v22.16b,v22.16b,v22.16b,#8 + add w14,w14,w20 + ext v26.16b,v26.16b,v26.16b,#8 + eor w10,w10,w15 + ext v30.16b,v30.16b,v30.16b,#8 + eor w11,w11,w16 + ext v11.16b,v11.16b,v11.16b,#12 + eor w12,w12,w13 + ext v15.16b,v15.16b,v15.16b,#12 + eor w9,w9,w14 + ext v19.16b,v19.16b,v19.16b,#12 + ror w10,w10,#25 + ext v23.16b,v23.16b,v23.16b,#12 + ror w11,w11,#25 + ext v27.16b,v27.16b,v27.16b,#12 + ror w12,w12,#25 + ext v31.16b,v31.16b,v31.16b,#12 + ror w9,w9,#25 + ext v9.16b,v9.16b,v9.16b,#4 + ext v13.16b,v13.16b,v13.16b,#4 + ext v17.16b,v17.16b,v17.16b,#4 + ext v21.16b,v21.16b,v21.16b,#4 + ext v25.16b,v25.16b,v25.16b,#4 + ext v29.16b,v29.16b,v29.16b,#4 + add v8.4s,v8.4s,v9.4s + add w5,w5,w9 + add v12.4s,v12.4s,v13.4s + add w6,w6,w10 + add v16.4s,v16.4s,v17.4s + add w7,w7,w11 + add v20.4s,v20.4s,v21.4s + add w8,w8,w12 + add v24.4s,v24.4s,v25.4s + eor w17,w17,w5 + add v28.4s,v28.4s,v29.4s + eor w19,w19,w6 + eor v11.16b,v11.16b,v8.16b + eor w20,w20,w7 + eor v15.16b,v15.16b,v12.16b + eor w21,w21,w8 + eor v19.16b,v19.16b,v16.16b + ror w17,w17,#16 + eor v23.16b,v23.16b,v20.16b + ror w19,w19,#16 + eor v27.16b,v27.16b,v24.16b + ror w20,w20,#16 + eor v31.16b,v31.16b,v28.16b + ror w21,w21,#16 + rev32 v11.8h,v11.8h + add w13,w13,w17 + rev32 v15.8h,v15.8h + add w14,w14,w19 + rev32 v19.8h,v19.8h + add w15,w15,w20 + rev32 v23.8h,v23.8h + add w16,w16,w21 + rev32 v27.8h,v27.8h + eor w9,w9,w13 + rev32 v31.8h,v31.8h + eor w10,w10,w14 + add v10.4s,v10.4s,v11.4s + eor 
w11,w11,w15 + add v14.4s,v14.4s,v15.4s + eor w12,w12,w16 + add v18.4s,v18.4s,v19.4s + ror w9,w9,#20 + add v22.4s,v22.4s,v23.4s + ror w10,w10,#20 + add v26.4s,v26.4s,v27.4s + ror w11,w11,#20 + add v30.4s,v30.4s,v31.4s + ror w12,w12,#20 + eor v0.16b,v9.16b,v10.16b + add w5,w5,w9 + eor v1.16b,v13.16b,v14.16b + add w6,w6,w10 + eor v2.16b,v17.16b,v18.16b + add w7,w7,w11 + eor v3.16b,v21.16b,v22.16b + add w8,w8,w12 + eor v4.16b,v25.16b,v26.16b + eor w17,w17,w5 + eor v5.16b,v29.16b,v30.16b + eor w19,w19,w6 + ushr v9.4s,v0.4s,#20 + eor w20,w20,w7 + ushr v13.4s,v1.4s,#20 + eor w21,w21,w8 + ushr v17.4s,v2.4s,#20 + ror w17,w17,#24 + ushr v21.4s,v3.4s,#20 + ror w19,w19,#24 + ushr v25.4s,v4.4s,#20 + ror w20,w20,#24 + ushr v29.4s,v5.4s,#20 + ror w21,w21,#24 + sli v9.4s,v0.4s,#12 + add w13,w13,w17 + sli v13.4s,v1.4s,#12 + add w14,w14,w19 + sli v17.4s,v2.4s,#12 + add w15,w15,w20 + sli v21.4s,v3.4s,#12 + add w16,w16,w21 + sli v25.4s,v4.4s,#12 + eor w9,w9,w13 + sli v29.4s,v5.4s,#12 + eor w10,w10,w14 + add v8.4s,v8.4s,v9.4s + eor w11,w11,w15 + add v12.4s,v12.4s,v13.4s + eor w12,w12,w16 + add v16.4s,v16.4s,v17.4s + ror w9,w9,#25 + add v20.4s,v20.4s,v21.4s + ror w10,w10,#25 + add v24.4s,v24.4s,v25.4s + ror w11,w11,#25 + add v28.4s,v28.4s,v29.4s + ror w12,w12,#25 + eor v11.16b,v11.16b,v8.16b + add w5,w5,w10 + eor v15.16b,v15.16b,v12.16b + add w6,w6,w11 + eor v19.16b,v19.16b,v16.16b + add w7,w7,w12 + eor v23.16b,v23.16b,v20.16b + add w8,w8,w9 + eor v27.16b,v27.16b,v24.16b + eor w21,w21,w5 + eor v31.16b,v31.16b,v28.16b + eor w17,w17,w6 + tbl v11.16b,{v11.16b},v6.16b + eor w19,w19,w7 + tbl v15.16b,{v15.16b},v6.16b + eor w20,w20,w8 + tbl v19.16b,{v19.16b},v6.16b + ror w21,w21,#16 + tbl v23.16b,{v23.16b},v6.16b + ror w17,w17,#16 + tbl v27.16b,{v27.16b},v6.16b + ror w19,w19,#16 + tbl v31.16b,{v31.16b},v6.16b + ror w20,w20,#16 + add v10.4s,v10.4s,v11.4s + add w15,w15,w21 + add v14.4s,v14.4s,v15.4s + add w16,w16,w17 + add v18.4s,v18.4s,v19.4s + add w13,w13,w19 + add v22.4s,v22.4s,v23.4s + add w14,w14,w20 + add v26.4s,v26.4s,v27.4s + eor w10,w10,w15 + add v30.4s,v30.4s,v31.4s + eor w11,w11,w16 + eor v0.16b,v9.16b,v10.16b + eor w12,w12,w13 + eor v1.16b,v13.16b,v14.16b + eor w9,w9,w14 + eor v2.16b,v17.16b,v18.16b + ror w10,w10,#20 + eor v3.16b,v21.16b,v22.16b + ror w11,w11,#20 + eor v4.16b,v25.16b,v26.16b + ror w12,w12,#20 + eor v5.16b,v29.16b,v30.16b + ror w9,w9,#20 + ushr v9.4s,v0.4s,#25 + add w5,w5,w10 + ushr v13.4s,v1.4s,#25 + add w6,w6,w11 + ushr v17.4s,v2.4s,#25 + add w7,w7,w12 + ushr v21.4s,v3.4s,#25 + add w8,w8,w9 + ushr v25.4s,v4.4s,#25 + eor w21,w21,w5 + ushr v29.4s,v5.4s,#25 + eor w17,w17,w6 + sli v9.4s,v0.4s,#7 + eor w19,w19,w7 + sli v13.4s,v1.4s,#7 + eor w20,w20,w8 + sli v17.4s,v2.4s,#7 + ror w21,w21,#24 + sli v21.4s,v3.4s,#7 + ror w17,w17,#24 + sli v25.4s,v4.4s,#7 + ror w19,w19,#24 + sli v29.4s,v5.4s,#7 + ror w20,w20,#24 + ext v10.16b,v10.16b,v10.16b,#8 + add w15,w15,w21 + ext v14.16b,v14.16b,v14.16b,#8 + add w16,w16,w17 + ext v18.16b,v18.16b,v18.16b,#8 + add w13,w13,w19 + ext v22.16b,v22.16b,v22.16b,#8 + add w14,w14,w20 + ext v26.16b,v26.16b,v26.16b,#8 + eor w10,w10,w15 + ext v30.16b,v30.16b,v30.16b,#8 + eor w11,w11,w16 + ext v11.16b,v11.16b,v11.16b,#4 + eor w12,w12,w13 + ext v15.16b,v15.16b,v15.16b,#4 + eor w9,w9,w14 + ext v19.16b,v19.16b,v19.16b,#4 + ror w10,w10,#25 + ext v23.16b,v23.16b,v23.16b,#4 + ror w11,w11,#25 + ext v27.16b,v27.16b,v27.16b,#4 + ror w12,w12,#25 + ext v31.16b,v31.16b,v31.16b,#4 + ror w9,w9,#25 + ext v9.16b,v9.16b,v9.16b,#12 + ext v13.16b,v13.16b,v13.16b,#12 + ext 
v17.16b,v17.16b,v17.16b,#12 + ext v21.16b,v21.16b,v21.16b,#12 + ext v25.16b,v25.16b,v25.16b,#12 + ext v29.16b,v29.16b,v29.16b,#12 + cbnz x4,.Loop_lower_neon + + add w5,w5,w22 // accumulate key block + ldp q0,q1,[sp,#0] + add x6,x6,x22,lsr#32 + ldp q2,q3,[sp,#32] + add w7,w7,w23 + ldp q4,q5,[sp,#64] + add x8,x8,x23,lsr#32 + ldr q6,[sp,#96] + add v8.4s,v8.4s,v0.4s + add w9,w9,w24 + add v12.4s,v12.4s,v0.4s + add x10,x10,x24,lsr#32 + add v16.4s,v16.4s,v0.4s + add w11,w11,w25 + add v20.4s,v20.4s,v0.4s + add x12,x12,x25,lsr#32 + add v24.4s,v24.4s,v0.4s + add w13,w13,w26 + add v28.4s,v28.4s,v0.4s + add x14,x14,x26,lsr#32 + add v10.4s,v10.4s,v2.4s + add w15,w15,w27 + add v14.4s,v14.4s,v2.4s + add x16,x16,x27,lsr#32 + add v18.4s,v18.4s,v2.4s + add w17,w17,w28 + add v22.4s,v22.4s,v2.4s + add x19,x19,x28,lsr#32 + add v26.4s,v26.4s,v2.4s + add w20,w20,w30 + add v30.4s,v30.4s,v2.4s + add x21,x21,x30,lsr#32 + add v27.4s,v27.4s,v7.4s // +4 + add x5,x5,x6,lsl#32 // pack + add v31.4s,v31.4s,v7.4s // +4 + add x7,x7,x8,lsl#32 + add v11.4s,v11.4s,v3.4s + ldp x6,x8,[x1,#0] // load input + add v15.4s,v15.4s,v4.4s + add x9,x9,x10,lsl#32 + add v19.4s,v19.4s,v5.4s + add x11,x11,x12,lsl#32 + add v23.4s,v23.4s,v6.4s + ldp x10,x12,[x1,#16] + add v27.4s,v27.4s,v3.4s + add x13,x13,x14,lsl#32 + add v31.4s,v31.4s,v4.4s + add x15,x15,x16,lsl#32 + add v9.4s,v9.4s,v1.4s + ldp x14,x16,[x1,#32] + add v13.4s,v13.4s,v1.4s + add x17,x17,x19,lsl#32 + add v17.4s,v17.4s,v1.4s + add x20,x20,x21,lsl#32 + add v21.4s,v21.4s,v1.4s + ldp x19,x21,[x1,#48] + add v25.4s,v25.4s,v1.4s + add x1,x1,#64 + add v29.4s,v29.4s,v1.4s + +#ifdef __AARCH64EB__ + rev x5,x5 + rev x7,x7 + rev x9,x9 + rev x11,x11 + rev x13,x13 + rev x15,x15 + rev x17,x17 + rev x20,x20 +#endif + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + eor x5,x5,x6 + eor x7,x7,x8 + eor x9,x9,x10 + eor x11,x11,x12 + eor x13,x13,x14 + eor v8.16b,v8.16b,v0.16b + eor x15,x15,x16 + eor v9.16b,v9.16b,v1.16b + eor x17,x17,x19 + eor v10.16b,v10.16b,v2.16b + eor x20,x20,x21 + eor v11.16b,v11.16b,v3.16b + ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 + + stp x5,x7,[x0,#0] // store output + add x28,x28,#7 // increment counter + stp x9,x11,[x0,#16] + stp x13,x15,[x0,#32] + stp x17,x20,[x0,#48] + add x0,x0,#64 + st1 {v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64 + + ld1 {v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64 + eor v12.16b,v12.16b,v0.16b + eor v13.16b,v13.16b,v1.16b + eor v14.16b,v14.16b,v2.16b + eor v15.16b,v15.16b,v3.16b + st1 {v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64 + + ld1 {v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64 + eor v16.16b,v16.16b,v8.16b + ldp q0,q1,[sp,#0] + eor v17.16b,v17.16b,v9.16b + ldp q2,q3,[sp,#32] + eor v18.16b,v18.16b,v10.16b + eor v19.16b,v19.16b,v11.16b + st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 + + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 + eor v20.16b,v20.16b,v12.16b + eor v21.16b,v21.16b,v13.16b + eor v22.16b,v22.16b,v14.16b + eor v23.16b,v23.16b,v15.16b + st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 + + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + eor v24.16b,v24.16b,v16.16b + eor v25.16b,v25.16b,v17.16b + eor v26.16b,v26.16b,v18.16b + eor v27.16b,v27.16b,v19.16b + st1 {v24.16b,v25.16b,v26.16b,v27.16b},[x0],#64 + + shl v8.4s,v7.4s,#1 // 4 -> 8 + eor v28.16b,v28.16b,v20.16b + eor v29.16b,v29.16b,v21.16b + eor v30.16b,v30.16b,v22.16b + eor v31.16b,v31.16b,v23.16b + st1 {v28.16b,v29.16b,v30.16b,v31.16b},[x0],#64 + + add v3.4s,v3.4s,v8.4s // += 8 + add v4.4s,v4.4s,v8.4s + add v5.4s,v5.4s,v8.4s + add v6.4s,v6.4s,v8.4s + + b.hs .Loop_outer_512_neon + + adds x2,x2,#512 
+ ushr v7.4s,v7.4s,#1 // 4 -> 2 + + ldp d10,d11,[sp,#128+16] // meet ABI requirements + ldp d12,d13,[sp,#128+32] + ldp d14,d15,[sp,#128+48] + + stp q0,q0,[sp,#0] // wipe off-load area + stp q0,q0,[sp,#32] + stp q0,q0,[sp,#64] + + b.eq .Ldone_512_neon + + sub x3,x3,#16 // .Lone + cmp x2,#192 + add sp,sp,#128 + sub v3.4s,v3.4s,v7.4s // -= 2 + ld1 {v8.4s,v9.4s},[x3] + b.hs .Loop_outer_neon + + ldp d8,d9,[sp,#0] // meet ABI requirements + eor v1.16b,v1.16b,v1.16b + eor v2.16b,v2.16b,v2.16b + eor v3.16b,v3.16b,v3.16b + eor v4.16b,v4.16b,v4.16b + eor v5.16b,v5.16b,v5.16b + eor v6.16b,v6.16b,v6.16b + b .Loop_outer + +.Ldone_512_neon: + ldp d8,d9,[sp,#128+0] // meet ABI requirements + ldp x19,x20,[x29,#16] + add sp,sp,#128+64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ChaCha20_512_neon,.-ChaCha20_512_neon diff --git a/contrib/openssl-cmake/asm/crypto/chacha/chacha-s390x.S b/contrib/openssl-cmake/asm/crypto/chacha/chacha-s390x.S new file mode 100644 index 000000000000..575f6c1dc5e9 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/chacha/chacha-s390x.S @@ -0,0 +1,954 @@ +#include "s390x_arch.h" +.text +.globl ChaCha20_ctr32 +.type ChaCha20_ctr32,@function +.align 32 +ChaCha20_ctr32: + larl %r1,OPENSSL_s390xcap_P + lghi %r0,64 + ltgr %r4,%r4 + bzr %r14 + lg %r1,S390X_STFLE+16(%r1) + clgr %r4,%r0 + jle .Lshort + tmhh %r1,16384 + jnz .LChaCha20_ctr32_vx +.Lshort: + aghi %r4,-64 + lghi %r1,-240 + stmg %r6,%r15,6*8(%r15) + slgr %r2,%r3 + la %r4,0(%r3,%r4) + larl %r7,.Lsigma + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + lmg %r8,%r11,0(%r5) + lmg %r12,%r13,0(%r6) + lmg %r6,%r7,0(%r7) + la %r14,0(%r3) + stg %r2,240+3*8(%r15) + stg %r4,240+4*8(%r15) + stmg %r6,%r13,160(%r15) + srlg %r10,%r12,32 + j .Loop_outer +.align 16 +.Loop_outer: + lm %r0,%r7,160+4*0(%r15) + lm %r8,%r9,160+4*10(%r15) + lm %r11,%r13,160+4*13(%r15) + stm %r8,%r9,160+4*8+4*10(%r15) + lm %r8,%r9,160+4*8(%r15) + st %r10,160+4*12(%r15) + stg %r14,240+2*8(%r15) + lhi %r14,10 + j .Loop +.align 4 +.Loop: + alr %r0,%r4 + alr %r1,%r5 + xr %r10,%r0 + xr %r11,%r1 + rll %r10,%r10,16 + rll %r11,%r11,16 + alr %r8,%r10 + alr %r9,%r11 + xr %r4,%r8 + xr %r5,%r9 + rll %r4,%r4,12 + rll %r5,%r5,12 + alr %r0,%r4 + alr %r1,%r5 + xr %r10,%r0 + xr %r11,%r1 + rll %r10,%r10,8 + rll %r11,%r11,8 + alr %r8,%r10 + alr %r9,%r11 + xr %r4,%r8 + xr %r5,%r9 + rll %r4,%r4,7 + rll %r5,%r5,7 + stm %r8,%r9,160+4*8+4*8(%r15) + lm %r8,%r9,160+4*8+4*10(%r15) + alr %r2,%r6 + alr %r3,%r7 + xr %r12,%r2 + xr %r13,%r3 + rll %r12,%r12,16 + rll %r13,%r13,16 + alr %r8,%r12 + alr %r9,%r13 + xr %r6,%r8 + xr %r7,%r9 + rll %r6,%r6,12 + rll %r7,%r7,12 + alr %r2,%r6 + alr %r3,%r7 + xr %r12,%r2 + xr %r13,%r3 + rll %r12,%r12,8 + rll %r13,%r13,8 + alr %r8,%r12 + alr %r9,%r13 + xr %r6,%r8 + xr %r7,%r9 + rll %r6,%r6,7 + rll %r7,%r7,7 + alr %r0,%r5 + alr %r1,%r6 + xr %r13,%r0 + xr %r10,%r1 + rll %r13,%r13,16 + rll %r10,%r10,16 + alr %r8,%r13 + alr %r9,%r10 + xr %r5,%r8 + xr %r6,%r9 + rll %r5,%r5,12 + rll %r6,%r6,12 + alr %r0,%r5 + alr %r1,%r6 + xr %r13,%r0 + xr %r10,%r1 + rll %r13,%r13,8 + rll %r10,%r10,8 + alr %r8,%r13 + alr %r9,%r10 + xr %r5,%r8 + xr %r6,%r9 + rll %r5,%r5,7 + rll %r6,%r6,7 + stm %r8,%r9,160+4*8+4*10(%r15) + lm %r8,%r9,160+4*8+4*8(%r15) + alr %r2,%r7 + alr %r3,%r4 + xr %r11,%r2 + xr %r12,%r3 + rll %r11,%r11,16 + rll %r12,%r12,16 + alr %r8,%r11 + alr %r9,%r12 + xr %r7,%r8 + xr %r4,%r9 + rll %r7,%r7,12 + rll %r4,%r4,12 + alr %r2,%r7 + alr %r3,%r4 + 
xr %r11,%r2 + xr %r12,%r3 + rll %r11,%r11,8 + rll %r12,%r12,8 + alr %r8,%r11 + alr %r9,%r12 + xr %r7,%r8 + xr %r4,%r9 + rll %r7,%r7,7 + rll %r4,%r4,7 + brct %r14,.Loop + lg %r14,240+2*8(%r15) + stm %r8,%r9,160+4*8+4*8(%r15) + lmg %r8,%r9,240+3*8(%r15) + al %r0,160+4*0(%r15) + al %r1,160+4*1(%r15) + al %r2,160+4*2(%r15) + al %r3,160+4*3(%r15) + al %r4,160+4*4(%r15) + al %r5,160+4*5(%r15) + al %r6,160+4*6(%r15) + al %r7,160+4*7(%r15) + lrvr %r0,%r0 + lrvr %r1,%r1 + lrvr %r2,%r2 + lrvr %r3,%r3 + lrvr %r4,%r4 + lrvr %r5,%r5 + lrvr %r6,%r6 + lrvr %r7,%r7 + al %r10,160+4*12(%r15) + al %r11,160+4*13(%r15) + al %r12,160+4*14(%r15) + al %r13,160+4*15(%r15) + lrvr %r10,%r10 + lrvr %r11,%r11 + lrvr %r12,%r12 + lrvr %r13,%r13 + la %r8,0(%r8,%r14) + clgr %r14,%r9 + jh .Ltail + x %r0,4*0(%r14) + x %r1,4*1(%r14) + st %r0,4*0(%r8) + x %r2,4*2(%r14) + st %r1,4*1(%r8) + x %r3,4*3(%r14) + st %r2,4*2(%r8) + x %r4,4*4(%r14) + st %r3,4*3(%r8) + lm %r0,%r3,160+4*8+4*8(%r15) + x %r5,4*5(%r14) + st %r4,4*4(%r8) + x %r6,4*6(%r14) + al %r0,160+4*8(%r15) + st %r5,4*5(%r8) + x %r7,4*7(%r14) + al %r1,160+4*9(%r15) + st %r6,4*6(%r8) + x %r10,4*12(%r14) + al %r2,160+4*10(%r15) + st %r7,4*7(%r8) + x %r11,4*13(%r14) + al %r3,160+4*11(%r15) + st %r10,4*12(%r8) + x %r12,4*14(%r14) + st %r11,4*13(%r8) + x %r13,4*15(%r14) + st %r12,4*14(%r8) + lrvr %r0,%r0 + st %r13,4*15(%r8) + lrvr %r1,%r1 + lrvr %r2,%r2 + lrvr %r3,%r3 + lhi %r10,1 + x %r0,4*8(%r14) + al %r10,160+4*12(%r15) + x %r1,4*9(%r14) + st %r0,4*8(%r8) + x %r2,4*10(%r14) + st %r1,4*9(%r8) + x %r3,4*11(%r14) + st %r2,4*10(%r8) + st %r3,4*11(%r8) + clgr %r14,%r9 + la %r14,64(%r14) + jl .Loop_outer +.Ldone: + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r2,%r2 + xgr %r3,%r3 + stmg %r0,%r3,160+4*4(%r15) + stmg %r0,%r3,160+4*12(%r15) + lmg %r6,%r15,240+6*8(%r15) + br %r14 +.align 16 +.Ltail: + la %r9,64(%r9) + stm %r0,%r7,160+4*0(%r15) + slgr %r9,%r14 + lm %r0,%r3,160+4*8+4*8(%r15) + lghi %r6,0 + stm %r10,%r13,160+4*12(%r15) + al %r0,160+4*8(%r15) + al %r1,160+4*9(%r15) + al %r2,160+4*10(%r15) + al %r3,160+4*11(%r15) + lrvr %r0,%r0 + lrvr %r1,%r1 + lrvr %r2,%r2 + lrvr %r3,%r3 + stm %r0,%r3,160+4*8(%r15) +.Loop_tail: + llgc %r4,0(%r6,%r14) + llgc %r5,160(%r6,%r15) + xr %r5,%r4 + stc %r5,0(%r6,%r8) + la %r6,1(%r6) + brct %r9,.Loop_tail + j .Ldone +.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.align 32 +ChaCha20_ctr32_4x: +.LChaCha20_ctr32_4x: + stmg %r6,%r7,6*8(%r15) + lghi %r1,-224 + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + std %f8,160+8*0(%r15) + std %f9,160+8*1(%r15) + std %f10,160+8*2(%r15) + std %f11,160+8*3(%r15) + std %f12,160+8*4(%r15) + std %f13,160+8*5(%r15) + std %f14,160+8*6(%r15) + std %f15,160+8*7(%r15) + larl %r7,.Lsigma + lhi %r0,10 + lhi %r1,0 + .word 0xe700,0x7000,0x0806 # vl %v16,0(%r7) + .word 0xe710,0x5000,0x0806 # vl %v17,0(%r5) + .word 0xe720,0x5010,0x0806 # vl %v18,16(%r5) + .word 0xe730,0x6000,0x0806 # vl %v19,0(%r6) + .word 0xe7f0,0x7040,0x0806 # vl %v31,0x40(%r7) + .word 0xe7c0,0x7050,0x0806 # vl %v28,0x50(%r7) + .word 0xe7a3,000000,0x2c4d # vrep %v26,%v19,0,2 + .word 0xe731,000000,0x2822 # vlvg %v19,%r1,0,2 + .word 0xe7aa,0xc000,0x2ef3 # va %v26,%v26,%v28,2 + .word 0xe703,0x7060,0x0036 # vlm %v0,%v3,0x60(%r7) + .word 0xe741,000000,0x244d # vrep %v4,%v17,0,2 + .word 0xe751,0x0001,0x244d # vrep %v5,%v17,1,2 + .word 0xe761,0x0002,0x244d # vrep %v6,%v17,2,2 + .word 0xe771,0x0003,0x244d # vrep %v7,%v17,3,2 + .word 0xe782,000000,0x244d # vrep %v8,%v18,0,2 + .word 0xe792,0x0001,0x244d # vrep %v9,%v18,1,2 + .word 0xe7a2,0x0002,0x244d # vrep %v10,%v18,2,2 + 
.word 0xe7b2,0x0003,0x244d # vrep %v11,%v18,3,2 + .word 0xe7ca,000000,0x0456 # vlr %v12,%v26 + .word 0xe7d3,0x0001,0x244d # vrep %v13,%v19,1,2 + .word 0xe7e3,0x0002,0x244d # vrep %v14,%v19,2,2 + .word 0xe7f3,0x0003,0x244d # vrep %v15,%v19,3,2 +.Loop_4x: + .word 0xe700,0x4000,0x20f3 # va %v0,%v0,%v4,2 + .word 0xe7cc,000000,0x006d # vx %v12,%v12,%v0 + .word 0xe7cc,0x0010,0x2033 # verll %v12,%v12,16,2 + .word 0xe711,0x5000,0x20f3 # va %v1,%v1,%v5,2 + .word 0xe7dd,0x1000,0x006d # vx %v13,%v13,%v1 + .word 0xe7dd,0x0010,0x2033 # verll %v13,%v13,16,2 + .word 0xe722,0x6000,0x20f3 # va %v2,%v2,%v6,2 + .word 0xe7ee,0x2000,0x006d # vx %v14,%v14,%v2 + .word 0xe7ee,0x0010,0x2033 # verll %v14,%v14,16,2 + .word 0xe733,0x7000,0x20f3 # va %v3,%v3,%v7,2 + .word 0xe7ff,0x3000,0x006d # vx %v15,%v15,%v3 + .word 0xe7ff,0x0010,0x2033 # verll %v15,%v15,16,2 + .word 0xe788,0xc000,0x20f3 # va %v8,%v8,%v12,2 + .word 0xe744,0x8000,0x006d # vx %v4,%v4,%v8 + .word 0xe744,0x000c,0x2033 # verll %v4,%v4,12,2 + .word 0xe799,0xd000,0x20f3 # va %v9,%v9,%v13,2 + .word 0xe755,0x9000,0x006d # vx %v5,%v5,%v9 + .word 0xe755,0x000c,0x2033 # verll %v5,%v5,12,2 + .word 0xe7aa,0xe000,0x20f3 # va %v10,%v10,%v14,2 + .word 0xe766,0xa000,0x006d # vx %v6,%v6,%v10 + .word 0xe766,0x000c,0x2033 # verll %v6,%v6,12,2 + .word 0xe7bb,0xf000,0x20f3 # va %v11,%v11,%v15,2 + .word 0xe777,0xb000,0x006d # vx %v7,%v7,%v11 + .word 0xe777,0x000c,0x2033 # verll %v7,%v7,12,2 + .word 0xe700,0x4000,0x20f3 # va %v0,%v0,%v4,2 + .word 0xe7cc,000000,0x006d # vx %v12,%v12,%v0 + .word 0xe7cc,0x0008,0x2033 # verll %v12,%v12,8,2 + .word 0xe711,0x5000,0x20f3 # va %v1,%v1,%v5,2 + .word 0xe7dd,0x1000,0x006d # vx %v13,%v13,%v1 + .word 0xe7dd,0x0008,0x2033 # verll %v13,%v13,8,2 + .word 0xe722,0x6000,0x20f3 # va %v2,%v2,%v6,2 + .word 0xe7ee,0x2000,0x006d # vx %v14,%v14,%v2 + .word 0xe7ee,0x0008,0x2033 # verll %v14,%v14,8,2 + .word 0xe733,0x7000,0x20f3 # va %v3,%v3,%v7,2 + .word 0xe7ff,0x3000,0x006d # vx %v15,%v15,%v3 + .word 0xe7ff,0x0008,0x2033 # verll %v15,%v15,8,2 + .word 0xe788,0xc000,0x20f3 # va %v8,%v8,%v12,2 + .word 0xe744,0x8000,0x006d # vx %v4,%v4,%v8 + .word 0xe744,0x0007,0x2033 # verll %v4,%v4,7,2 + .word 0xe799,0xd000,0x20f3 # va %v9,%v9,%v13,2 + .word 0xe755,0x9000,0x006d # vx %v5,%v5,%v9 + .word 0xe755,0x0007,0x2033 # verll %v5,%v5,7,2 + .word 0xe7aa,0xe000,0x20f3 # va %v10,%v10,%v14,2 + .word 0xe766,0xa000,0x006d # vx %v6,%v6,%v10 + .word 0xe766,0x0007,0x2033 # verll %v6,%v6,7,2 + .word 0xe7bb,0xf000,0x20f3 # va %v11,%v11,%v15,2 + .word 0xe777,0xb000,0x006d # vx %v7,%v7,%v11 + .word 0xe777,0x0007,0x2033 # verll %v7,%v7,7,2 + .word 0xe700,0x5000,0x20f3 # va %v0,%v0,%v5,2 + .word 0xe7ff,000000,0x006d # vx %v15,%v15,%v0 + .word 0xe7ff,0x0010,0x2033 # verll %v15,%v15,16,2 + .word 0xe711,0x6000,0x20f3 # va %v1,%v1,%v6,2 + .word 0xe7cc,0x1000,0x006d # vx %v12,%v12,%v1 + .word 0xe7cc,0x0010,0x2033 # verll %v12,%v12,16,2 + .word 0xe722,0x7000,0x20f3 # va %v2,%v2,%v7,2 + .word 0xe7dd,0x2000,0x006d # vx %v13,%v13,%v2 + .word 0xe7dd,0x0010,0x2033 # verll %v13,%v13,16,2 + .word 0xe733,0x4000,0x20f3 # va %v3,%v3,%v4,2 + .word 0xe7ee,0x3000,0x006d # vx %v14,%v14,%v3 + .word 0xe7ee,0x0010,0x2033 # verll %v14,%v14,16,2 + .word 0xe7aa,0xf000,0x20f3 # va %v10,%v10,%v15,2 + .word 0xe755,0xa000,0x006d # vx %v5,%v5,%v10 + .word 0xe755,0x000c,0x2033 # verll %v5,%v5,12,2 + .word 0xe7bb,0xc000,0x20f3 # va %v11,%v11,%v12,2 + .word 0xe766,0xb000,0x006d # vx %v6,%v6,%v11 + .word 0xe766,0x000c,0x2033 # verll %v6,%v6,12,2 + .word 0xe788,0xd000,0x20f3 # va %v8,%v8,%v13,2 + .word 
0xe777,0x8000,0x006d # vx %v7,%v7,%v8 + .word 0xe777,0x000c,0x2033 # verll %v7,%v7,12,2 + .word 0xe799,0xe000,0x20f3 # va %v9,%v9,%v14,2 + .word 0xe744,0x9000,0x006d # vx %v4,%v4,%v9 + .word 0xe744,0x000c,0x2033 # verll %v4,%v4,12,2 + .word 0xe700,0x5000,0x20f3 # va %v0,%v0,%v5,2 + .word 0xe7ff,000000,0x006d # vx %v15,%v15,%v0 + .word 0xe7ff,0x0008,0x2033 # verll %v15,%v15,8,2 + .word 0xe711,0x6000,0x20f3 # va %v1,%v1,%v6,2 + .word 0xe7cc,0x1000,0x006d # vx %v12,%v12,%v1 + .word 0xe7cc,0x0008,0x2033 # verll %v12,%v12,8,2 + .word 0xe722,0x7000,0x20f3 # va %v2,%v2,%v7,2 + .word 0xe7dd,0x2000,0x006d # vx %v13,%v13,%v2 + .word 0xe7dd,0x0008,0x2033 # verll %v13,%v13,8,2 + .word 0xe733,0x4000,0x20f3 # va %v3,%v3,%v4,2 + .word 0xe7ee,0x3000,0x006d # vx %v14,%v14,%v3 + .word 0xe7ee,0x0008,0x2033 # verll %v14,%v14,8,2 + .word 0xe7aa,0xf000,0x20f3 # va %v10,%v10,%v15,2 + .word 0xe755,0xa000,0x006d # vx %v5,%v5,%v10 + .word 0xe755,0x0007,0x2033 # verll %v5,%v5,7,2 + .word 0xe7bb,0xc000,0x20f3 # va %v11,%v11,%v12,2 + .word 0xe766,0xb000,0x006d # vx %v6,%v6,%v11 + .word 0xe766,0x0007,0x2033 # verll %v6,%v6,7,2 + .word 0xe788,0xd000,0x20f3 # va %v8,%v8,%v13,2 + .word 0xe777,0x8000,0x006d # vx %v7,%v7,%v8 + .word 0xe777,0x0007,0x2033 # verll %v7,%v7,7,2 + .word 0xe799,0xe000,0x20f3 # va %v9,%v9,%v14,2 + .word 0xe744,0x9000,0x006d # vx %v4,%v4,%v9 + .word 0xe744,0x0007,0x2033 # verll %v4,%v4,7,2 + brct %r0,.Loop_4x + .word 0xe7cc,0xa000,0x22f3 # va %v12,%v12,%v26,2 + .word 0xe7b0,0x1000,0x2861 # vmrh %v27,%v0,%v1,2 + .word 0xe7c2,0x3000,0x2861 # vmrh %v28,%v2,%v3,2 + .word 0xe7d0,0x1000,0x2860 # vmrl %v29,%v0,%v1,2 + .word 0xe7e2,0x3000,0x2860 # vmrl %v30,%v2,%v3,2 + .word 0xe70b,0xc000,0x0684 # vpdi %v0,%v27,%v28,0 + .word 0xe71b,0xc000,0x5684 # vpdi %v1,%v27,%v28,5 + .word 0xe72d,0xe000,0x0684 # vpdi %v2,%v29,%v30,0 + .word 0xe73d,0xe000,0x5684 # vpdi %v3,%v29,%v30,5 + .word 0xe7b4,0x5000,0x2861 # vmrh %v27,%v4,%v5,2 + .word 0xe7c6,0x7000,0x2861 # vmrh %v28,%v6,%v7,2 + .word 0xe7d4,0x5000,0x2860 # vmrl %v29,%v4,%v5,2 + .word 0xe7e6,0x7000,0x2860 # vmrl %v30,%v6,%v7,2 + .word 0xe74b,0xc000,0x0684 # vpdi %v4,%v27,%v28,0 + .word 0xe75b,0xc000,0x5684 # vpdi %v5,%v27,%v28,5 + .word 0xe76d,0xe000,0x0684 # vpdi %v6,%v29,%v30,0 + .word 0xe77d,0xe000,0x5684 # vpdi %v7,%v29,%v30,5 + .word 0xe7b8,0x9000,0x2861 # vmrh %v27,%v8,%v9,2 + .word 0xe7ca,0xb000,0x2861 # vmrh %v28,%v10,%v11,2 + .word 0xe7d8,0x9000,0x2860 # vmrl %v29,%v8,%v9,2 + .word 0xe7ea,0xb000,0x2860 # vmrl %v30,%v10,%v11,2 + .word 0xe78b,0xc000,0x0684 # vpdi %v8,%v27,%v28,0 + .word 0xe79b,0xc000,0x5684 # vpdi %v9,%v27,%v28,5 + .word 0xe7ad,0xe000,0x0684 # vpdi %v10,%v29,%v30,0 + .word 0xe7bd,0xe000,0x5684 # vpdi %v11,%v29,%v30,5 + .word 0xe7bc,0xd000,0x2861 # vmrh %v27,%v12,%v13,2 + .word 0xe7ce,0xf000,0x2861 # vmrh %v28,%v14,%v15,2 + .word 0xe7dc,0xd000,0x2860 # vmrl %v29,%v12,%v13,2 + .word 0xe7ee,0xf000,0x2860 # vmrl %v30,%v14,%v15,2 + .word 0xe7cb,0xc000,0x0684 # vpdi %v12,%v27,%v28,0 + .word 0xe7db,0xc000,0x5684 # vpdi %v13,%v27,%v28,5 + .word 0xe7ed,0xe000,0x0684 # vpdi %v14,%v29,%v30,0 + .word 0xe7fd,0xe000,0x5684 # vpdi %v15,%v29,%v30,5 + .word 0xe700,000000,0x22f3 # va %v0,%v0,%v16,2 + .word 0xe744,0x1000,0x22f3 # va %v4,%v4,%v17,2 + .word 0xe788,0x2000,0x22f3 # va %v8,%v8,%v18,2 + .word 0xe7cc,0x3000,0x22f3 # va %v12,%v12,%v19,2 + .word 0xe700,000000,0xf18c # vperm %v0,%v0,%v0,%v31 + .word 0xe744,0x4000,0xf18c # vperm %v4,%v4,%v4,%v31 + .word 0xe788,0x8000,0xf18c # vperm %v8,%v8,%v8,%v31 + .word 0xe7cc,0xc000,0xf18c # vperm 
%v12,%v12,%v12,%v31 + .word 0xe7be,0x3000,0x0c36 # vlm %v27,%v30,0(%r3) + .word 0xe7bb,000000,0x0c6d # vx %v27,%v27,%v0 + .word 0xe7cc,0x4000,0x0c6d # vx %v28,%v28,%v4 + .word 0xe7dd,0x8000,0x0c6d # vx %v29,%v29,%v8 + .word 0xe7ee,0xc000,0x0c6d # vx %v30,%v30,%v12 + .word 0xe7be,0x2000,0x0c3e # vstm %v27,%v30,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + .word 0xe701,000000,0x22f3 # va %v0,%v1,%v16,2 + .word 0xe745,0x1000,0x22f3 # va %v4,%v5,%v17,2 + .word 0xe789,0x2000,0x22f3 # va %v8,%v9,%v18,2 + .word 0xe7cd,0x3000,0x22f3 # va %v12,%v13,%v19,2 + .word 0xe700,000000,0xf18c # vperm %v0,%v0,%v0,%v31 + .word 0xe744,0x4000,0xf18c # vperm %v4,%v4,%v4,%v31 + .word 0xe788,0x8000,0xf18c # vperm %v8,%v8,%v8,%v31 + .word 0xe7cc,0xc000,0xf18c # vperm %v12,%v12,%v12,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_4x + .word 0xe7be,0x3000,0x0c36 # vlm %v27,%v30,0(%r3) + .word 0xe7bb,000000,0x0c6d # vx %v27,%v27,%v0 + .word 0xe7cc,0x4000,0x0c6d # vx %v28,%v28,%v4 + .word 0xe7dd,0x8000,0x0c6d # vx %v29,%v29,%v8 + .word 0xe7ee,0xc000,0x0c6d # vx %v30,%v30,%v12 + .word 0xe7be,0x2000,0x0c3e # vstm %v27,%v30,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_4x + .word 0xe702,000000,0x22f3 # va %v0,%v2,%v16,2 + .word 0xe746,0x1000,0x22f3 # va %v4,%v6,%v17,2 + .word 0xe78a,0x2000,0x22f3 # va %v8,%v10,%v18,2 + .word 0xe7ce,0x3000,0x22f3 # va %v12,%v14,%v19,2 + .word 0xe700,000000,0xf18c # vperm %v0,%v0,%v0,%v31 + .word 0xe744,0x4000,0xf18c # vperm %v4,%v4,%v4,%v31 + .word 0xe788,0x8000,0xf18c # vperm %v8,%v8,%v8,%v31 + .word 0xe7cc,0xc000,0xf18c # vperm %v12,%v12,%v12,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_4x + .word 0xe7be,0x3000,0x0c36 # vlm %v27,%v30,0(%r3) + .word 0xe7bb,000000,0x0c6d # vx %v27,%v27,%v0 + .word 0xe7cc,0x4000,0x0c6d # vx %v28,%v28,%v4 + .word 0xe7dd,0x8000,0x0c6d # vx %v29,%v29,%v8 + .word 0xe7ee,0xc000,0x0c6d # vx %v30,%v30,%v12 + .word 0xe7be,0x2000,0x0c3e # vstm %v27,%v30,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_4x + .word 0xe703,000000,0x22f3 # va %v0,%v3,%v16,2 + .word 0xe747,0x1000,0x22f3 # va %v4,%v7,%v17,2 + .word 0xe78b,0x2000,0x22f3 # va %v8,%v11,%v18,2 + .word 0xe7cf,0x3000,0x22f3 # va %v12,%v15,%v19,2 + .word 0xe700,000000,0xf18c # vperm %v0,%v0,%v0,%v31 + .word 0xe744,0x4000,0xf18c # vperm %v4,%v4,%v4,%v31 + .word 0xe788,0x8000,0xf18c # vperm %v8,%v8,%v8,%v31 + .word 0xe7cc,0xc000,0xf18c # vperm %v12,%v12,%v12,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_4x + .word 0xe7be,0x3000,0x0c36 # vlm %v27,%v30,0(%r3) + .word 0xe7bb,000000,0x0c6d # vx %v27,%v27,%v0 + .word 0xe7cc,0x4000,0x0c6d # vx %v28,%v28,%v4 + .word 0xe7dd,0x8000,0x0c6d # vx %v29,%v29,%v8 + .word 0xe7ee,0xc000,0x0c6d # vx %v30,%v30,%v12 + .word 0xe7be,0x2000,0x0c3e # vstm %v27,%v30,0(%r2) +.Ldone_4x: + ld %f8,160+8*0(%r15) + ld %f9,160+8*1(%r15) + ld %f10,160+8*2(%r15) + ld %f11,160+8*3(%r15) + ld %f12,160+8*4(%r15) + ld %f13,160+8*5(%r15) + ld %f14,160+8*6(%r15) + ld %f15,160+8*7(%r15) + lmg %r6,%r7,224+6*8(%r15) + la %r15,224(%r15) + br %r14 +.align 16 +.Ltail_4x: + .word 0xe7b8,000000,0x0856 # vlr %v27,%v8 + ld %f8,160+8*0(%r15) + ld %f9,160+8*1(%r15) + ld %f10,160+8*2(%r15) + ld %f11,160+8*3(%r15) + .word 0xe7cc,000000,0x0856 # vlr %v28,%v12 + ld %f12,160+8*4(%r15) + ld %f13,160+8*5(%r15) + ld %f14,160+8*6(%r15) + ld %f15,160+8*7(%r15) + .word 0xe700,0xf0a0,0x000e # vst %v0,160+0x00(%r15) + .word 0xe740,0xf0b0,0x000e # vst %v4,160+0x10(%r15) + .word 0xe7b0,0xf0c0,0x080e # vst 
%v27,160+0x20(%r15) + .word 0xe7c0,0xf0d0,0x080e # vst %v28,160+0x30(%r15) + lghi %r1,0 +.Loop_tail_4x: + llgc %r5,0(%r1,%r3) + llgc %r6,160(%r1,%r15) + xr %r6,%r5 + stc %r6,0(%r1,%r2) + la %r1,1(%r1) + brct %r4,.Loop_tail_4x + lmg %r6,%r7,224+6*8(%r15) + la %r15,224(%r15) + br %r14 +.size ChaCha20_ctr32_4x,.-ChaCha20_ctr32_4x +.globl ChaCha20_ctr32_vx +.align 32 +ChaCha20_ctr32_vx: +.LChaCha20_ctr32_vx: + .word 0xc24e,000000,0x0100 # clgfi %r4,256 + jle .LChaCha20_ctr32_4x + stmg %r6,%r7,6*8(%r15) + lghi %r1,-224 + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + std %f8,224-8*8(%r15) + std %f9,224-8*7(%r15) + std %f10,224-8*6(%r15) + std %f11,224-8*5(%r15) + std %f12,224-8*4(%r15) + std %f13,224-8*3(%r15) + std %f14,224-8*2(%r15) + std %f15,224-8*1(%r15) + larl %r7,.Lsigma + lhi %r0,10 + .word 0xe789,0x5000,0x0c36 # vlm %v24,%v25,0(%r5) + .word 0xe7a0,0x6000,0x0806 # vl %v26,0(%r6) + .word 0xe7bf,0x7000,0x0c36 # vlm %v27,%v31,0(%r7) +.Loop_outer_vx: + .word 0xe70b,000000,0x0456 # vlr %v0,%v27 + .word 0xe718,000000,0x0456 # vlr %v1,%v24 + .word 0xe74b,000000,0x0456 # vlr %v4,%v27 + .word 0xe758,000000,0x0456 # vlr %v5,%v24 + .word 0xe78b,000000,0x0456 # vlr %v8,%v27 + .word 0xe798,000000,0x0456 # vlr %v9,%v24 + .word 0xe7cb,000000,0x0456 # vlr %v12,%v27 + .word 0xe7d8,000000,0x0456 # vlr %v13,%v24 + .word 0xe70b,000000,0x0c56 # vlr %v16,%v27 + .word 0xe718,000000,0x0c56 # vlr %v17,%v24 + .word 0xe74b,000000,0x0c56 # vlr %v20,%v27 + .word 0xe758,000000,0x0c56 # vlr %v21,%v24 + .word 0xe73a,000000,0x0456 # vlr %v3,%v26 + .word 0xe77a,0xc000,0x26f3 # va %v7,%v26,%v28,2 + .word 0xe7ba,0xd000,0x26f3 # va %v11,%v26,%v29,2 + .word 0xe7fa,0xe000,0x26f3 # va %v15,%v26,%v30,2 + .word 0xe73b,0xd000,0x2af3 # va %v19,%v11,%v29,2 + .word 0xe77b,0xe000,0x2af3 # va %v23,%v11,%v30,2 + .word 0xe729,000000,0x0456 # vlr %v2,%v25 + .word 0xe769,000000,0x0456 # vlr %v6,%v25 + .word 0xe7a9,000000,0x0456 # vlr %v10,%v25 + .word 0xe7e9,000000,0x0456 # vlr %v14,%v25 + .word 0xe729,000000,0x0c56 # vlr %v18,%v25 + .word 0xe769,000000,0x0c56 # vlr %v22,%v25 + .word 0xe7c7,000000,0x0856 # vlr %v28,%v7 + .word 0xe7db,000000,0x0856 # vlr %v29,%v11 + .word 0xe7ef,000000,0x0856 # vlr %v30,%v15 +.align 4 +.Loop_vx: + .word 0xe700,0x1000,0x20f3 # va %v0,%v0,%v1,2 + .word 0xe744,0x5000,0x20f3 # va %v4,%v4,%v5,2 + .word 0xe788,0x9000,0x20f3 # va %v8,%v8,%v9,2 + .word 0xe7cc,0xd000,0x20f3 # va %v12,%v12,%v13,2 + .word 0xe700,0x1000,0x2ef3 # va %v16,%v16,%v17,2 + .word 0xe744,0x5000,0x2ef3 # va %v20,%v20,%v21,2 + .word 0xe733,000000,0x006d # vx %v3,%v3,%v0 + .word 0xe777,0x4000,0x006d # vx %v7,%v7,%v4 + .word 0xe7bb,0x8000,0x006d # vx %v11,%v11,%v8 + .word 0xe7ff,0xc000,0x006d # vx %v15,%v15,%v12 + .word 0xe733,000000,0x0e6d # vx %v19,%v19,%v16 + .word 0xe777,0x4000,0x0e6d # vx %v23,%v23,%v20 + .word 0xe733,0x0010,0x2033 # verll %v3,%v3,16,2 + .word 0xe777,0x0010,0x2033 # verll %v7,%v7,16,2 + .word 0xe7bb,0x0010,0x2033 # verll %v11,%v11,16,2 + .word 0xe7ff,0x0010,0x2033 # verll %v15,%v15,16,2 + .word 0xe733,0x0010,0x2c33 # verll %v19,%v19,16,2 + .word 0xe777,0x0010,0x2c33 # verll %v23,%v23,16,2 + .word 0xe722,0x3000,0x20f3 # va %v2,%v2,%v3,2 + .word 0xe766,0x7000,0x20f3 # va %v6,%v6,%v7,2 + .word 0xe7aa,0xb000,0x20f3 # va %v10,%v10,%v11,2 + .word 0xe7ee,0xf000,0x20f3 # va %v14,%v14,%v15,2 + .word 0xe722,0x3000,0x2ef3 # va %v18,%v18,%v19,2 + .word 0xe766,0x7000,0x2ef3 # va %v22,%v22,%v23,2 + .word 0xe711,0x2000,0x006d # vx %v1,%v1,%v2 + .word 0xe755,0x6000,0x006d # vx %v5,%v5,%v6 + .word 0xe799,0xa000,0x006d # vx 
%v9,%v9,%v10 + .word 0xe7dd,0xe000,0x006d # vx %v13,%v13,%v14 + .word 0xe711,0x2000,0x0e6d # vx %v17,%v17,%v18 + .word 0xe755,0x6000,0x0e6d # vx %v21,%v21,%v22 + .word 0xe711,0x000c,0x2033 # verll %v1,%v1,12,2 + .word 0xe755,0x000c,0x2033 # verll %v5,%v5,12,2 + .word 0xe799,0x000c,0x2033 # verll %v9,%v9,12,2 + .word 0xe7dd,0x000c,0x2033 # verll %v13,%v13,12,2 + .word 0xe711,0x000c,0x2c33 # verll %v17,%v17,12,2 + .word 0xe755,0x000c,0x2c33 # verll %v21,%v21,12,2 + .word 0xe700,0x1000,0x20f3 # va %v0,%v0,%v1,2 + .word 0xe744,0x5000,0x20f3 # va %v4,%v4,%v5,2 + .word 0xe788,0x9000,0x20f3 # va %v8,%v8,%v9,2 + .word 0xe7cc,0xd000,0x20f3 # va %v12,%v12,%v13,2 + .word 0xe700,0x1000,0x2ef3 # va %v16,%v16,%v17,2 + .word 0xe744,0x5000,0x2ef3 # va %v20,%v20,%v21,2 + .word 0xe733,000000,0x006d # vx %v3,%v3,%v0 + .word 0xe777,0x4000,0x006d # vx %v7,%v7,%v4 + .word 0xe7bb,0x8000,0x006d # vx %v11,%v11,%v8 + .word 0xe7ff,0xc000,0x006d # vx %v15,%v15,%v12 + .word 0xe733,000000,0x0e6d # vx %v19,%v19,%v16 + .word 0xe777,0x4000,0x0e6d # vx %v23,%v23,%v20 + .word 0xe733,0x0008,0x2033 # verll %v3,%v3,8,2 + .word 0xe777,0x0008,0x2033 # verll %v7,%v7,8,2 + .word 0xe7bb,0x0008,0x2033 # verll %v11,%v11,8,2 + .word 0xe7ff,0x0008,0x2033 # verll %v15,%v15,8,2 + .word 0xe733,0x0008,0x2c33 # verll %v19,%v19,8,2 + .word 0xe777,0x0008,0x2c33 # verll %v23,%v23,8,2 + .word 0xe722,0x3000,0x20f3 # va %v2,%v2,%v3,2 + .word 0xe766,0x7000,0x20f3 # va %v6,%v6,%v7,2 + .word 0xe7aa,0xb000,0x20f3 # va %v10,%v10,%v11,2 + .word 0xe7ee,0xf000,0x20f3 # va %v14,%v14,%v15,2 + .word 0xe722,0x3000,0x2ef3 # va %v18,%v18,%v19,2 + .word 0xe766,0x7000,0x2ef3 # va %v22,%v22,%v23,2 + .word 0xe711,0x2000,0x006d # vx %v1,%v1,%v2 + .word 0xe755,0x6000,0x006d # vx %v5,%v5,%v6 + .word 0xe799,0xa000,0x006d # vx %v9,%v9,%v10 + .word 0xe7dd,0xe000,0x006d # vx %v13,%v13,%v14 + .word 0xe711,0x2000,0x0e6d # vx %v17,%v17,%v18 + .word 0xe755,0x6000,0x0e6d # vx %v21,%v21,%v22 + .word 0xe711,0x0007,0x2033 # verll %v1,%v1,7,2 + .word 0xe755,0x0007,0x2033 # verll %v5,%v5,7,2 + .word 0xe799,0x0007,0x2033 # verll %v9,%v9,7,2 + .word 0xe7dd,0x0007,0x2033 # verll %v13,%v13,7,2 + .word 0xe711,0x0007,0x2c33 # verll %v17,%v17,7,2 + .word 0xe755,0x0007,0x2c33 # verll %v21,%v21,7,2 + .word 0xe722,0x2008,0x0077 # vsldb %v2,%v2,%v2,8 + .word 0xe766,0x6008,0x0077 # vsldb %v6,%v6,%v6,8 + .word 0xe7aa,0xa008,0x0077 # vsldb %v10,%v10,%v10,8 + .word 0xe7ee,0xe008,0x0077 # vsldb %v14,%v14,%v14,8 + .word 0xe722,0x2008,0x0e77 # vsldb %v18,%v18,%v18,8 + .word 0xe766,0x6008,0x0e77 # vsldb %v22,%v22,%v22,8 + .word 0xe711,0x1004,0x0077 # vsldb %v1,%v1,%v1,4 + .word 0xe755,0x5004,0x0077 # vsldb %v5,%v5,%v5,4 + .word 0xe799,0x9004,0x0077 # vsldb %v9,%v9,%v9,4 + .word 0xe7dd,0xd004,0x0077 # vsldb %v13,%v13,%v13,4 + .word 0xe711,0x1004,0x0e77 # vsldb %v17,%v17,%v17,4 + .word 0xe755,0x5004,0x0e77 # vsldb %v21,%v21,%v21,4 + .word 0xe733,0x300c,0x0077 # vsldb %v3,%v3,%v3,12 + .word 0xe777,0x700c,0x0077 # vsldb %v7,%v7,%v7,12 + .word 0xe7bb,0xb00c,0x0077 # vsldb %v11,%v11,%v11,12 + .word 0xe7ff,0xf00c,0x0077 # vsldb %v15,%v15,%v15,12 + .word 0xe733,0x300c,0x0e77 # vsldb %v19,%v19,%v19,12 + .word 0xe777,0x700c,0x0e77 # vsldb %v23,%v23,%v23,12 + .word 0xe700,0x1000,0x20f3 # va %v0,%v0,%v1,2 + .word 0xe744,0x5000,0x20f3 # va %v4,%v4,%v5,2 + .word 0xe788,0x9000,0x20f3 # va %v8,%v8,%v9,2 + .word 0xe7cc,0xd000,0x20f3 # va %v12,%v12,%v13,2 + .word 0xe700,0x1000,0x2ef3 # va %v16,%v16,%v17,2 + .word 0xe744,0x5000,0x2ef3 # va %v20,%v20,%v21,2 + .word 0xe733,000000,0x006d # vx %v3,%v3,%v0 + .word 
0xe777,0x4000,0x006d # vx %v7,%v7,%v4 + .word 0xe7bb,0x8000,0x006d # vx %v11,%v11,%v8 + .word 0xe7ff,0xc000,0x006d # vx %v15,%v15,%v12 + .word 0xe733,000000,0x0e6d # vx %v19,%v19,%v16 + .word 0xe777,0x4000,0x0e6d # vx %v23,%v23,%v20 + .word 0xe733,0x0010,0x2033 # verll %v3,%v3,16,2 + .word 0xe777,0x0010,0x2033 # verll %v7,%v7,16,2 + .word 0xe7bb,0x0010,0x2033 # verll %v11,%v11,16,2 + .word 0xe7ff,0x0010,0x2033 # verll %v15,%v15,16,2 + .word 0xe733,0x0010,0x2c33 # verll %v19,%v19,16,2 + .word 0xe777,0x0010,0x2c33 # verll %v23,%v23,16,2 + .word 0xe722,0x3000,0x20f3 # va %v2,%v2,%v3,2 + .word 0xe766,0x7000,0x20f3 # va %v6,%v6,%v7,2 + .word 0xe7aa,0xb000,0x20f3 # va %v10,%v10,%v11,2 + .word 0xe7ee,0xf000,0x20f3 # va %v14,%v14,%v15,2 + .word 0xe722,0x3000,0x2ef3 # va %v18,%v18,%v19,2 + .word 0xe766,0x7000,0x2ef3 # va %v22,%v22,%v23,2 + .word 0xe711,0x2000,0x006d # vx %v1,%v1,%v2 + .word 0xe755,0x6000,0x006d # vx %v5,%v5,%v6 + .word 0xe799,0xa000,0x006d # vx %v9,%v9,%v10 + .word 0xe7dd,0xe000,0x006d # vx %v13,%v13,%v14 + .word 0xe711,0x2000,0x0e6d # vx %v17,%v17,%v18 + .word 0xe755,0x6000,0x0e6d # vx %v21,%v21,%v22 + .word 0xe711,0x000c,0x2033 # verll %v1,%v1,12,2 + .word 0xe755,0x000c,0x2033 # verll %v5,%v5,12,2 + .word 0xe799,0x000c,0x2033 # verll %v9,%v9,12,2 + .word 0xe7dd,0x000c,0x2033 # verll %v13,%v13,12,2 + .word 0xe711,0x000c,0x2c33 # verll %v17,%v17,12,2 + .word 0xe755,0x000c,0x2c33 # verll %v21,%v21,12,2 + .word 0xe700,0x1000,0x20f3 # va %v0,%v0,%v1,2 + .word 0xe744,0x5000,0x20f3 # va %v4,%v4,%v5,2 + .word 0xe788,0x9000,0x20f3 # va %v8,%v8,%v9,2 + .word 0xe7cc,0xd000,0x20f3 # va %v12,%v12,%v13,2 + .word 0xe700,0x1000,0x2ef3 # va %v16,%v16,%v17,2 + .word 0xe744,0x5000,0x2ef3 # va %v20,%v20,%v21,2 + .word 0xe733,000000,0x006d # vx %v3,%v3,%v0 + .word 0xe777,0x4000,0x006d # vx %v7,%v7,%v4 + .word 0xe7bb,0x8000,0x006d # vx %v11,%v11,%v8 + .word 0xe7ff,0xc000,0x006d # vx %v15,%v15,%v12 + .word 0xe733,000000,0x0e6d # vx %v19,%v19,%v16 + .word 0xe777,0x4000,0x0e6d # vx %v23,%v23,%v20 + .word 0xe733,0x0008,0x2033 # verll %v3,%v3,8,2 + .word 0xe777,0x0008,0x2033 # verll %v7,%v7,8,2 + .word 0xe7bb,0x0008,0x2033 # verll %v11,%v11,8,2 + .word 0xe7ff,0x0008,0x2033 # verll %v15,%v15,8,2 + .word 0xe733,0x0008,0x2c33 # verll %v19,%v19,8,2 + .word 0xe777,0x0008,0x2c33 # verll %v23,%v23,8,2 + .word 0xe722,0x3000,0x20f3 # va %v2,%v2,%v3,2 + .word 0xe766,0x7000,0x20f3 # va %v6,%v6,%v7,2 + .word 0xe7aa,0xb000,0x20f3 # va %v10,%v10,%v11,2 + .word 0xe7ee,0xf000,0x20f3 # va %v14,%v14,%v15,2 + .word 0xe722,0x3000,0x2ef3 # va %v18,%v18,%v19,2 + .word 0xe766,0x7000,0x2ef3 # va %v22,%v22,%v23,2 + .word 0xe711,0x2000,0x006d # vx %v1,%v1,%v2 + .word 0xe755,0x6000,0x006d # vx %v5,%v5,%v6 + .word 0xe799,0xa000,0x006d # vx %v9,%v9,%v10 + .word 0xe7dd,0xe000,0x006d # vx %v13,%v13,%v14 + .word 0xe711,0x2000,0x0e6d # vx %v17,%v17,%v18 + .word 0xe755,0x6000,0x0e6d # vx %v21,%v21,%v22 + .word 0xe711,0x0007,0x2033 # verll %v1,%v1,7,2 + .word 0xe755,0x0007,0x2033 # verll %v5,%v5,7,2 + .word 0xe799,0x0007,0x2033 # verll %v9,%v9,7,2 + .word 0xe7dd,0x0007,0x2033 # verll %v13,%v13,7,2 + .word 0xe711,0x0007,0x2c33 # verll %v17,%v17,7,2 + .word 0xe755,0x0007,0x2c33 # verll %v21,%v21,7,2 + .word 0xe722,0x2008,0x0077 # vsldb %v2,%v2,%v2,8 + .word 0xe766,0x6008,0x0077 # vsldb %v6,%v6,%v6,8 + .word 0xe7aa,0xa008,0x0077 # vsldb %v10,%v10,%v10,8 + .word 0xe7ee,0xe008,0x0077 # vsldb %v14,%v14,%v14,8 + .word 0xe722,0x2008,0x0e77 # vsldb %v18,%v18,%v18,8 + .word 0xe766,0x6008,0x0e77 # vsldb %v22,%v22,%v22,8 + .word 0xe711,0x100c,0x0077 # 
vsldb %v1,%v1,%v1,12 + .word 0xe755,0x500c,0x0077 # vsldb %v5,%v5,%v5,12 + .word 0xe799,0x900c,0x0077 # vsldb %v9,%v9,%v9,12 + .word 0xe7dd,0xd00c,0x0077 # vsldb %v13,%v13,%v13,12 + .word 0xe711,0x100c,0x0e77 # vsldb %v17,%v17,%v17,12 + .word 0xe755,0x500c,0x0e77 # vsldb %v21,%v21,%v21,12 + .word 0xe733,0x3004,0x0077 # vsldb %v3,%v3,%v3,4 + .word 0xe777,0x7004,0x0077 # vsldb %v7,%v7,%v7,4 + .word 0xe7bb,0xb004,0x0077 # vsldb %v11,%v11,%v11,4 + .word 0xe7ff,0xf004,0x0077 # vsldb %v15,%v15,%v15,4 + .word 0xe733,0x3004,0x0e77 # vsldb %v19,%v19,%v19,4 + .word 0xe777,0x7004,0x0e77 # vsldb %v23,%v23,%v23,4 + brct %r0,.Loop_vx + .word 0xe700,0xb000,0x22f3 # va %v0,%v0,%v27,2 + .word 0xe711,0x8000,0x22f3 # va %v1,%v1,%v24,2 + .word 0xe722,0x9000,0x22f3 # va %v2,%v2,%v25,2 + .word 0xe733,0xa000,0x22f3 # va %v3,%v3,%v26,2 + .word 0xe744,0xb000,0x22f3 # va %v4,%v4,%v27,2 + .word 0xe777,0xc000,0x22f3 # va %v7,%v7,%v28,2 + .word 0xe700,000000,0xf18c # vperm %v0,%v0,%v0,%v31 + .word 0xe711,0x1000,0xf18c # vperm %v1,%v1,%v1,%v31 + .word 0xe722,0x2000,0xf18c # vperm %v2,%v2,%v2,%v31 + .word 0xe733,0x3000,0xf18c # vperm %v3,%v3,%v3,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_vx + .word 0xe7bb,0xd000,0x22f3 # va %v11,%v11,%v29,2 + .word 0xe7ff,0xe000,0x22f3 # va %v15,%v15,%v30,2 + .word 0xe7be,0x3000,0x0c36 # vlm %v27,%v30,0(%r3) + .word 0xe700,0xb000,0x026d # vx %v0,%v0,%v27 + .word 0xe711,0xc000,0x026d # vx %v1,%v1,%v28 + .word 0xe722,0xd000,0x026d # vx %v2,%v2,%v29 + .word 0xe733,0xe000,0x026d # vx %v3,%v3,%v30 + .word 0xe7be,0x7000,0x0c36 # vlm %v27,%v30,0(%r7) + .word 0xe703,0x2000,0x003e # vstm %v0,%v3,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_vx + .word 0xe755,0x8000,0x22f3 # va %v5,%v5,%v24,2 + .word 0xe766,0x9000,0x22f3 # va %v6,%v6,%v25,2 + .word 0xe704,0x4000,0xf18c # vperm %v0,%v4,%v4,%v31 + .word 0xe715,0x5000,0xf18c # vperm %v1,%v5,%v5,%v31 + .word 0xe726,0x6000,0xf18c # vperm %v2,%v6,%v6,%v31 + .word 0xe737,0x7000,0xf18c # vperm %v3,%v7,%v7,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_vx + .word 0xe747,0x3000,0x0036 # vlm %v4,%v7,0(%r3) + .word 0xe700,0x4000,0x006d # vx %v0,%v0,%v4 + .word 0xe711,0x5000,0x006d # vx %v1,%v1,%v5 + .word 0xe722,0x6000,0x006d # vx %v2,%v2,%v6 + .word 0xe733,0x7000,0x006d # vx %v3,%v3,%v7 + .word 0xe703,0x2000,0x003e # vstm %v0,%v3,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_vx + .word 0xe788,0xb000,0x22f3 # va %v8,%v8,%v27,2 + .word 0xe799,0x8000,0x22f3 # va %v9,%v9,%v24,2 + .word 0xe7aa,0x9000,0x22f3 # va %v10,%v10,%v25,2 + .word 0xe708,0x8000,0xf18c # vperm %v0,%v8,%v8,%v31 + .word 0xe719,0x9000,0xf18c # vperm %v1,%v9,%v9,%v31 + .word 0xe72a,0xa000,0xf18c # vperm %v2,%v10,%v10,%v31 + .word 0xe73b,0xb000,0xf18c # vperm %v3,%v11,%v11,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_vx + .word 0xe747,0x3000,0x0036 # vlm %v4,%v7,0(%r3) + .word 0xe700,0x4000,0x006d # vx %v0,%v0,%v4 + .word 0xe711,0x5000,0x006d # vx %v1,%v1,%v5 + .word 0xe722,0x6000,0x006d # vx %v2,%v2,%v6 + .word 0xe733,0x7000,0x006d # vx %v3,%v3,%v7 + .word 0xe703,0x2000,0x003e # vstm %v0,%v3,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_vx + .word 0xe7cc,0xb000,0x22f3 # va %v12,%v12,%v27,2 + .word 0xe7dd,0x8000,0x22f3 # va %v13,%v13,%v24,2 + .word 0xe7ee,0x9000,0x22f3 # va %v14,%v14,%v25,2 + .word 0xe7ba,0xe000,0x26f3 # va %v11,%v26,%v30,2 + .word 0xe70c,0xc000,0xf18c # vperm %v0,%v12,%v12,%v31 + .word 0xe71d,0xd000,0xf18c # vperm %v1,%v13,%v13,%v31 + .word 
0xe72e,0xe000,0xf18c # vperm %v2,%v14,%v14,%v31 + .word 0xe73f,0xf000,0xf18c # vperm %v3,%v15,%v15,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_vx + .word 0xe7fb,0xc000,0x22f3 # va %v15,%v11,%v28,2 + .word 0xe747,0x3000,0x0036 # vlm %v4,%v7,0(%r3) + .word 0xe700,0x4000,0x006d # vx %v0,%v0,%v4 + .word 0xe711,0x5000,0x006d # vx %v1,%v1,%v5 + .word 0xe722,0x6000,0x006d # vx %v2,%v2,%v6 + .word 0xe733,0x7000,0x006d # vx %v3,%v3,%v7 + .word 0xe703,0x2000,0x003e # vstm %v0,%v3,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_vx + .word 0xe700,0xb000,0x2ef3 # va %v16,%v16,%v27,2 + .word 0xe711,0x8000,0x2ef3 # va %v17,%v17,%v24,2 + .word 0xe722,0x9000,0x2ef3 # va %v18,%v18,%v25,2 + .word 0xe733,0xf000,0x2cf3 # va %v19,%v19,%v15,2 + .word 0xe7ff,0xc000,0x22f3 # va %v15,%v15,%v28,2 + .word 0xe7ab,0xe000,0x2af3 # va %v26,%v11,%v30,2 + .word 0xe700,000000,0xf78c # vperm %v0,%v16,%v16,%v31 + .word 0xe711,0x1000,0xf78c # vperm %v1,%v17,%v17,%v31 + .word 0xe722,0x2000,0xf78c # vperm %v2,%v18,%v18,%v31 + .word 0xe733,0x3000,0xf78c # vperm %v3,%v19,%v19,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_vx + .word 0xe747,0x3000,0x0036 # vlm %v4,%v7,0(%r3) + .word 0xe700,0x4000,0x006d # vx %v0,%v0,%v4 + .word 0xe711,0x5000,0x006d # vx %v1,%v1,%v5 + .word 0xe722,0x6000,0x006d # vx %v2,%v2,%v6 + .word 0xe733,0x7000,0x006d # vx %v3,%v3,%v7 + .word 0xe703,0x2000,0x003e # vstm %v0,%v3,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + aghi %r4,-64 + je .Ldone_vx + .word 0xe744,0xb000,0x2ef3 # va %v20,%v20,%v27,2 + .word 0xe755,0x8000,0x2ef3 # va %v21,%v21,%v24,2 + .word 0xe766,0x9000,0x2ef3 # va %v22,%v22,%v25,2 + .word 0xe777,0xf000,0x2cf3 # va %v23,%v23,%v15,2 + .word 0xe704,0x4000,0xf78c # vperm %v0,%v20,%v20,%v31 + .word 0xe715,0x5000,0xf78c # vperm %v1,%v21,%v21,%v31 + .word 0xe726,0x6000,0xf78c # vperm %v2,%v22,%v22,%v31 + .word 0xe737,0x7000,0xf78c # vperm %v3,%v23,%v23,%v31 + .word 0xc24e,000000,0x0040 # clgfi %r4,64 + jl .Ltail_vx + .word 0xe747,0x3000,0x0036 # vlm %v4,%v7,0(%r3) + .word 0xe700,0x4000,0x006d # vx %v0,%v0,%v4 + .word 0xe711,0x5000,0x006d # vx %v1,%v1,%v5 + .word 0xe722,0x6000,0x006d # vx %v2,%v2,%v6 + .word 0xe733,0x7000,0x006d # vx %v3,%v3,%v7 + .word 0xe703,0x2000,0x003e # vstm %v0,%v3,0(%r2) + la %r3,0x40(%r3) + la %r2,0x40(%r2) + lhi %r0,10 + aghi %r4,-64 + jne .Loop_outer_vx +.Ldone_vx: + ld %f8,224-8*8(%r15) + ld %f9,224-8*7(%r15) + ld %f10,224-8*6(%r15) + ld %f11,224-8*5(%r15) + ld %f12,224-8*4(%r15) + ld %f13,224-8*3(%r15) + ld %f14,224-8*2(%r15) + ld %f15,224-8*1(%r15) + lmg %r6,%r7,224+6*8(%r15) + la %r15,224(%r15) + br %r14 +.align 16 +.Ltail_vx: + ld %f8,224-8*8(%r15) + ld %f9,224-8*7(%r15) + ld %f10,224-8*6(%r15) + ld %f11,224-8*5(%r15) + ld %f12,224-8*4(%r15) + ld %f13,224-8*3(%r15) + ld %f14,224-8*2(%r15) + ld %f15,224-8*1(%r15) + .word 0xe703,0xf0a0,0x003e # vstm %v0,%v3,160(%r15) + lghi %r1,0 +.Loop_tail_vx: + llgc %r5,0(%r1,%r3) + llgc %r6,160(%r1,%r15) + xr %r6,%r5 + stc %r6,0(%r1,%r2) + la %r1,1(%r1) + brct %r4,.Loop_tail_vx + lmg %r6,%r7,224+6*8(%r15) + la %r15,224(%r15) + br %r14 +.size ChaCha20_ctr32_vx,.-ChaCha20_ctr32_vx +.align 32 +.Lsigma: +.long 1634760805,857760878,2036477234,1797285236 +.long 1,0,0,0 +.long 2,0,0,0 +.long 3,0,0,0 +.long 50462976,117835012,185207048,252579084 +.long 0,1,2,3 +.long 1634760805,1634760805,1634760805,1634760805 +.long 857760878,857760878,857760878,857760878 +.long 2036477234,2036477234,2036477234,2036477234 +.long 1797285236,1797285236,1797285236,1797285236 +.asciz "ChaCha20 for s390x, 
CRYPTOGAMS by " +.align 4 diff --git a/contrib/openssl-cmake/asm/crypto/chacha/chacha-x86_64.s b/contrib/openssl-cmake/asm/crypto/chacha/chacha-x86_64.s new file mode 100644 index 000000000000..b692d97aad77 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/chacha/chacha-x86_64.s @@ -0,0 +1,3430 @@ +.text + + + +.section .rodata +.align 64 +.Lzero: +.long 0,0,0,0 +.Lone: +.long 1,0,0,0 +.Linc: +.long 0,1,2,3 +.Lfour: +.long 4,4,4,4 +.Lincy: +.long 0,2,4,6,1,3,5,7 +.Leight: +.long 8,8,8,8,8,8,8,8 +.Lrot16: +.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd +.Lrot24: +.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe +.Ltwoy: +.long 2,0,0,0, 2,0,0,0 +.align 64 +.Lzeroz: +.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 +.Lfourz: +.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 +.Lincz: +.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +.Lsixteen: +.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 +.Lsigma: +.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous +.globl ChaCha20_ctr32 +.type ChaCha20_ctr32,@function +.align 64 +ChaCha20_ctr32: +.cfi_startproc + cmpq $0,%rdx + je .Lno_data + movq OPENSSL_ia32cap_P+4(%rip),%r10 + btq $48,%r10 + jc .LChaCha20_avx512 + testq %r10,%r10 + js .LChaCha20_avx512vl + testl $512,%r10d + jnz .LChaCha20_ssse3 + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $64+24,%rsp +.cfi_adjust_cfa_offset 64+24 +.Lctr32_body: + + + movdqu (%rcx),%xmm1 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa .Lone(%rip),%xmm4 + + + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + movq %rdx,%rbp + jmp .Loop_outer + +.align 32 +.Loop_outer: + movl $0x61707865,%eax + movl $0x3320646e,%ebx + movl $0x79622d32,%ecx + movl $0x6b206574,%edx + movl 16(%rsp),%r8d + movl 20(%rsp),%r9d + movl 24(%rsp),%r10d + movl 28(%rsp),%r11d + movd %xmm3,%r12d + movl 52(%rsp),%r13d + movl 56(%rsp),%r14d + movl 60(%rsp),%r15d + + movq %rbp,64+0(%rsp) + movl $10,%ebp + movq %rsi,64+8(%rsp) +.byte 102,72,15,126,214 + movq %rdi,64+16(%rsp) + movq %rsi,%rdi + shrq $32,%rdi + jmp .Loop + +.align 32 +.Loop: + addl %r8d,%eax + xorl %eax,%r12d + roll $16,%r12d + addl %r9d,%ebx + xorl %ebx,%r13d + roll $16,%r13d + addl %r12d,%esi + xorl %esi,%r8d + roll $12,%r8d + addl %r13d,%edi + xorl %edi,%r9d + roll $12,%r9d + addl %r8d,%eax + xorl %eax,%r12d + roll $8,%r12d + addl %r9d,%ebx + xorl %ebx,%r13d + roll $8,%r13d + addl %r12d,%esi + xorl %esi,%r8d + roll $7,%r8d + addl %r13d,%edi + xorl %edi,%r9d + roll $7,%r9d + movl %esi,32(%rsp) + movl %edi,36(%rsp) + movl 40(%rsp),%esi + movl 44(%rsp),%edi + addl %r10d,%ecx + xorl %ecx,%r14d + roll $16,%r14d + addl %r11d,%edx + xorl %edx,%r15d + roll $16,%r15d + addl %r14d,%esi + xorl %esi,%r10d + roll $12,%r10d + addl %r15d,%edi + xorl %edi,%r11d + roll $12,%r11d + addl %r10d,%ecx + xorl %ecx,%r14d + roll $8,%r14d + addl %r11d,%edx + xorl %edx,%r15d + roll $8,%r15d + addl %r14d,%esi + xorl %esi,%r10d + roll $7,%r10d + addl %r15d,%edi + xorl %edi,%r11d + roll $7,%r11d + addl %r9d,%eax + 
xorl %eax,%r15d + roll $16,%r15d + addl %r10d,%ebx + xorl %ebx,%r12d + roll $16,%r12d + addl %r15d,%esi + xorl %esi,%r9d + roll $12,%r9d + addl %r12d,%edi + xorl %edi,%r10d + roll $12,%r10d + addl %r9d,%eax + xorl %eax,%r15d + roll $8,%r15d + addl %r10d,%ebx + xorl %ebx,%r12d + roll $8,%r12d + addl %r15d,%esi + xorl %esi,%r9d + roll $7,%r9d + addl %r12d,%edi + xorl %edi,%r10d + roll $7,%r10d + movl %esi,40(%rsp) + movl %edi,44(%rsp) + movl 32(%rsp),%esi + movl 36(%rsp),%edi + addl %r11d,%ecx + xorl %ecx,%r13d + roll $16,%r13d + addl %r8d,%edx + xorl %edx,%r14d + roll $16,%r14d + addl %r13d,%esi + xorl %esi,%r11d + roll $12,%r11d + addl %r14d,%edi + xorl %edi,%r8d + roll $12,%r8d + addl %r11d,%ecx + xorl %ecx,%r13d + roll $8,%r13d + addl %r8d,%edx + xorl %edx,%r14d + roll $8,%r14d + addl %r13d,%esi + xorl %esi,%r11d + roll $7,%r11d + addl %r14d,%edi + xorl %edi,%r8d + roll $7,%r8d + decl %ebp + jnz .Loop + movl %edi,36(%rsp) + movl %esi,32(%rsp) + movq 64(%rsp),%rbp + movdqa %xmm2,%xmm1 + movq 64+8(%rsp),%rsi + paddd %xmm4,%xmm3 + movq 64+16(%rsp),%rdi + + addl $0x61707865,%eax + addl $0x3320646e,%ebx + addl $0x79622d32,%ecx + addl $0x6b206574,%edx + addl 16(%rsp),%r8d + addl 20(%rsp),%r9d + addl 24(%rsp),%r10d + addl 28(%rsp),%r11d + addl 48(%rsp),%r12d + addl 52(%rsp),%r13d + addl 56(%rsp),%r14d + addl 60(%rsp),%r15d + paddd 32(%rsp),%xmm1 + + cmpq $64,%rbp + jb .Ltail + + xorl 0(%rsi),%eax + xorl 4(%rsi),%ebx + xorl 8(%rsi),%ecx + xorl 12(%rsi),%edx + xorl 16(%rsi),%r8d + xorl 20(%rsi),%r9d + xorl 24(%rsi),%r10d + xorl 28(%rsi),%r11d + movdqu 32(%rsi),%xmm0 + xorl 48(%rsi),%r12d + xorl 52(%rsi),%r13d + xorl 56(%rsi),%r14d + xorl 60(%rsi),%r15d + leaq 64(%rsi),%rsi + pxor %xmm1,%xmm0 + + movdqa %xmm2,32(%rsp) + movd %xmm3,48(%rsp) + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + movdqu %xmm0,32(%rdi) + movl %r12d,48(%rdi) + movl %r13d,52(%rdi) + movl %r14d,56(%rdi) + movl %r15d,60(%rdi) + leaq 64(%rdi),%rdi + + subq $64,%rbp + jnz .Loop_outer + + jmp .Ldone + +.align 16 +.Ltail: + movl %eax,0(%rsp) + movl %ebx,4(%rsp) + xorq %rbx,%rbx + movl %ecx,8(%rsp) + movl %edx,12(%rsp) + movl %r8d,16(%rsp) + movl %r9d,20(%rsp) + movl %r10d,24(%rsp) + movl %r11d,28(%rsp) + movdqa %xmm1,32(%rsp) + movl %r12d,48(%rsp) + movl %r13d,52(%rsp) + movl %r14d,56(%rsp) + movl %r15d,60(%rsp) + +.Loop_tail: + movzbl (%rsi,%rbx,1),%eax + movzbl (%rsp,%rbx,1),%edx + leaq 1(%rbx),%rbx + xorl %edx,%eax + movb %al,-1(%rdi,%rbx,1) + decq %rbp + jnz .Loop_tail + +.Ldone: + leaq 64+24+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lno_data: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.type ChaCha20_ssse3,@function +.align 32 +ChaCha20_ssse3: +.cfi_startproc +.LChaCha20_ssse3: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + testl $2048,%r10d + jnz .LChaCha20_4xop + cmpq $128,%rdx + je .LChaCha20_128 + ja .LChaCha20_4x + +.Ldo_sse3_after_all: + subq $64+8,%rsp + movdqa .Lsigma(%rip),%xmm0 + movdqu (%rcx),%xmm1 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa .Lrot16(%rip),%xmm6 + movdqa .Lrot24(%rip),%xmm7 + + movdqa %xmm0,0(%rsp) + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa 
%xmm3,48(%rsp) + movq $10,%r8 + jmp .Loop_ssse3 + +.align 32 +.Loop_outer_ssse3: + movdqa .Lone(%rip),%xmm3 + movdqa 0(%rsp),%xmm0 + movdqa 16(%rsp),%xmm1 + movdqa 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + movq $10,%r8 + movdqa %xmm3,48(%rsp) + jmp .Loop_ssse3 + +.align 32 +.Loop_ssse3: + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm1,%xmm1 + pshufd $147,%xmm3,%xmm3 + nop + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,222 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $20,%xmm1 + pslld $12,%xmm4 + por %xmm4,%xmm1 + paddd %xmm1,%xmm0 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,223 + paddd %xmm3,%xmm2 + pxor %xmm2,%xmm1 + movdqa %xmm1,%xmm4 + psrld $25,%xmm1 + pslld $7,%xmm4 + por %xmm4,%xmm1 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm1,%xmm1 + pshufd $57,%xmm3,%xmm3 + decq %r8 + jnz .Loop_ssse3 + paddd 0(%rsp),%xmm0 + paddd 16(%rsp),%xmm1 + paddd 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + + cmpq $64,%rdx + jb .Ltail_ssse3 + + movdqu 0(%rsi),%xmm4 + movdqu 16(%rsi),%xmm5 + pxor %xmm4,%xmm0 + movdqu 32(%rsi),%xmm4 + pxor %xmm5,%xmm1 + movdqu 48(%rsi),%xmm5 + leaq 64(%rsi),%rsi + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm3 + + movdqu %xmm0,0(%rdi) + movdqu %xmm1,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + leaq 64(%rdi),%rdi + + subq $64,%rdx + jnz .Loop_outer_ssse3 + + jmp .Ldone_ssse3 + +.align 16 +.Ltail_ssse3: + movdqa %xmm0,0(%rsp) + movdqa %xmm1,16(%rsp) + movdqa %xmm2,32(%rsp) + movdqa %xmm3,48(%rsp) + xorq %r8,%r8 + +.Loop_tail_ssse3: + movzbl (%rsi,%r8,1),%eax + movzbl (%rsp,%r8,1),%ecx + leaq 1(%r8),%r8 + xorl %ecx,%eax + movb %al,-1(%rdi,%r8,1) + decq %rdx + jnz .Loop_tail_ssse3 + +.Ldone_ssse3: + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.Lssse3_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_ssse3,.-ChaCha20_ssse3 +.type ChaCha20_128,@function +.align 32 +ChaCha20_128: +.cfi_startproc +.LChaCha20_128: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + subq $64+8,%rsp + movdqa .Lsigma(%rip),%xmm8 + movdqu (%rcx),%xmm9 + movdqu 16(%rcx),%xmm2 + movdqu (%r8),%xmm3 + movdqa .Lone(%rip),%xmm1 + movdqa .Lrot16(%rip),%xmm6 + movdqa .Lrot24(%rip),%xmm7 + + movdqa %xmm8,%xmm10 + movdqa %xmm8,0(%rsp) + movdqa %xmm9,%xmm11 + movdqa %xmm9,16(%rsp) + movdqa %xmm2,%xmm0 + movdqa %xmm2,32(%rsp) + paddd %xmm3,%xmm1 + movdqa %xmm3,48(%rsp) + movq $10,%r8 + jmp .Loop_128 + +.align 32 +.Loop_128: + paddd %xmm9,%xmm8 + pxor %xmm8,%xmm3 + paddd %xmm11,%xmm10 + pxor %xmm10,%xmm1 +.byte 102,15,56,0,222 +.byte 102,15,56,0,206 + paddd %xmm3,%xmm2 + paddd %xmm1,%xmm0 + pxor %xmm2,%xmm9 + pxor %xmm0,%xmm11 + movdqa %xmm9,%xmm4 + psrld $20,%xmm9 + movdqa %xmm11,%xmm5 + pslld $12,%xmm4 + psrld $20,%xmm11 + por %xmm4,%xmm9 + pslld $12,%xmm5 + por %xmm5,%xmm11 + paddd %xmm9,%xmm8 + pxor %xmm8,%xmm3 + paddd %xmm11,%xmm10 + pxor %xmm10,%xmm1 +.byte 102,15,56,0,223 +.byte 102,15,56,0,207 + paddd %xmm3,%xmm2 + paddd %xmm1,%xmm0 + pxor %xmm2,%xmm9 + pxor %xmm0,%xmm11 + movdqa %xmm9,%xmm4 + psrld $25,%xmm9 + movdqa %xmm11,%xmm5 + pslld $7,%xmm4 + psrld $25,%xmm11 + por %xmm4,%xmm9 + pslld $7,%xmm5 + por %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm2 + pshufd $57,%xmm9,%xmm9 + pshufd $147,%xmm3,%xmm3 + pshufd $78,%xmm0,%xmm0 + pshufd $57,%xmm11,%xmm11 + 
pshufd $147,%xmm1,%xmm1 + paddd %xmm9,%xmm8 + pxor %xmm8,%xmm3 + paddd %xmm11,%xmm10 + pxor %xmm10,%xmm1 +.byte 102,15,56,0,222 +.byte 102,15,56,0,206 + paddd %xmm3,%xmm2 + paddd %xmm1,%xmm0 + pxor %xmm2,%xmm9 + pxor %xmm0,%xmm11 + movdqa %xmm9,%xmm4 + psrld $20,%xmm9 + movdqa %xmm11,%xmm5 + pslld $12,%xmm4 + psrld $20,%xmm11 + por %xmm4,%xmm9 + pslld $12,%xmm5 + por %xmm5,%xmm11 + paddd %xmm9,%xmm8 + pxor %xmm8,%xmm3 + paddd %xmm11,%xmm10 + pxor %xmm10,%xmm1 +.byte 102,15,56,0,223 +.byte 102,15,56,0,207 + paddd %xmm3,%xmm2 + paddd %xmm1,%xmm0 + pxor %xmm2,%xmm9 + pxor %xmm0,%xmm11 + movdqa %xmm9,%xmm4 + psrld $25,%xmm9 + movdqa %xmm11,%xmm5 + pslld $7,%xmm4 + psrld $25,%xmm11 + por %xmm4,%xmm9 + pslld $7,%xmm5 + por %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm2 + pshufd $147,%xmm9,%xmm9 + pshufd $57,%xmm3,%xmm3 + pshufd $78,%xmm0,%xmm0 + pshufd $147,%xmm11,%xmm11 + pshufd $57,%xmm1,%xmm1 + decq %r8 + jnz .Loop_128 + paddd 0(%rsp),%xmm8 + paddd 16(%rsp),%xmm9 + paddd 32(%rsp),%xmm2 + paddd 48(%rsp),%xmm3 + paddd .Lone(%rip),%xmm1 + paddd 0(%rsp),%xmm10 + paddd 16(%rsp),%xmm11 + paddd 32(%rsp),%xmm0 + paddd 48(%rsp),%xmm1 + + movdqu 0(%rsi),%xmm4 + movdqu 16(%rsi),%xmm5 + pxor %xmm4,%xmm8 + movdqu 32(%rsi),%xmm4 + pxor %xmm5,%xmm9 + movdqu 48(%rsi),%xmm5 + pxor %xmm4,%xmm2 + movdqu 64(%rsi),%xmm4 + pxor %xmm5,%xmm3 + movdqu 80(%rsi),%xmm5 + pxor %xmm4,%xmm10 + movdqu 96(%rsi),%xmm4 + pxor %xmm5,%xmm11 + movdqu 112(%rsi),%xmm5 + pxor %xmm4,%xmm0 + pxor %xmm5,%xmm1 + + movdqu %xmm8,0(%rdi) + movdqu %xmm9,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + movdqu %xmm10,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm0,96(%rdi) + movdqu %xmm1,112(%rdi) + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.L128_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_128,.-ChaCha20_128 +.type ChaCha20_4x,@function +.align 32 +ChaCha20_4x: +.cfi_startproc +.LChaCha20_4x: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + movq %r10,%r11 + shrq $32,%r10 + testq $32,%r10 + jnz .LChaCha20_8x + cmpq $192,%rdx + ja .Lproceed4x + + andq $71303168,%r11 + cmpq $4194304,%r11 + je .Ldo_sse3_after_all + +.Lproceed4x: + subq $0x140+8,%rsp + movdqa .Lsigma(%rip),%xmm11 + movdqu (%rcx),%xmm15 + movdqu 16(%rcx),%xmm7 + movdqu (%r8),%xmm3 + leaq 256(%rsp),%rcx + leaq .Lrot16(%rip),%r10 + leaq .Lrot24(%rip),%r11 + + pshufd $0x00,%xmm11,%xmm8 + pshufd $0x55,%xmm11,%xmm9 + movdqa %xmm8,64(%rsp) + pshufd $0xaa,%xmm11,%xmm10 + movdqa %xmm9,80(%rsp) + pshufd $0xff,%xmm11,%xmm11 + movdqa %xmm10,96(%rsp) + movdqa %xmm11,112(%rsp) + + pshufd $0x00,%xmm15,%xmm12 + pshufd $0x55,%xmm15,%xmm13 + movdqa %xmm12,128-256(%rcx) + pshufd $0xaa,%xmm15,%xmm14 + movdqa %xmm13,144-256(%rcx) + pshufd $0xff,%xmm15,%xmm15 + movdqa %xmm14,160-256(%rcx) + movdqa %xmm15,176-256(%rcx) + + pshufd $0x00,%xmm7,%xmm4 + pshufd $0x55,%xmm7,%xmm5 + movdqa %xmm4,192-256(%rcx) + pshufd $0xaa,%xmm7,%xmm6 + movdqa %xmm5,208-256(%rcx) + pshufd $0xff,%xmm7,%xmm7 + movdqa %xmm6,224-256(%rcx) + movdqa %xmm7,240-256(%rcx) + + pshufd $0x00,%xmm3,%xmm0 + pshufd $0x55,%xmm3,%xmm1 + paddd .Linc(%rip),%xmm0 + pshufd $0xaa,%xmm3,%xmm2 + movdqa %xmm1,272-256(%rcx) + pshufd $0xff,%xmm3,%xmm3 + movdqa %xmm2,288-256(%rcx) + movdqa %xmm3,304-256(%rcx) + + jmp .Loop_enter4x + +.align 32 +.Loop_outer4x: + movdqa 64(%rsp),%xmm8 + movdqa 80(%rsp),%xmm9 + movdqa 96(%rsp),%xmm10 + movdqa 112(%rsp),%xmm11 + movdqa 128-256(%rcx),%xmm12 + movdqa 144-256(%rcx),%xmm13 + movdqa 160-256(%rcx),%xmm14 + movdqa 176-256(%rcx),%xmm15 + movdqa 192-256(%rcx),%xmm4 + movdqa 208-256(%rcx),%xmm5 + 
movdqa 224-256(%rcx),%xmm6 + movdqa 240-256(%rcx),%xmm7 + movdqa 256-256(%rcx),%xmm0 + movdqa 272-256(%rcx),%xmm1 + movdqa 288-256(%rcx),%xmm2 + movdqa 304-256(%rcx),%xmm3 + paddd .Lfour(%rip),%xmm0 + +.Loop_enter4x: + movdqa %xmm6,32(%rsp) + movdqa %xmm7,48(%rsp) + movdqa (%r10),%xmm7 + movl $10,%eax + movdqa %xmm0,256-256(%rcx) + jmp .Loop4x + +.align 32 +.Loop4x: + paddd %xmm12,%xmm8 + paddd %xmm13,%xmm9 + pxor %xmm8,%xmm0 + pxor %xmm9,%xmm1 +.byte 102,15,56,0,199 +.byte 102,15,56,0,207 + paddd %xmm0,%xmm4 + paddd %xmm1,%xmm5 + pxor %xmm4,%xmm12 + pxor %xmm5,%xmm13 + movdqa %xmm12,%xmm6 + pslld $12,%xmm12 + psrld $20,%xmm6 + movdqa %xmm13,%xmm7 + pslld $12,%xmm13 + por %xmm6,%xmm12 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm13 + paddd %xmm12,%xmm8 + paddd %xmm13,%xmm9 + pxor %xmm8,%xmm0 + pxor %xmm9,%xmm1 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 + paddd %xmm0,%xmm4 + paddd %xmm1,%xmm5 + pxor %xmm4,%xmm12 + pxor %xmm5,%xmm13 + movdqa %xmm12,%xmm7 + pslld $7,%xmm12 + psrld $25,%xmm7 + movdqa %xmm13,%xmm6 + pslld $7,%xmm13 + por %xmm7,%xmm12 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm13 + movdqa %xmm4,0(%rsp) + movdqa %xmm5,16(%rsp) + movdqa 32(%rsp),%xmm4 + movdqa 48(%rsp),%xmm5 + paddd %xmm14,%xmm10 + paddd %xmm15,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm3 +.byte 102,15,56,0,215 +.byte 102,15,56,0,223 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + pxor %xmm4,%xmm14 + pxor %xmm5,%xmm15 + movdqa %xmm14,%xmm6 + pslld $12,%xmm14 + psrld $20,%xmm6 + movdqa %xmm15,%xmm7 + pslld $12,%xmm15 + por %xmm6,%xmm14 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm15 + paddd %xmm14,%xmm10 + paddd %xmm15,%xmm11 + pxor %xmm10,%xmm2 + pxor %xmm11,%xmm3 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + pxor %xmm4,%xmm14 + pxor %xmm5,%xmm15 + movdqa %xmm14,%xmm7 + pslld $7,%xmm14 + psrld $25,%xmm7 + movdqa %xmm15,%xmm6 + pslld $7,%xmm15 + por %xmm7,%xmm14 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm15 + paddd %xmm13,%xmm8 + paddd %xmm14,%xmm9 + pxor %xmm8,%xmm3 + pxor %xmm9,%xmm0 +.byte 102,15,56,0,223 +.byte 102,15,56,0,199 + paddd %xmm3,%xmm4 + paddd %xmm0,%xmm5 + pxor %xmm4,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm13,%xmm6 + pslld $12,%xmm13 + psrld $20,%xmm6 + movdqa %xmm14,%xmm7 + pslld $12,%xmm14 + por %xmm6,%xmm13 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm14 + paddd %xmm13,%xmm8 + paddd %xmm14,%xmm9 + pxor %xmm8,%xmm3 + pxor %xmm9,%xmm0 +.byte 102,15,56,0,222 +.byte 102,15,56,0,198 + paddd %xmm3,%xmm4 + paddd %xmm0,%xmm5 + pxor %xmm4,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm13,%xmm7 + pslld $7,%xmm13 + psrld $25,%xmm7 + movdqa %xmm14,%xmm6 + pslld $7,%xmm14 + por %xmm7,%xmm13 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm14 + movdqa %xmm4,32(%rsp) + movdqa %xmm5,48(%rsp) + movdqa 0(%rsp),%xmm4 + movdqa 16(%rsp),%xmm5 + paddd %xmm15,%xmm10 + paddd %xmm12,%xmm11 + pxor %xmm10,%xmm1 + pxor %xmm11,%xmm2 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + paddd %xmm1,%xmm4 + paddd %xmm2,%xmm5 + pxor %xmm4,%xmm15 + pxor %xmm5,%xmm12 + movdqa %xmm15,%xmm6 + pslld $12,%xmm15 + psrld $20,%xmm6 + movdqa %xmm12,%xmm7 + pslld $12,%xmm12 + por %xmm6,%xmm15 + psrld $20,%xmm7 + movdqa (%r11),%xmm6 + por %xmm7,%xmm12 + paddd %xmm15,%xmm10 + paddd %xmm12,%xmm11 + pxor %xmm10,%xmm1 + pxor %xmm11,%xmm2 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + paddd %xmm1,%xmm4 + paddd %xmm2,%xmm5 + pxor %xmm4,%xmm15 + pxor %xmm5,%xmm12 + movdqa %xmm15,%xmm7 + pslld $7,%xmm15 + psrld $25,%xmm7 + movdqa %xmm12,%xmm6 + pslld 
$7,%xmm12 + por %xmm7,%xmm15 + psrld $25,%xmm6 + movdqa (%r10),%xmm7 + por %xmm6,%xmm12 + decl %eax + jnz .Loop4x + + paddd 64(%rsp),%xmm8 + paddd 80(%rsp),%xmm9 + paddd 96(%rsp),%xmm10 + paddd 112(%rsp),%xmm11 + + movdqa %xmm8,%xmm6 + punpckldq %xmm9,%xmm8 + movdqa %xmm10,%xmm7 + punpckldq %xmm11,%xmm10 + punpckhdq %xmm9,%xmm6 + punpckhdq %xmm11,%xmm7 + movdqa %xmm8,%xmm9 + punpcklqdq %xmm10,%xmm8 + movdqa %xmm6,%xmm11 + punpcklqdq %xmm7,%xmm6 + punpckhqdq %xmm10,%xmm9 + punpckhqdq %xmm7,%xmm11 + paddd 128-256(%rcx),%xmm12 + paddd 144-256(%rcx),%xmm13 + paddd 160-256(%rcx),%xmm14 + paddd 176-256(%rcx),%xmm15 + + movdqa %xmm8,0(%rsp) + movdqa %xmm9,16(%rsp) + movdqa 32(%rsp),%xmm8 + movdqa 48(%rsp),%xmm9 + + movdqa %xmm12,%xmm10 + punpckldq %xmm13,%xmm12 + movdqa %xmm14,%xmm7 + punpckldq %xmm15,%xmm14 + punpckhdq %xmm13,%xmm10 + punpckhdq %xmm15,%xmm7 + movdqa %xmm12,%xmm13 + punpcklqdq %xmm14,%xmm12 + movdqa %xmm10,%xmm15 + punpcklqdq %xmm7,%xmm10 + punpckhqdq %xmm14,%xmm13 + punpckhqdq %xmm7,%xmm15 + paddd 192-256(%rcx),%xmm4 + paddd 208-256(%rcx),%xmm5 + paddd 224-256(%rcx),%xmm8 + paddd 240-256(%rcx),%xmm9 + + movdqa %xmm6,32(%rsp) + movdqa %xmm11,48(%rsp) + + movdqa %xmm4,%xmm14 + punpckldq %xmm5,%xmm4 + movdqa %xmm8,%xmm7 + punpckldq %xmm9,%xmm8 + punpckhdq %xmm5,%xmm14 + punpckhdq %xmm9,%xmm7 + movdqa %xmm4,%xmm5 + punpcklqdq %xmm8,%xmm4 + movdqa %xmm14,%xmm9 + punpcklqdq %xmm7,%xmm14 + punpckhqdq %xmm8,%xmm5 + punpckhqdq %xmm7,%xmm9 + paddd 256-256(%rcx),%xmm0 + paddd 272-256(%rcx),%xmm1 + paddd 288-256(%rcx),%xmm2 + paddd 304-256(%rcx),%xmm3 + + movdqa %xmm0,%xmm8 + punpckldq %xmm1,%xmm0 + movdqa %xmm2,%xmm7 + punpckldq %xmm3,%xmm2 + punpckhdq %xmm1,%xmm8 + punpckhdq %xmm3,%xmm7 + movdqa %xmm0,%xmm1 + punpcklqdq %xmm2,%xmm0 + movdqa %xmm8,%xmm3 + punpcklqdq %xmm7,%xmm8 + punpckhqdq %xmm2,%xmm1 + punpckhqdq %xmm7,%xmm3 + cmpq $256,%rdx + jb .Ltail4x + + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + + movdqu %xmm6,64(%rdi) + movdqu 0(%rsi),%xmm6 + movdqu %xmm11,80(%rdi) + movdqu 16(%rsi),%xmm11 + movdqu %xmm2,96(%rdi) + movdqu 32(%rsi),%xmm2 + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + movdqu 48(%rsi),%xmm7 + pxor 32(%rsp),%xmm6 + pxor %xmm10,%xmm11 + pxor %xmm14,%xmm2 + pxor %xmm8,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 48(%rsp),%xmm6 + pxor %xmm15,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm3,%xmm7 + movdqu %xmm6,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm2,96(%rdi) + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + + subq $256,%rdx + jnz .Loop_outer4x + + jmp .Ldone4x + +.Ltail4x: + cmpq $192,%rdx + jae .L192_or_more4x + cmpq $128,%rdx + jae .L128_or_more4x + cmpq $64,%rdx + jae .L64_or_more4x + + + xorq %r10,%r10 + + movdqa %xmm12,16(%rsp) + movdqa %xmm4,32(%rsp) + movdqa %xmm0,48(%rsp) + jmp .Loop_tail4x + +.align 32 +.L64_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + 
pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + movdqu %xmm6,0(%rdi) + movdqu %xmm11,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm7,48(%rdi) + je .Ldone4x + + movdqa 16(%rsp),%xmm6 + leaq 64(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm13,16(%rsp) + leaq 64(%rdi),%rdi + movdqa %xmm5,32(%rsp) + subq $64,%rdx + movdqa %xmm1,48(%rsp) + jmp .Loop_tail4x + +.align 32 +.L128_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + movdqu %xmm6,64(%rdi) + movdqu %xmm11,80(%rdi) + movdqu %xmm2,96(%rdi) + movdqu %xmm7,112(%rdi) + je .Ldone4x + + movdqa 32(%rsp),%xmm6 + leaq 128(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm10,16(%rsp) + leaq 128(%rdi),%rdi + movdqa %xmm14,32(%rsp) + subq $128,%rdx + movdqa %xmm8,48(%rsp) + jmp .Loop_tail4x + +.align 32 +.L192_or_more4x: + movdqu 0(%rsi),%xmm6 + movdqu 16(%rsi),%xmm11 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm7 + pxor 0(%rsp),%xmm6 + pxor %xmm12,%xmm11 + pxor %xmm4,%xmm2 + pxor %xmm0,%xmm7 + + movdqu %xmm6,0(%rdi) + movdqu 64(%rsi),%xmm6 + movdqu %xmm11,16(%rdi) + movdqu 80(%rsi),%xmm11 + movdqu %xmm2,32(%rdi) + movdqu 96(%rsi),%xmm2 + movdqu %xmm7,48(%rdi) + movdqu 112(%rsi),%xmm7 + leaq 128(%rsi),%rsi + pxor 16(%rsp),%xmm6 + pxor %xmm13,%xmm11 + pxor %xmm5,%xmm2 + pxor %xmm1,%xmm7 + + movdqu %xmm6,64(%rdi) + movdqu 0(%rsi),%xmm6 + movdqu %xmm11,80(%rdi) + movdqu 16(%rsi),%xmm11 + movdqu %xmm2,96(%rdi) + movdqu 32(%rsi),%xmm2 + movdqu %xmm7,112(%rdi) + leaq 128(%rdi),%rdi + movdqu 48(%rsi),%xmm7 + pxor 32(%rsp),%xmm6 + pxor %xmm10,%xmm11 + pxor %xmm14,%xmm2 + pxor %xmm8,%xmm7 + movdqu %xmm6,0(%rdi) + movdqu %xmm11,16(%rdi) + movdqu %xmm2,32(%rdi) + movdqu %xmm7,48(%rdi) + je .Ldone4x + + movdqa 48(%rsp),%xmm6 + leaq 64(%rsi),%rsi + xorq %r10,%r10 + movdqa %xmm6,0(%rsp) + movdqa %xmm15,16(%rsp) + leaq 64(%rdi),%rdi + movdqa %xmm9,32(%rsp) + subq $192,%rdx + movdqa %xmm3,48(%rsp) + +.Loop_tail4x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail4x + +.Ldone4x: + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.L4x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_4x,.-ChaCha20_4x +.type ChaCha20_4xop,@function +.align 32 +ChaCha20_4xop: +.cfi_startproc +.LChaCha20_4xop: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + subq $0x140+8,%rsp + vzeroupper + + vmovdqa .Lsigma(%rip),%xmm11 + vmovdqu (%rcx),%xmm3 + vmovdqu 16(%rcx),%xmm15 + vmovdqu (%r8),%xmm7 + leaq 256(%rsp),%rcx + + vpshufd $0x00,%xmm11,%xmm8 + vpshufd $0x55,%xmm11,%xmm9 + vmovdqa %xmm8,64(%rsp) + vpshufd $0xaa,%xmm11,%xmm10 + vmovdqa %xmm9,80(%rsp) + vpshufd $0xff,%xmm11,%xmm11 + vmovdqa %xmm10,96(%rsp) + vmovdqa %xmm11,112(%rsp) + + vpshufd $0x00,%xmm3,%xmm0 + vpshufd $0x55,%xmm3,%xmm1 + vmovdqa %xmm0,128-256(%rcx) + vpshufd $0xaa,%xmm3,%xmm2 + vmovdqa %xmm1,144-256(%rcx) + vpshufd $0xff,%xmm3,%xmm3 + vmovdqa %xmm2,160-256(%rcx) + vmovdqa %xmm3,176-256(%rcx) + + vpshufd $0x00,%xmm15,%xmm12 + vpshufd $0x55,%xmm15,%xmm13 + vmovdqa %xmm12,192-256(%rcx) + vpshufd $0xaa,%xmm15,%xmm14 + vmovdqa %xmm13,208-256(%rcx) + vpshufd $0xff,%xmm15,%xmm15 + vmovdqa 
%xmm14,224-256(%rcx) + vmovdqa %xmm15,240-256(%rcx) + + vpshufd $0x00,%xmm7,%xmm4 + vpshufd $0x55,%xmm7,%xmm5 + vpaddd .Linc(%rip),%xmm4,%xmm4 + vpshufd $0xaa,%xmm7,%xmm6 + vmovdqa %xmm5,272-256(%rcx) + vpshufd $0xff,%xmm7,%xmm7 + vmovdqa %xmm6,288-256(%rcx) + vmovdqa %xmm7,304-256(%rcx) + + jmp .Loop_enter4xop + +.align 32 +.Loop_outer4xop: + vmovdqa 64(%rsp),%xmm8 + vmovdqa 80(%rsp),%xmm9 + vmovdqa 96(%rsp),%xmm10 + vmovdqa 112(%rsp),%xmm11 + vmovdqa 128-256(%rcx),%xmm0 + vmovdqa 144-256(%rcx),%xmm1 + vmovdqa 160-256(%rcx),%xmm2 + vmovdqa 176-256(%rcx),%xmm3 + vmovdqa 192-256(%rcx),%xmm12 + vmovdqa 208-256(%rcx),%xmm13 + vmovdqa 224-256(%rcx),%xmm14 + vmovdqa 240-256(%rcx),%xmm15 + vmovdqa 256-256(%rcx),%xmm4 + vmovdqa 272-256(%rcx),%xmm5 + vmovdqa 288-256(%rcx),%xmm6 + vmovdqa 304-256(%rcx),%xmm7 + vpaddd .Lfour(%rip),%xmm4,%xmm4 + +.Loop_enter4xop: + movl $10,%eax + vmovdqa %xmm4,256-256(%rcx) + jmp .Loop4xop + +.align 32 +.Loop4xop: + vpaddd %xmm0,%xmm8,%xmm8 + vpaddd %xmm1,%xmm9,%xmm9 + vpaddd %xmm2,%xmm10,%xmm10 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor %xmm4,%xmm8,%xmm4 + vpxor %xmm5,%xmm9,%xmm5 + vpxor %xmm6,%xmm10,%xmm6 + vpxor %xmm7,%xmm11,%xmm7 +.byte 143,232,120,194,228,16 +.byte 143,232,120,194,237,16 +.byte 143,232,120,194,246,16 +.byte 143,232,120,194,255,16 + vpaddd %xmm4,%xmm12,%xmm12 + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm15,%xmm15 + vpxor %xmm0,%xmm12,%xmm0 + vpxor %xmm1,%xmm13,%xmm1 + vpxor %xmm14,%xmm2,%xmm2 + vpxor %xmm15,%xmm3,%xmm3 +.byte 143,232,120,194,192,12 +.byte 143,232,120,194,201,12 +.byte 143,232,120,194,210,12 +.byte 143,232,120,194,219,12 + vpaddd %xmm8,%xmm0,%xmm8 + vpaddd %xmm9,%xmm1,%xmm9 + vpaddd %xmm2,%xmm10,%xmm10 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor %xmm4,%xmm8,%xmm4 + vpxor %xmm5,%xmm9,%xmm5 + vpxor %xmm6,%xmm10,%xmm6 + vpxor %xmm7,%xmm11,%xmm7 +.byte 143,232,120,194,228,8 +.byte 143,232,120,194,237,8 +.byte 143,232,120,194,246,8 +.byte 143,232,120,194,255,8 + vpaddd %xmm4,%xmm12,%xmm12 + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm15,%xmm15 + vpxor %xmm0,%xmm12,%xmm0 + vpxor %xmm1,%xmm13,%xmm1 + vpxor %xmm14,%xmm2,%xmm2 + vpxor %xmm15,%xmm3,%xmm3 +.byte 143,232,120,194,192,7 +.byte 143,232,120,194,201,7 +.byte 143,232,120,194,210,7 +.byte 143,232,120,194,219,7 + vpaddd %xmm1,%xmm8,%xmm8 + vpaddd %xmm2,%xmm9,%xmm9 + vpaddd %xmm3,%xmm10,%xmm10 + vpaddd %xmm0,%xmm11,%xmm11 + vpxor %xmm7,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm4 + vpxor %xmm5,%xmm10,%xmm5 + vpxor %xmm6,%xmm11,%xmm6 +.byte 143,232,120,194,255,16 +.byte 143,232,120,194,228,16 +.byte 143,232,120,194,237,16 +.byte 143,232,120,194,246,16 + vpaddd %xmm7,%xmm14,%xmm14 + vpaddd %xmm4,%xmm15,%xmm15 + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm6,%xmm13,%xmm13 + vpxor %xmm1,%xmm14,%xmm1 + vpxor %xmm2,%xmm15,%xmm2 + vpxor %xmm12,%xmm3,%xmm3 + vpxor %xmm13,%xmm0,%xmm0 +.byte 143,232,120,194,201,12 +.byte 143,232,120,194,210,12 +.byte 143,232,120,194,219,12 +.byte 143,232,120,194,192,12 + vpaddd %xmm8,%xmm1,%xmm8 + vpaddd %xmm9,%xmm2,%xmm9 + vpaddd %xmm3,%xmm10,%xmm10 + vpaddd %xmm0,%xmm11,%xmm11 + vpxor %xmm7,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm4 + vpxor %xmm5,%xmm10,%xmm5 + vpxor %xmm6,%xmm11,%xmm6 +.byte 143,232,120,194,255,8 +.byte 143,232,120,194,228,8 +.byte 143,232,120,194,237,8 +.byte 143,232,120,194,246,8 + vpaddd %xmm7,%xmm14,%xmm14 + vpaddd %xmm4,%xmm15,%xmm15 + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm6,%xmm13,%xmm13 + vpxor %xmm1,%xmm14,%xmm1 + vpxor %xmm2,%xmm15,%xmm2 + vpxor %xmm12,%xmm3,%xmm3 + vpxor %xmm13,%xmm0,%xmm0 +.byte 
143,232,120,194,201,7 +.byte 143,232,120,194,210,7 +.byte 143,232,120,194,219,7 +.byte 143,232,120,194,192,7 + decl %eax + jnz .Loop4xop + + vpaddd 64(%rsp),%xmm8,%xmm8 + vpaddd 80(%rsp),%xmm9,%xmm9 + vpaddd 96(%rsp),%xmm10,%xmm10 + vpaddd 112(%rsp),%xmm11,%xmm11 + + vmovdqa %xmm14,32(%rsp) + vmovdqa %xmm15,48(%rsp) + + vpunpckldq %xmm9,%xmm8,%xmm14 + vpunpckldq %xmm11,%xmm10,%xmm15 + vpunpckhdq %xmm9,%xmm8,%xmm8 + vpunpckhdq %xmm11,%xmm10,%xmm10 + vpunpcklqdq %xmm15,%xmm14,%xmm9 + vpunpckhqdq %xmm15,%xmm14,%xmm14 + vpunpcklqdq %xmm10,%xmm8,%xmm11 + vpunpckhqdq %xmm10,%xmm8,%xmm8 + vpaddd 128-256(%rcx),%xmm0,%xmm0 + vpaddd 144-256(%rcx),%xmm1,%xmm1 + vpaddd 160-256(%rcx),%xmm2,%xmm2 + vpaddd 176-256(%rcx),%xmm3,%xmm3 + + vmovdqa %xmm9,0(%rsp) + vmovdqa %xmm14,16(%rsp) + vmovdqa 32(%rsp),%xmm9 + vmovdqa 48(%rsp),%xmm14 + + vpunpckldq %xmm1,%xmm0,%xmm10 + vpunpckldq %xmm3,%xmm2,%xmm15 + vpunpckhdq %xmm1,%xmm0,%xmm0 + vpunpckhdq %xmm3,%xmm2,%xmm2 + vpunpcklqdq %xmm15,%xmm10,%xmm1 + vpunpckhqdq %xmm15,%xmm10,%xmm10 + vpunpcklqdq %xmm2,%xmm0,%xmm3 + vpunpckhqdq %xmm2,%xmm0,%xmm0 + vpaddd 192-256(%rcx),%xmm12,%xmm12 + vpaddd 208-256(%rcx),%xmm13,%xmm13 + vpaddd 224-256(%rcx),%xmm9,%xmm9 + vpaddd 240-256(%rcx),%xmm14,%xmm14 + + vpunpckldq %xmm13,%xmm12,%xmm2 + vpunpckldq %xmm14,%xmm9,%xmm15 + vpunpckhdq %xmm13,%xmm12,%xmm12 + vpunpckhdq %xmm14,%xmm9,%xmm9 + vpunpcklqdq %xmm15,%xmm2,%xmm13 + vpunpckhqdq %xmm15,%xmm2,%xmm2 + vpunpcklqdq %xmm9,%xmm12,%xmm14 + vpunpckhqdq %xmm9,%xmm12,%xmm12 + vpaddd 256-256(%rcx),%xmm4,%xmm4 + vpaddd 272-256(%rcx),%xmm5,%xmm5 + vpaddd 288-256(%rcx),%xmm6,%xmm6 + vpaddd 304-256(%rcx),%xmm7,%xmm7 + + vpunpckldq %xmm5,%xmm4,%xmm9 + vpunpckldq %xmm7,%xmm6,%xmm15 + vpunpckhdq %xmm5,%xmm4,%xmm4 + vpunpckhdq %xmm7,%xmm6,%xmm6 + vpunpcklqdq %xmm15,%xmm9,%xmm5 + vpunpckhqdq %xmm15,%xmm9,%xmm9 + vpunpcklqdq %xmm6,%xmm4,%xmm7 + vpunpckhqdq %xmm6,%xmm4,%xmm4 + vmovdqa 0(%rsp),%xmm6 + vmovdqa 16(%rsp),%xmm15 + + cmpq $256,%rdx + jb .Ltail4xop + + vpxor 0(%rsi),%xmm6,%xmm6 + vpxor 16(%rsi),%xmm1,%xmm1 + vpxor 32(%rsi),%xmm13,%xmm13 + vpxor 48(%rsi),%xmm5,%xmm5 + vpxor 64(%rsi),%xmm15,%xmm15 + vpxor 80(%rsi),%xmm10,%xmm10 + vpxor 96(%rsi),%xmm2,%xmm2 + vpxor 112(%rsi),%xmm9,%xmm9 + leaq 128(%rsi),%rsi + vpxor 0(%rsi),%xmm11,%xmm11 + vpxor 16(%rsi),%xmm3,%xmm3 + vpxor 32(%rsi),%xmm14,%xmm14 + vpxor 48(%rsi),%xmm7,%xmm7 + vpxor 64(%rsi),%xmm8,%xmm8 + vpxor 80(%rsi),%xmm0,%xmm0 + vpxor 96(%rsi),%xmm12,%xmm12 + vpxor 112(%rsi),%xmm4,%xmm4 + leaq 128(%rsi),%rsi + + vmovdqu %xmm6,0(%rdi) + vmovdqu %xmm1,16(%rdi) + vmovdqu %xmm13,32(%rdi) + vmovdqu %xmm5,48(%rdi) + vmovdqu %xmm15,64(%rdi) + vmovdqu %xmm10,80(%rdi) + vmovdqu %xmm2,96(%rdi) + vmovdqu %xmm9,112(%rdi) + leaq 128(%rdi),%rdi + vmovdqu %xmm11,0(%rdi) + vmovdqu %xmm3,16(%rdi) + vmovdqu %xmm14,32(%rdi) + vmovdqu %xmm7,48(%rdi) + vmovdqu %xmm8,64(%rdi) + vmovdqu %xmm0,80(%rdi) + vmovdqu %xmm12,96(%rdi) + vmovdqu %xmm4,112(%rdi) + leaq 128(%rdi),%rdi + + subq $256,%rdx + jnz .Loop_outer4xop + + jmp .Ldone4xop + +.align 32 +.Ltail4xop: + cmpq $192,%rdx + jae .L192_or_more4xop + cmpq $128,%rdx + jae .L128_or_more4xop + cmpq $64,%rdx + jae .L64_or_more4xop + + xorq %r10,%r10 + vmovdqa %xmm6,0(%rsp) + vmovdqa %xmm1,16(%rsp) + vmovdqa %xmm13,32(%rsp) + vmovdqa %xmm5,48(%rsp) + jmp .Loop_tail4xop + +.align 32 +.L64_or_more4xop: + vpxor 0(%rsi),%xmm6,%xmm6 + vpxor 16(%rsi),%xmm1,%xmm1 + vpxor 32(%rsi),%xmm13,%xmm13 + vpxor 48(%rsi),%xmm5,%xmm5 + vmovdqu %xmm6,0(%rdi) + vmovdqu %xmm1,16(%rdi) + vmovdqu %xmm13,32(%rdi) + vmovdqu 
%xmm5,48(%rdi) + je .Ldone4xop + + leaq 64(%rsi),%rsi + vmovdqa %xmm15,0(%rsp) + xorq %r10,%r10 + vmovdqa %xmm10,16(%rsp) + leaq 64(%rdi),%rdi + vmovdqa %xmm2,32(%rsp) + subq $64,%rdx + vmovdqa %xmm9,48(%rsp) + jmp .Loop_tail4xop + +.align 32 +.L128_or_more4xop: + vpxor 0(%rsi),%xmm6,%xmm6 + vpxor 16(%rsi),%xmm1,%xmm1 + vpxor 32(%rsi),%xmm13,%xmm13 + vpxor 48(%rsi),%xmm5,%xmm5 + vpxor 64(%rsi),%xmm15,%xmm15 + vpxor 80(%rsi),%xmm10,%xmm10 + vpxor 96(%rsi),%xmm2,%xmm2 + vpxor 112(%rsi),%xmm9,%xmm9 + + vmovdqu %xmm6,0(%rdi) + vmovdqu %xmm1,16(%rdi) + vmovdqu %xmm13,32(%rdi) + vmovdqu %xmm5,48(%rdi) + vmovdqu %xmm15,64(%rdi) + vmovdqu %xmm10,80(%rdi) + vmovdqu %xmm2,96(%rdi) + vmovdqu %xmm9,112(%rdi) + je .Ldone4xop + + leaq 128(%rsi),%rsi + vmovdqa %xmm11,0(%rsp) + xorq %r10,%r10 + vmovdqa %xmm3,16(%rsp) + leaq 128(%rdi),%rdi + vmovdqa %xmm14,32(%rsp) + subq $128,%rdx + vmovdqa %xmm7,48(%rsp) + jmp .Loop_tail4xop + +.align 32 +.L192_or_more4xop: + vpxor 0(%rsi),%xmm6,%xmm6 + vpxor 16(%rsi),%xmm1,%xmm1 + vpxor 32(%rsi),%xmm13,%xmm13 + vpxor 48(%rsi),%xmm5,%xmm5 + vpxor 64(%rsi),%xmm15,%xmm15 + vpxor 80(%rsi),%xmm10,%xmm10 + vpxor 96(%rsi),%xmm2,%xmm2 + vpxor 112(%rsi),%xmm9,%xmm9 + leaq 128(%rsi),%rsi + vpxor 0(%rsi),%xmm11,%xmm11 + vpxor 16(%rsi),%xmm3,%xmm3 + vpxor 32(%rsi),%xmm14,%xmm14 + vpxor 48(%rsi),%xmm7,%xmm7 + + vmovdqu %xmm6,0(%rdi) + vmovdqu %xmm1,16(%rdi) + vmovdqu %xmm13,32(%rdi) + vmovdqu %xmm5,48(%rdi) + vmovdqu %xmm15,64(%rdi) + vmovdqu %xmm10,80(%rdi) + vmovdqu %xmm2,96(%rdi) + vmovdqu %xmm9,112(%rdi) + leaq 128(%rdi),%rdi + vmovdqu %xmm11,0(%rdi) + vmovdqu %xmm3,16(%rdi) + vmovdqu %xmm14,32(%rdi) + vmovdqu %xmm7,48(%rdi) + je .Ldone4xop + + leaq 64(%rsi),%rsi + vmovdqa %xmm8,0(%rsp) + xorq %r10,%r10 + vmovdqa %xmm0,16(%rsp) + leaq 64(%rdi),%rdi + vmovdqa %xmm12,32(%rsp) + subq $192,%rdx + vmovdqa %xmm4,48(%rsp) + +.Loop_tail4xop: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail4xop + +.Ldone4xop: + vzeroupper + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.L4xop_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_4xop,.-ChaCha20_4xop +.type ChaCha20_8x,@function +.align 32 +ChaCha20_8x: +.cfi_startproc +.LChaCha20_8x: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + subq $0x280+8,%rsp + andq $-32,%rsp + vzeroupper + + + + + + + + + + + vbroadcasti128 .Lsigma(%rip),%ymm11 + vbroadcasti128 (%rcx),%ymm3 + vbroadcasti128 16(%rcx),%ymm15 + vbroadcasti128 (%r8),%ymm7 + leaq 256(%rsp),%rcx + leaq 512(%rsp),%rax + leaq .Lrot16(%rip),%r10 + leaq .Lrot24(%rip),%r11 + + vpshufd $0x00,%ymm11,%ymm8 + vpshufd $0x55,%ymm11,%ymm9 + vmovdqa %ymm8,128-256(%rcx) + vpshufd $0xaa,%ymm11,%ymm10 + vmovdqa %ymm9,160-256(%rcx) + vpshufd $0xff,%ymm11,%ymm11 + vmovdqa %ymm10,192-256(%rcx) + vmovdqa %ymm11,224-256(%rcx) + + vpshufd $0x00,%ymm3,%ymm0 + vpshufd $0x55,%ymm3,%ymm1 + vmovdqa %ymm0,256-256(%rcx) + vpshufd $0xaa,%ymm3,%ymm2 + vmovdqa %ymm1,288-256(%rcx) + vpshufd $0xff,%ymm3,%ymm3 + vmovdqa %ymm2,320-256(%rcx) + vmovdqa %ymm3,352-256(%rcx) + + vpshufd $0x00,%ymm15,%ymm12 + vpshufd $0x55,%ymm15,%ymm13 + vmovdqa %ymm12,384-512(%rax) + vpshufd $0xaa,%ymm15,%ymm14 + vmovdqa %ymm13,416-512(%rax) + vpshufd $0xff,%ymm15,%ymm15 + vmovdqa %ymm14,448-512(%rax) + vmovdqa %ymm15,480-512(%rax) + + vpshufd $0x00,%ymm7,%ymm4 + vpshufd $0x55,%ymm7,%ymm5 + vpaddd .Lincy(%rip),%ymm4,%ymm4 + vpshufd $0xaa,%ymm7,%ymm6 + vmovdqa %ymm5,544-512(%rax) + vpshufd $0xff,%ymm7,%ymm7 + vmovdqa %ymm6,576-512(%rax) + 
vmovdqa %ymm7,608-512(%rax) + + jmp .Loop_enter8x + +.align 32 +.Loop_outer8x: + vmovdqa 128-256(%rcx),%ymm8 + vmovdqa 160-256(%rcx),%ymm9 + vmovdqa 192-256(%rcx),%ymm10 + vmovdqa 224-256(%rcx),%ymm11 + vmovdqa 256-256(%rcx),%ymm0 + vmovdqa 288-256(%rcx),%ymm1 + vmovdqa 320-256(%rcx),%ymm2 + vmovdqa 352-256(%rcx),%ymm3 + vmovdqa 384-512(%rax),%ymm12 + vmovdqa 416-512(%rax),%ymm13 + vmovdqa 448-512(%rax),%ymm14 + vmovdqa 480-512(%rax),%ymm15 + vmovdqa 512-512(%rax),%ymm4 + vmovdqa 544-512(%rax),%ymm5 + vmovdqa 576-512(%rax),%ymm6 + vmovdqa 608-512(%rax),%ymm7 + vpaddd .Leight(%rip),%ymm4,%ymm4 + +.Loop_enter8x: + vmovdqa %ymm14,64(%rsp) + vmovdqa %ymm15,96(%rsp) + vbroadcasti128 (%r10),%ymm15 + vmovdqa %ymm4,512-512(%rax) + movl $10,%eax + jmp .Loop8x + +.align 32 +.Loop8x: + vpaddd %ymm0,%ymm8,%ymm8 + vpxor %ymm4,%ymm8,%ymm4 + vpshufb %ymm15,%ymm4,%ymm4 + vpaddd %ymm1,%ymm9,%ymm9 + vpxor %ymm5,%ymm9,%ymm5 + vpshufb %ymm15,%ymm5,%ymm5 + vpaddd %ymm4,%ymm12,%ymm12 + vpxor %ymm0,%ymm12,%ymm0 + vpslld $12,%ymm0,%ymm14 + vpsrld $20,%ymm0,%ymm0 + vpor %ymm0,%ymm14,%ymm0 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm5,%ymm13,%ymm13 + vpxor %ymm1,%ymm13,%ymm1 + vpslld $12,%ymm1,%ymm15 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm1,%ymm15,%ymm1 + vpaddd %ymm0,%ymm8,%ymm8 + vpxor %ymm4,%ymm8,%ymm4 + vpshufb %ymm14,%ymm4,%ymm4 + vpaddd %ymm1,%ymm9,%ymm9 + vpxor %ymm5,%ymm9,%ymm5 + vpshufb %ymm14,%ymm5,%ymm5 + vpaddd %ymm4,%ymm12,%ymm12 + vpxor %ymm0,%ymm12,%ymm0 + vpslld $7,%ymm0,%ymm15 + vpsrld $25,%ymm0,%ymm0 + vpor %ymm0,%ymm15,%ymm0 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm5,%ymm13,%ymm13 + vpxor %ymm1,%ymm13,%ymm1 + vpslld $7,%ymm1,%ymm14 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm1,%ymm14,%ymm1 + vmovdqa %ymm12,0(%rsp) + vmovdqa %ymm13,32(%rsp) + vmovdqa 64(%rsp),%ymm12 + vmovdqa 96(%rsp),%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vpxor %ymm6,%ymm10,%ymm6 + vpshufb %ymm15,%ymm6,%ymm6 + vpaddd %ymm3,%ymm11,%ymm11 + vpxor %ymm7,%ymm11,%ymm7 + vpshufb %ymm15,%ymm7,%ymm7 + vpaddd %ymm6,%ymm12,%ymm12 + vpxor %ymm2,%ymm12,%ymm2 + vpslld $12,%ymm2,%ymm14 + vpsrld $20,%ymm2,%ymm2 + vpor %ymm2,%ymm14,%ymm2 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm7,%ymm13,%ymm13 + vpxor %ymm3,%ymm13,%ymm3 + vpslld $12,%ymm3,%ymm15 + vpsrld $20,%ymm3,%ymm3 + vpor %ymm3,%ymm15,%ymm3 + vpaddd %ymm2,%ymm10,%ymm10 + vpxor %ymm6,%ymm10,%ymm6 + vpshufb %ymm14,%ymm6,%ymm6 + vpaddd %ymm3,%ymm11,%ymm11 + vpxor %ymm7,%ymm11,%ymm7 + vpshufb %ymm14,%ymm7,%ymm7 + vpaddd %ymm6,%ymm12,%ymm12 + vpxor %ymm2,%ymm12,%ymm2 + vpslld $7,%ymm2,%ymm15 + vpsrld $25,%ymm2,%ymm2 + vpor %ymm2,%ymm15,%ymm2 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm7,%ymm13,%ymm13 + vpxor %ymm3,%ymm13,%ymm3 + vpslld $7,%ymm3,%ymm14 + vpsrld $25,%ymm3,%ymm3 + vpor %ymm3,%ymm14,%ymm3 + vpaddd %ymm1,%ymm8,%ymm8 + vpxor %ymm7,%ymm8,%ymm7 + vpshufb %ymm15,%ymm7,%ymm7 + vpaddd %ymm2,%ymm9,%ymm9 + vpxor %ymm4,%ymm9,%ymm4 + vpshufb %ymm15,%ymm4,%ymm4 + vpaddd %ymm7,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm1 + vpslld $12,%ymm1,%ymm14 + vpsrld $20,%ymm1,%ymm1 + vpor %ymm1,%ymm14,%ymm1 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm4,%ymm13,%ymm13 + vpxor %ymm2,%ymm13,%ymm2 + vpslld $12,%ymm2,%ymm15 + vpsrld $20,%ymm2,%ymm2 + vpor %ymm2,%ymm15,%ymm2 + vpaddd %ymm1,%ymm8,%ymm8 + vpxor %ymm7,%ymm8,%ymm7 + vpshufb %ymm14,%ymm7,%ymm7 + vpaddd %ymm2,%ymm9,%ymm9 + vpxor %ymm4,%ymm9,%ymm4 + vpshufb %ymm14,%ymm4,%ymm4 + vpaddd %ymm7,%ymm12,%ymm12 + vpxor %ymm1,%ymm12,%ymm1 + vpslld $7,%ymm1,%ymm15 + vpsrld $25,%ymm1,%ymm1 + vpor %ymm1,%ymm15,%ymm1 + vbroadcasti128 (%r10),%ymm15 + vpaddd 
%ymm4,%ymm13,%ymm13 + vpxor %ymm2,%ymm13,%ymm2 + vpslld $7,%ymm2,%ymm14 + vpsrld $25,%ymm2,%ymm2 + vpor %ymm2,%ymm14,%ymm2 + vmovdqa %ymm12,64(%rsp) + vmovdqa %ymm13,96(%rsp) + vmovdqa 0(%rsp),%ymm12 + vmovdqa 32(%rsp),%ymm13 + vpaddd %ymm3,%ymm10,%ymm10 + vpxor %ymm5,%ymm10,%ymm5 + vpshufb %ymm15,%ymm5,%ymm5 + vpaddd %ymm0,%ymm11,%ymm11 + vpxor %ymm6,%ymm11,%ymm6 + vpshufb %ymm15,%ymm6,%ymm6 + vpaddd %ymm5,%ymm12,%ymm12 + vpxor %ymm3,%ymm12,%ymm3 + vpslld $12,%ymm3,%ymm14 + vpsrld $20,%ymm3,%ymm3 + vpor %ymm3,%ymm14,%ymm3 + vbroadcasti128 (%r11),%ymm14 + vpaddd %ymm6,%ymm13,%ymm13 + vpxor %ymm0,%ymm13,%ymm0 + vpslld $12,%ymm0,%ymm15 + vpsrld $20,%ymm0,%ymm0 + vpor %ymm0,%ymm15,%ymm0 + vpaddd %ymm3,%ymm10,%ymm10 + vpxor %ymm5,%ymm10,%ymm5 + vpshufb %ymm14,%ymm5,%ymm5 + vpaddd %ymm0,%ymm11,%ymm11 + vpxor %ymm6,%ymm11,%ymm6 + vpshufb %ymm14,%ymm6,%ymm6 + vpaddd %ymm5,%ymm12,%ymm12 + vpxor %ymm3,%ymm12,%ymm3 + vpslld $7,%ymm3,%ymm15 + vpsrld $25,%ymm3,%ymm3 + vpor %ymm3,%ymm15,%ymm3 + vbroadcasti128 (%r10),%ymm15 + vpaddd %ymm6,%ymm13,%ymm13 + vpxor %ymm0,%ymm13,%ymm0 + vpslld $7,%ymm0,%ymm14 + vpsrld $25,%ymm0,%ymm0 + vpor %ymm0,%ymm14,%ymm0 + decl %eax + jnz .Loop8x + + leaq 512(%rsp),%rax + vpaddd 128-256(%rcx),%ymm8,%ymm8 + vpaddd 160-256(%rcx),%ymm9,%ymm9 + vpaddd 192-256(%rcx),%ymm10,%ymm10 + vpaddd 224-256(%rcx),%ymm11,%ymm11 + + vpunpckldq %ymm9,%ymm8,%ymm14 + vpunpckldq %ymm11,%ymm10,%ymm15 + vpunpckhdq %ymm9,%ymm8,%ymm8 + vpunpckhdq %ymm11,%ymm10,%ymm10 + vpunpcklqdq %ymm15,%ymm14,%ymm9 + vpunpckhqdq %ymm15,%ymm14,%ymm14 + vpunpcklqdq %ymm10,%ymm8,%ymm11 + vpunpckhqdq %ymm10,%ymm8,%ymm8 + vpaddd 256-256(%rcx),%ymm0,%ymm0 + vpaddd 288-256(%rcx),%ymm1,%ymm1 + vpaddd 320-256(%rcx),%ymm2,%ymm2 + vpaddd 352-256(%rcx),%ymm3,%ymm3 + + vpunpckldq %ymm1,%ymm0,%ymm10 + vpunpckldq %ymm3,%ymm2,%ymm15 + vpunpckhdq %ymm1,%ymm0,%ymm0 + vpunpckhdq %ymm3,%ymm2,%ymm2 + vpunpcklqdq %ymm15,%ymm10,%ymm1 + vpunpckhqdq %ymm15,%ymm10,%ymm10 + vpunpcklqdq %ymm2,%ymm0,%ymm3 + vpunpckhqdq %ymm2,%ymm0,%ymm0 + vperm2i128 $0x20,%ymm1,%ymm9,%ymm15 + vperm2i128 $0x31,%ymm1,%ymm9,%ymm1 + vperm2i128 $0x20,%ymm10,%ymm14,%ymm9 + vperm2i128 $0x31,%ymm10,%ymm14,%ymm10 + vperm2i128 $0x20,%ymm3,%ymm11,%ymm14 + vperm2i128 $0x31,%ymm3,%ymm11,%ymm3 + vperm2i128 $0x20,%ymm0,%ymm8,%ymm11 + vperm2i128 $0x31,%ymm0,%ymm8,%ymm0 + vmovdqa %ymm15,0(%rsp) + vmovdqa %ymm9,32(%rsp) + vmovdqa 64(%rsp),%ymm15 + vmovdqa 96(%rsp),%ymm9 + + vpaddd 384-512(%rax),%ymm12,%ymm12 + vpaddd 416-512(%rax),%ymm13,%ymm13 + vpaddd 448-512(%rax),%ymm15,%ymm15 + vpaddd 480-512(%rax),%ymm9,%ymm9 + + vpunpckldq %ymm13,%ymm12,%ymm2 + vpunpckldq %ymm9,%ymm15,%ymm8 + vpunpckhdq %ymm13,%ymm12,%ymm12 + vpunpckhdq %ymm9,%ymm15,%ymm15 + vpunpcklqdq %ymm8,%ymm2,%ymm13 + vpunpckhqdq %ymm8,%ymm2,%ymm2 + vpunpcklqdq %ymm15,%ymm12,%ymm9 + vpunpckhqdq %ymm15,%ymm12,%ymm12 + vpaddd 512-512(%rax),%ymm4,%ymm4 + vpaddd 544-512(%rax),%ymm5,%ymm5 + vpaddd 576-512(%rax),%ymm6,%ymm6 + vpaddd 608-512(%rax),%ymm7,%ymm7 + + vpunpckldq %ymm5,%ymm4,%ymm15 + vpunpckldq %ymm7,%ymm6,%ymm8 + vpunpckhdq %ymm5,%ymm4,%ymm4 + vpunpckhdq %ymm7,%ymm6,%ymm6 + vpunpcklqdq %ymm8,%ymm15,%ymm5 + vpunpckhqdq %ymm8,%ymm15,%ymm15 + vpunpcklqdq %ymm6,%ymm4,%ymm7 + vpunpckhqdq %ymm6,%ymm4,%ymm4 + vperm2i128 $0x20,%ymm5,%ymm13,%ymm8 + vperm2i128 $0x31,%ymm5,%ymm13,%ymm5 + vperm2i128 $0x20,%ymm15,%ymm2,%ymm13 + vperm2i128 $0x31,%ymm15,%ymm2,%ymm15 + vperm2i128 $0x20,%ymm7,%ymm9,%ymm2 + vperm2i128 $0x31,%ymm7,%ymm9,%ymm7 + vperm2i128 $0x20,%ymm4,%ymm12,%ymm9 + vperm2i128 $0x31,%ymm4,%ymm12,%ymm4 + 
vmovdqa 0(%rsp),%ymm6 + vmovdqa 32(%rsp),%ymm12 + + cmpq $512,%rdx + jb .Ltail8x + + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + leaq 128(%rsi),%rsi + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm12,%ymm12 + vpxor 32(%rsi),%ymm13,%ymm13 + vpxor 64(%rsi),%ymm10,%ymm10 + vpxor 96(%rsi),%ymm15,%ymm15 + leaq 128(%rsi),%rsi + vmovdqu %ymm12,0(%rdi) + vmovdqu %ymm13,32(%rdi) + vmovdqu %ymm10,64(%rdi) + vmovdqu %ymm15,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm14,%ymm14 + vpxor 32(%rsi),%ymm2,%ymm2 + vpxor 64(%rsi),%ymm3,%ymm3 + vpxor 96(%rsi),%ymm7,%ymm7 + leaq 128(%rsi),%rsi + vmovdqu %ymm14,0(%rdi) + vmovdqu %ymm2,32(%rdi) + vmovdqu %ymm3,64(%rdi) + vmovdqu %ymm7,96(%rdi) + leaq 128(%rdi),%rdi + + vpxor 0(%rsi),%ymm11,%ymm11 + vpxor 32(%rsi),%ymm9,%ymm9 + vpxor 64(%rsi),%ymm0,%ymm0 + vpxor 96(%rsi),%ymm4,%ymm4 + leaq 128(%rsi),%rsi + vmovdqu %ymm11,0(%rdi) + vmovdqu %ymm9,32(%rdi) + vmovdqu %ymm0,64(%rdi) + vmovdqu %ymm4,96(%rdi) + leaq 128(%rdi),%rdi + + subq $512,%rdx + jnz .Loop_outer8x + + jmp .Ldone8x + +.Ltail8x: + cmpq $448,%rdx + jae .L448_or_more8x + cmpq $384,%rdx + jae .L384_or_more8x + cmpq $320,%rdx + jae .L320_or_more8x + cmpq $256,%rdx + jae .L256_or_more8x + cmpq $192,%rdx + jae .L192_or_more8x + cmpq $128,%rdx + jae .L128_or_more8x + cmpq $64,%rdx + jae .L64_or_more8x + + xorq %r10,%r10 + vmovdqa %ymm6,0(%rsp) + vmovdqa %ymm8,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L64_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + je .Ldone8x + + leaq 64(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm1,0(%rsp) + leaq 64(%rdi),%rdi + subq $64,%rdx + vmovdqa %ymm5,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L128_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + je .Ldone8x + + leaq 128(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm12,0(%rsp) + leaq 128(%rdi),%rdi + subq $128,%rdx + vmovdqa %ymm13,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L192_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + je .Ldone8x + + leaq 192(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm10,0(%rsp) + leaq 192(%rdi),%rdi + subq $192,%rdx + vmovdqa %ymm15,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L256_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + je .Ldone8x + + leaq 256(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm14,0(%rsp) + leaq 256(%rdi),%rdi + subq $256,%rdx + vmovdqa %ymm2,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L320_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 
64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + je .Ldone8x + + leaq 320(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm3,0(%rsp) + leaq 320(%rdi),%rdi + subq $320,%rdx + vmovdqa %ymm7,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L384_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vpxor 320(%rsi),%ymm3,%ymm3 + vpxor 352(%rsi),%ymm7,%ymm7 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm7,352(%rdi) + je .Ldone8x + + leaq 384(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm11,0(%rsp) + leaq 384(%rdi),%rdi + subq $384,%rdx + vmovdqa %ymm9,32(%rsp) + jmp .Loop_tail8x + +.align 32 +.L448_or_more8x: + vpxor 0(%rsi),%ymm6,%ymm6 + vpxor 32(%rsi),%ymm8,%ymm8 + vpxor 64(%rsi),%ymm1,%ymm1 + vpxor 96(%rsi),%ymm5,%ymm5 + vpxor 128(%rsi),%ymm12,%ymm12 + vpxor 160(%rsi),%ymm13,%ymm13 + vpxor 192(%rsi),%ymm10,%ymm10 + vpxor 224(%rsi),%ymm15,%ymm15 + vpxor 256(%rsi),%ymm14,%ymm14 + vpxor 288(%rsi),%ymm2,%ymm2 + vpxor 320(%rsi),%ymm3,%ymm3 + vpxor 352(%rsi),%ymm7,%ymm7 + vpxor 384(%rsi),%ymm11,%ymm11 + vpxor 416(%rsi),%ymm9,%ymm9 + vmovdqu %ymm6,0(%rdi) + vmovdqu %ymm8,32(%rdi) + vmovdqu %ymm1,64(%rdi) + vmovdqu %ymm5,96(%rdi) + vmovdqu %ymm12,128(%rdi) + vmovdqu %ymm13,160(%rdi) + vmovdqu %ymm10,192(%rdi) + vmovdqu %ymm15,224(%rdi) + vmovdqu %ymm14,256(%rdi) + vmovdqu %ymm2,288(%rdi) + vmovdqu %ymm3,320(%rdi) + vmovdqu %ymm7,352(%rdi) + vmovdqu %ymm11,384(%rdi) + vmovdqu %ymm9,416(%rdi) + je .Ldone8x + + leaq 448(%rsi),%rsi + xorq %r10,%r10 + vmovdqa %ymm0,0(%rsp) + leaq 448(%rdi),%rdi + subq $448,%rdx + vmovdqa %ymm4,32(%rsp) + +.Loop_tail8x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail8x + +.Ldone8x: + vzeroall + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.L8x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_8x,.-ChaCha20_8x +.type ChaCha20_avx512,@function +.align 32 +ChaCha20_avx512: +.cfi_startproc +.LChaCha20_avx512: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + cmpq $512,%rdx + ja .LChaCha20_16x + + subq $64+8,%rsp + vbroadcasti32x4 .Lsigma(%rip),%zmm0 + vbroadcasti32x4 (%rcx),%zmm1 + vbroadcasti32x4 16(%rcx),%zmm2 + vbroadcasti32x4 (%r8),%zmm3 + + vmovdqa32 %zmm0,%zmm16 + vmovdqa32 %zmm1,%zmm17 + vmovdqa32 %zmm2,%zmm18 + vpaddd .Lzeroz(%rip),%zmm3,%zmm3 + vmovdqa32 .Lfourz(%rip),%zmm20 + movq $10,%r8 + vmovdqa32 %zmm3,%zmm19 + jmp .Loop_avx512 + +.align 16 +.Loop_outer_avx512: + vmovdqa32 %zmm16,%zmm0 + vmovdqa32 %zmm17,%zmm1 + vmovdqa32 %zmm18,%zmm2 + vpaddd %zmm20,%zmm19,%zmm3 + movq $10,%r8 + vmovdqa32 %zmm3,%zmm19 + jmp .Loop_avx512 + 
+.align 32 +.Loop_avx512: + vpaddd %zmm1,%zmm0,%zmm0 + vpxord %zmm0,%zmm3,%zmm3 + vprold $16,%zmm3,%zmm3 + vpaddd %zmm3,%zmm2,%zmm2 + vpxord %zmm2,%zmm1,%zmm1 + vprold $12,%zmm1,%zmm1 + vpaddd %zmm1,%zmm0,%zmm0 + vpxord %zmm0,%zmm3,%zmm3 + vprold $8,%zmm3,%zmm3 + vpaddd %zmm3,%zmm2,%zmm2 + vpxord %zmm2,%zmm1,%zmm1 + vprold $7,%zmm1,%zmm1 + vpshufd $78,%zmm2,%zmm2 + vpshufd $57,%zmm1,%zmm1 + vpshufd $147,%zmm3,%zmm3 + vpaddd %zmm1,%zmm0,%zmm0 + vpxord %zmm0,%zmm3,%zmm3 + vprold $16,%zmm3,%zmm3 + vpaddd %zmm3,%zmm2,%zmm2 + vpxord %zmm2,%zmm1,%zmm1 + vprold $12,%zmm1,%zmm1 + vpaddd %zmm1,%zmm0,%zmm0 + vpxord %zmm0,%zmm3,%zmm3 + vprold $8,%zmm3,%zmm3 + vpaddd %zmm3,%zmm2,%zmm2 + vpxord %zmm2,%zmm1,%zmm1 + vprold $7,%zmm1,%zmm1 + vpshufd $78,%zmm2,%zmm2 + vpshufd $147,%zmm1,%zmm1 + vpshufd $57,%zmm3,%zmm3 + decq %r8 + jnz .Loop_avx512 + vpaddd %zmm16,%zmm0,%zmm0 + vpaddd %zmm17,%zmm1,%zmm1 + vpaddd %zmm18,%zmm2,%zmm2 + vpaddd %zmm19,%zmm3,%zmm3 + + subq $64,%rdx + jb .Ltail64_avx512 + + vpxor 0(%rsi),%xmm0,%xmm4 + vpxor 16(%rsi),%xmm1,%xmm5 + vpxor 32(%rsi),%xmm2,%xmm6 + vpxor 48(%rsi),%xmm3,%xmm7 + leaq 64(%rsi),%rsi + + vmovdqu %xmm4,0(%rdi) + vmovdqu %xmm5,16(%rdi) + vmovdqu %xmm6,32(%rdi) + vmovdqu %xmm7,48(%rdi) + leaq 64(%rdi),%rdi + + jz .Ldone_avx512 + + vextracti32x4 $1,%zmm0,%xmm4 + vextracti32x4 $1,%zmm1,%xmm5 + vextracti32x4 $1,%zmm2,%xmm6 + vextracti32x4 $1,%zmm3,%xmm7 + + subq $64,%rdx + jb .Ltail_avx512 + + vpxor 0(%rsi),%xmm4,%xmm4 + vpxor 16(%rsi),%xmm5,%xmm5 + vpxor 32(%rsi),%xmm6,%xmm6 + vpxor 48(%rsi),%xmm7,%xmm7 + leaq 64(%rsi),%rsi + + vmovdqu %xmm4,0(%rdi) + vmovdqu %xmm5,16(%rdi) + vmovdqu %xmm6,32(%rdi) + vmovdqu %xmm7,48(%rdi) + leaq 64(%rdi),%rdi + + jz .Ldone_avx512 + + vextracti32x4 $2,%zmm0,%xmm4 + vextracti32x4 $2,%zmm1,%xmm5 + vextracti32x4 $2,%zmm2,%xmm6 + vextracti32x4 $2,%zmm3,%xmm7 + + subq $64,%rdx + jb .Ltail_avx512 + + vpxor 0(%rsi),%xmm4,%xmm4 + vpxor 16(%rsi),%xmm5,%xmm5 + vpxor 32(%rsi),%xmm6,%xmm6 + vpxor 48(%rsi),%xmm7,%xmm7 + leaq 64(%rsi),%rsi + + vmovdqu %xmm4,0(%rdi) + vmovdqu %xmm5,16(%rdi) + vmovdqu %xmm6,32(%rdi) + vmovdqu %xmm7,48(%rdi) + leaq 64(%rdi),%rdi + + jz .Ldone_avx512 + + vextracti32x4 $3,%zmm0,%xmm4 + vextracti32x4 $3,%zmm1,%xmm5 + vextracti32x4 $3,%zmm2,%xmm6 + vextracti32x4 $3,%zmm3,%xmm7 + + subq $64,%rdx + jb .Ltail_avx512 + + vpxor 0(%rsi),%xmm4,%xmm4 + vpxor 16(%rsi),%xmm5,%xmm5 + vpxor 32(%rsi),%xmm6,%xmm6 + vpxor 48(%rsi),%xmm7,%xmm7 + leaq 64(%rsi),%rsi + + vmovdqu %xmm4,0(%rdi) + vmovdqu %xmm5,16(%rdi) + vmovdqu %xmm6,32(%rdi) + vmovdqu %xmm7,48(%rdi) + leaq 64(%rdi),%rdi + + jnz .Loop_outer_avx512 + + jmp .Ldone_avx512 + +.align 16 +.Ltail64_avx512: + vmovdqa %xmm0,0(%rsp) + vmovdqa %xmm1,16(%rsp) + vmovdqa %xmm2,32(%rsp) + vmovdqa %xmm3,48(%rsp) + addq $64,%rdx + jmp .Loop_tail_avx512 + +.align 16 +.Ltail_avx512: + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + vmovdqa %xmm7,48(%rsp) + addq $64,%rdx + +.Loop_tail_avx512: + movzbl (%rsi,%r8,1),%eax + movzbl (%rsp,%r8,1),%ecx + leaq 1(%r8),%r8 + xorl %ecx,%eax + movb %al,-1(%rdi,%r8,1) + decq %rdx + jnz .Loop_tail_avx512 + + vmovdqu32 %zmm16,0(%rsp) + +.Ldone_avx512: + vzeroall + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.Lavx512_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_avx512,.-ChaCha20_avx512 +.type ChaCha20_avx512vl,@function +.align 32 +ChaCha20_avx512vl: +.cfi_startproc +.LChaCha20_avx512vl: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + cmpq $128,%rdx + ja .LChaCha20_8xvl + + subq $64+8,%rsp + vbroadcasti128 
.Lsigma(%rip),%ymm0 + vbroadcasti128 (%rcx),%ymm1 + vbroadcasti128 16(%rcx),%ymm2 + vbroadcasti128 (%r8),%ymm3 + + vmovdqa32 %ymm0,%ymm16 + vmovdqa32 %ymm1,%ymm17 + vmovdqa32 %ymm2,%ymm18 + vpaddd .Lzeroz(%rip),%ymm3,%ymm3 + vmovdqa32 .Ltwoy(%rip),%ymm20 + movq $10,%r8 + vmovdqa32 %ymm3,%ymm19 + jmp .Loop_avx512vl + +.align 16 +.Loop_outer_avx512vl: + vmovdqa32 %ymm18,%ymm2 + vpaddd %ymm20,%ymm19,%ymm3 + movq $10,%r8 + vmovdqa32 %ymm3,%ymm19 + jmp .Loop_avx512vl + +.align 32 +.Loop_avx512vl: + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + vpshufd $78,%ymm2,%ymm2 + vpshufd $57,%ymm1,%ymm1 + vpshufd $147,%ymm3,%ymm3 + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vprold $16,%ymm3,%ymm3 + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vprold $12,%ymm1,%ymm1 + vpaddd %ymm1,%ymm0,%ymm0 + vpxor %ymm0,%ymm3,%ymm3 + vprold $8,%ymm3,%ymm3 + vpaddd %ymm3,%ymm2,%ymm2 + vpxor %ymm2,%ymm1,%ymm1 + vprold $7,%ymm1,%ymm1 + vpshufd $78,%ymm2,%ymm2 + vpshufd $147,%ymm1,%ymm1 + vpshufd $57,%ymm3,%ymm3 + decq %r8 + jnz .Loop_avx512vl + vpaddd %ymm16,%ymm0,%ymm0 + vpaddd %ymm17,%ymm1,%ymm1 + vpaddd %ymm18,%ymm2,%ymm2 + vpaddd %ymm19,%ymm3,%ymm3 + + subq $64,%rdx + jb .Ltail64_avx512vl + + vpxor 0(%rsi),%xmm0,%xmm4 + vpxor 16(%rsi),%xmm1,%xmm5 + vpxor 32(%rsi),%xmm2,%xmm6 + vpxor 48(%rsi),%xmm3,%xmm7 + leaq 64(%rsi),%rsi + + vmovdqu %xmm4,0(%rdi) + vmovdqu %xmm5,16(%rdi) + vmovdqu %xmm6,32(%rdi) + vmovdqu %xmm7,48(%rdi) + leaq 64(%rdi),%rdi + + jz .Ldone_avx512vl + + vextracti128 $1,%ymm0,%xmm4 + vextracti128 $1,%ymm1,%xmm5 + vextracti128 $1,%ymm2,%xmm6 + vextracti128 $1,%ymm3,%xmm7 + + subq $64,%rdx + jb .Ltail_avx512vl + + vpxor 0(%rsi),%xmm4,%xmm4 + vpxor 16(%rsi),%xmm5,%xmm5 + vpxor 32(%rsi),%xmm6,%xmm6 + vpxor 48(%rsi),%xmm7,%xmm7 + leaq 64(%rsi),%rsi + + vmovdqu %xmm4,0(%rdi) + vmovdqu %xmm5,16(%rdi) + vmovdqu %xmm6,32(%rdi) + vmovdqu %xmm7,48(%rdi) + leaq 64(%rdi),%rdi + + vmovdqa32 %ymm16,%ymm0 + vmovdqa32 %ymm17,%ymm1 + jnz .Loop_outer_avx512vl + + jmp .Ldone_avx512vl + +.align 16 +.Ltail64_avx512vl: + vmovdqa %xmm0,0(%rsp) + vmovdqa %xmm1,16(%rsp) + vmovdqa %xmm2,32(%rsp) + vmovdqa %xmm3,48(%rsp) + addq $64,%rdx + jmp .Loop_tail_avx512vl + +.align 16 +.Ltail_avx512vl: + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + vmovdqa %xmm7,48(%rsp) + addq $64,%rdx + +.Loop_tail_avx512vl: + movzbl (%rsi,%r8,1),%eax + movzbl (%rsp,%r8,1),%ecx + leaq 1(%r8),%r8 + xorl %ecx,%eax + movb %al,-1(%rdi,%r8,1) + decq %rdx + jnz .Loop_tail_avx512vl + + vmovdqu32 %ymm16,0(%rsp) + vmovdqu32 %ymm16,32(%rsp) + +.Ldone_avx512vl: + vzeroall + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.Lavx512vl_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_avx512vl,.-ChaCha20_avx512vl +.type ChaCha20_16x,@function +.align 32 +ChaCha20_16x: +.cfi_startproc +.LChaCha20_16x: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + subq $64+8,%rsp + andq $-64,%rsp + vzeroupper + + leaq .Lsigma(%rip),%r10 + vbroadcasti32x4 (%r10),%zmm3 + vbroadcasti32x4 (%rcx),%zmm7 + vbroadcasti32x4 16(%rcx),%zmm11 + vbroadcasti32x4 (%r8),%zmm15 + + vpshufd $0x00,%zmm3,%zmm0 + vpshufd $0x55,%zmm3,%zmm1 + vpshufd $0xaa,%zmm3,%zmm2 + vpshufd $0xff,%zmm3,%zmm3 + vmovdqa64 %zmm0,%zmm16 + vmovdqa64 %zmm1,%zmm17 + vmovdqa64 %zmm2,%zmm18 + vmovdqa64 %zmm3,%zmm19 + + 
vpshufd $0x00,%zmm7,%zmm4 + vpshufd $0x55,%zmm7,%zmm5 + vpshufd $0xaa,%zmm7,%zmm6 + vpshufd $0xff,%zmm7,%zmm7 + vmovdqa64 %zmm4,%zmm20 + vmovdqa64 %zmm5,%zmm21 + vmovdqa64 %zmm6,%zmm22 + vmovdqa64 %zmm7,%zmm23 + + vpshufd $0x00,%zmm11,%zmm8 + vpshufd $0x55,%zmm11,%zmm9 + vpshufd $0xaa,%zmm11,%zmm10 + vpshufd $0xff,%zmm11,%zmm11 + vmovdqa64 %zmm8,%zmm24 + vmovdqa64 %zmm9,%zmm25 + vmovdqa64 %zmm10,%zmm26 + vmovdqa64 %zmm11,%zmm27 + + vpshufd $0x00,%zmm15,%zmm12 + vpshufd $0x55,%zmm15,%zmm13 + vpshufd $0xaa,%zmm15,%zmm14 + vpshufd $0xff,%zmm15,%zmm15 + vpaddd .Lincz(%rip),%zmm12,%zmm12 + vmovdqa64 %zmm12,%zmm28 + vmovdqa64 %zmm13,%zmm29 + vmovdqa64 %zmm14,%zmm30 + vmovdqa64 %zmm15,%zmm31 + + movl $10,%eax + jmp .Loop16x + +.align 32 +.Loop_outer16x: + vpbroadcastd 0(%r10),%zmm0 + vpbroadcastd 4(%r10),%zmm1 + vpbroadcastd 8(%r10),%zmm2 + vpbroadcastd 12(%r10),%zmm3 + vpaddd .Lsixteen(%rip),%zmm28,%zmm28 + vmovdqa64 %zmm20,%zmm4 + vmovdqa64 %zmm21,%zmm5 + vmovdqa64 %zmm22,%zmm6 + vmovdqa64 %zmm23,%zmm7 + vmovdqa64 %zmm24,%zmm8 + vmovdqa64 %zmm25,%zmm9 + vmovdqa64 %zmm26,%zmm10 + vmovdqa64 %zmm27,%zmm11 + vmovdqa64 %zmm28,%zmm12 + vmovdqa64 %zmm29,%zmm13 + vmovdqa64 %zmm30,%zmm14 + vmovdqa64 %zmm31,%zmm15 + + vmovdqa64 %zmm0,%zmm16 + vmovdqa64 %zmm1,%zmm17 + vmovdqa64 %zmm2,%zmm18 + vmovdqa64 %zmm3,%zmm19 + + movl $10,%eax + jmp .Loop16x + +.align 32 +.Loop16x: + vpaddd %zmm4,%zmm0,%zmm0 + vpaddd %zmm5,%zmm1,%zmm1 + vpaddd %zmm6,%zmm2,%zmm2 + vpaddd %zmm7,%zmm3,%zmm3 + vpxord %zmm0,%zmm12,%zmm12 + vpxord %zmm1,%zmm13,%zmm13 + vpxord %zmm2,%zmm14,%zmm14 + vpxord %zmm3,%zmm15,%zmm15 + vprold $16,%zmm12,%zmm12 + vprold $16,%zmm13,%zmm13 + vprold $16,%zmm14,%zmm14 + vprold $16,%zmm15,%zmm15 + vpaddd %zmm12,%zmm8,%zmm8 + vpaddd %zmm13,%zmm9,%zmm9 + vpaddd %zmm14,%zmm10,%zmm10 + vpaddd %zmm15,%zmm11,%zmm11 + vpxord %zmm8,%zmm4,%zmm4 + vpxord %zmm9,%zmm5,%zmm5 + vpxord %zmm10,%zmm6,%zmm6 + vpxord %zmm11,%zmm7,%zmm7 + vprold $12,%zmm4,%zmm4 + vprold $12,%zmm5,%zmm5 + vprold $12,%zmm6,%zmm6 + vprold $12,%zmm7,%zmm7 + vpaddd %zmm4,%zmm0,%zmm0 + vpaddd %zmm5,%zmm1,%zmm1 + vpaddd %zmm6,%zmm2,%zmm2 + vpaddd %zmm7,%zmm3,%zmm3 + vpxord %zmm0,%zmm12,%zmm12 + vpxord %zmm1,%zmm13,%zmm13 + vpxord %zmm2,%zmm14,%zmm14 + vpxord %zmm3,%zmm15,%zmm15 + vprold $8,%zmm12,%zmm12 + vprold $8,%zmm13,%zmm13 + vprold $8,%zmm14,%zmm14 + vprold $8,%zmm15,%zmm15 + vpaddd %zmm12,%zmm8,%zmm8 + vpaddd %zmm13,%zmm9,%zmm9 + vpaddd %zmm14,%zmm10,%zmm10 + vpaddd %zmm15,%zmm11,%zmm11 + vpxord %zmm8,%zmm4,%zmm4 + vpxord %zmm9,%zmm5,%zmm5 + vpxord %zmm10,%zmm6,%zmm6 + vpxord %zmm11,%zmm7,%zmm7 + vprold $7,%zmm4,%zmm4 + vprold $7,%zmm5,%zmm5 + vprold $7,%zmm6,%zmm6 + vprold $7,%zmm7,%zmm7 + vpaddd %zmm5,%zmm0,%zmm0 + vpaddd %zmm6,%zmm1,%zmm1 + vpaddd %zmm7,%zmm2,%zmm2 + vpaddd %zmm4,%zmm3,%zmm3 + vpxord %zmm0,%zmm15,%zmm15 + vpxord %zmm1,%zmm12,%zmm12 + vpxord %zmm2,%zmm13,%zmm13 + vpxord %zmm3,%zmm14,%zmm14 + vprold $16,%zmm15,%zmm15 + vprold $16,%zmm12,%zmm12 + vprold $16,%zmm13,%zmm13 + vprold $16,%zmm14,%zmm14 + vpaddd %zmm15,%zmm10,%zmm10 + vpaddd %zmm12,%zmm11,%zmm11 + vpaddd %zmm13,%zmm8,%zmm8 + vpaddd %zmm14,%zmm9,%zmm9 + vpxord %zmm10,%zmm5,%zmm5 + vpxord %zmm11,%zmm6,%zmm6 + vpxord %zmm8,%zmm7,%zmm7 + vpxord %zmm9,%zmm4,%zmm4 + vprold $12,%zmm5,%zmm5 + vprold $12,%zmm6,%zmm6 + vprold $12,%zmm7,%zmm7 + vprold $12,%zmm4,%zmm4 + vpaddd %zmm5,%zmm0,%zmm0 + vpaddd %zmm6,%zmm1,%zmm1 + vpaddd %zmm7,%zmm2,%zmm2 + vpaddd %zmm4,%zmm3,%zmm3 + vpxord %zmm0,%zmm15,%zmm15 + vpxord %zmm1,%zmm12,%zmm12 + vpxord %zmm2,%zmm13,%zmm13 + vpxord 
%zmm3,%zmm14,%zmm14 + vprold $8,%zmm15,%zmm15 + vprold $8,%zmm12,%zmm12 + vprold $8,%zmm13,%zmm13 + vprold $8,%zmm14,%zmm14 + vpaddd %zmm15,%zmm10,%zmm10 + vpaddd %zmm12,%zmm11,%zmm11 + vpaddd %zmm13,%zmm8,%zmm8 + vpaddd %zmm14,%zmm9,%zmm9 + vpxord %zmm10,%zmm5,%zmm5 + vpxord %zmm11,%zmm6,%zmm6 + vpxord %zmm8,%zmm7,%zmm7 + vpxord %zmm9,%zmm4,%zmm4 + vprold $7,%zmm5,%zmm5 + vprold $7,%zmm6,%zmm6 + vprold $7,%zmm7,%zmm7 + vprold $7,%zmm4,%zmm4 + decl %eax + jnz .Loop16x + + vpaddd %zmm16,%zmm0,%zmm0 + vpaddd %zmm17,%zmm1,%zmm1 + vpaddd %zmm18,%zmm2,%zmm2 + vpaddd %zmm19,%zmm3,%zmm3 + + vpunpckldq %zmm1,%zmm0,%zmm18 + vpunpckldq %zmm3,%zmm2,%zmm19 + vpunpckhdq %zmm1,%zmm0,%zmm0 + vpunpckhdq %zmm3,%zmm2,%zmm2 + vpunpcklqdq %zmm19,%zmm18,%zmm1 + vpunpckhqdq %zmm19,%zmm18,%zmm18 + vpunpcklqdq %zmm2,%zmm0,%zmm3 + vpunpckhqdq %zmm2,%zmm0,%zmm0 + vpaddd %zmm20,%zmm4,%zmm4 + vpaddd %zmm21,%zmm5,%zmm5 + vpaddd %zmm22,%zmm6,%zmm6 + vpaddd %zmm23,%zmm7,%zmm7 + + vpunpckldq %zmm5,%zmm4,%zmm2 + vpunpckldq %zmm7,%zmm6,%zmm19 + vpunpckhdq %zmm5,%zmm4,%zmm4 + vpunpckhdq %zmm7,%zmm6,%zmm6 + vpunpcklqdq %zmm19,%zmm2,%zmm5 + vpunpckhqdq %zmm19,%zmm2,%zmm2 + vpunpcklqdq %zmm6,%zmm4,%zmm7 + vpunpckhqdq %zmm6,%zmm4,%zmm4 + vshufi32x4 $0x44,%zmm5,%zmm1,%zmm19 + vshufi32x4 $0xee,%zmm5,%zmm1,%zmm5 + vshufi32x4 $0x44,%zmm2,%zmm18,%zmm1 + vshufi32x4 $0xee,%zmm2,%zmm18,%zmm2 + vshufi32x4 $0x44,%zmm7,%zmm3,%zmm18 + vshufi32x4 $0xee,%zmm7,%zmm3,%zmm7 + vshufi32x4 $0x44,%zmm4,%zmm0,%zmm3 + vshufi32x4 $0xee,%zmm4,%zmm0,%zmm4 + vpaddd %zmm24,%zmm8,%zmm8 + vpaddd %zmm25,%zmm9,%zmm9 + vpaddd %zmm26,%zmm10,%zmm10 + vpaddd %zmm27,%zmm11,%zmm11 + + vpunpckldq %zmm9,%zmm8,%zmm6 + vpunpckldq %zmm11,%zmm10,%zmm0 + vpunpckhdq %zmm9,%zmm8,%zmm8 + vpunpckhdq %zmm11,%zmm10,%zmm10 + vpunpcklqdq %zmm0,%zmm6,%zmm9 + vpunpckhqdq %zmm0,%zmm6,%zmm6 + vpunpcklqdq %zmm10,%zmm8,%zmm11 + vpunpckhqdq %zmm10,%zmm8,%zmm8 + vpaddd %zmm28,%zmm12,%zmm12 + vpaddd %zmm29,%zmm13,%zmm13 + vpaddd %zmm30,%zmm14,%zmm14 + vpaddd %zmm31,%zmm15,%zmm15 + + vpunpckldq %zmm13,%zmm12,%zmm10 + vpunpckldq %zmm15,%zmm14,%zmm0 + vpunpckhdq %zmm13,%zmm12,%zmm12 + vpunpckhdq %zmm15,%zmm14,%zmm14 + vpunpcklqdq %zmm0,%zmm10,%zmm13 + vpunpckhqdq %zmm0,%zmm10,%zmm10 + vpunpcklqdq %zmm14,%zmm12,%zmm15 + vpunpckhqdq %zmm14,%zmm12,%zmm12 + vshufi32x4 $0x44,%zmm13,%zmm9,%zmm0 + vshufi32x4 $0xee,%zmm13,%zmm9,%zmm13 + vshufi32x4 $0x44,%zmm10,%zmm6,%zmm9 + vshufi32x4 $0xee,%zmm10,%zmm6,%zmm10 + vshufi32x4 $0x44,%zmm15,%zmm11,%zmm6 + vshufi32x4 $0xee,%zmm15,%zmm11,%zmm15 + vshufi32x4 $0x44,%zmm12,%zmm8,%zmm11 + vshufi32x4 $0xee,%zmm12,%zmm8,%zmm12 + vshufi32x4 $0x88,%zmm0,%zmm19,%zmm16 + vshufi32x4 $0xdd,%zmm0,%zmm19,%zmm19 + vshufi32x4 $0x88,%zmm13,%zmm5,%zmm0 + vshufi32x4 $0xdd,%zmm13,%zmm5,%zmm13 + vshufi32x4 $0x88,%zmm9,%zmm1,%zmm17 + vshufi32x4 $0xdd,%zmm9,%zmm1,%zmm1 + vshufi32x4 $0x88,%zmm10,%zmm2,%zmm9 + vshufi32x4 $0xdd,%zmm10,%zmm2,%zmm10 + vshufi32x4 $0x88,%zmm6,%zmm18,%zmm14 + vshufi32x4 $0xdd,%zmm6,%zmm18,%zmm18 + vshufi32x4 $0x88,%zmm15,%zmm7,%zmm6 + vshufi32x4 $0xdd,%zmm15,%zmm7,%zmm15 + vshufi32x4 $0x88,%zmm11,%zmm3,%zmm8 + vshufi32x4 $0xdd,%zmm11,%zmm3,%zmm3 + vshufi32x4 $0x88,%zmm12,%zmm4,%zmm11 + vshufi32x4 $0xdd,%zmm12,%zmm4,%zmm12 + cmpq $1024,%rdx + jb .Ltail16x + + vpxord 0(%rsi),%zmm16,%zmm16 + vpxord 64(%rsi),%zmm17,%zmm17 + vpxord 128(%rsi),%zmm14,%zmm14 + vpxord 192(%rsi),%zmm8,%zmm8 + vmovdqu32 %zmm16,0(%rdi) + vmovdqu32 %zmm17,64(%rdi) + vmovdqu32 %zmm14,128(%rdi) + vmovdqu32 %zmm8,192(%rdi) + + vpxord 256(%rsi),%zmm19,%zmm19 + vpxord 
320(%rsi),%zmm1,%zmm1 + vpxord 384(%rsi),%zmm18,%zmm18 + vpxord 448(%rsi),%zmm3,%zmm3 + vmovdqu32 %zmm19,256(%rdi) + vmovdqu32 %zmm1,320(%rdi) + vmovdqu32 %zmm18,384(%rdi) + vmovdqu32 %zmm3,448(%rdi) + + vpxord 512(%rsi),%zmm0,%zmm0 + vpxord 576(%rsi),%zmm9,%zmm9 + vpxord 640(%rsi),%zmm6,%zmm6 + vpxord 704(%rsi),%zmm11,%zmm11 + vmovdqu32 %zmm0,512(%rdi) + vmovdqu32 %zmm9,576(%rdi) + vmovdqu32 %zmm6,640(%rdi) + vmovdqu32 %zmm11,704(%rdi) + + vpxord 768(%rsi),%zmm13,%zmm13 + vpxord 832(%rsi),%zmm10,%zmm10 + vpxord 896(%rsi),%zmm15,%zmm15 + vpxord 960(%rsi),%zmm12,%zmm12 + leaq 1024(%rsi),%rsi + vmovdqu32 %zmm13,768(%rdi) + vmovdqu32 %zmm10,832(%rdi) + vmovdqu32 %zmm15,896(%rdi) + vmovdqu32 %zmm12,960(%rdi) + leaq 1024(%rdi),%rdi + + subq $1024,%rdx + jnz .Loop_outer16x + + jmp .Ldone16x + +.align 32 +.Ltail16x: + xorq %r10,%r10 + subq %rsi,%rdi + cmpq $64,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm16,%zmm16 + vmovdqu32 %zmm16,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm17,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $128,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm17,%zmm17 + vmovdqu32 %zmm17,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm14,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $192,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm14,%zmm14 + vmovdqu32 %zmm14,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm8,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $256,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm8,%zmm8 + vmovdqu32 %zmm8,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm19,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $320,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm19,%zmm19 + vmovdqu32 %zmm19,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm1,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $384,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm1,%zmm1 + vmovdqu32 %zmm1,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm18,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $448,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm18,%zmm18 + vmovdqu32 %zmm18,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm3,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $512,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm3,%zmm3 + vmovdqu32 %zmm3,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm0,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $576,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm0,%zmm0 + vmovdqu32 %zmm0,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm9,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $640,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm9,%zmm9 + vmovdqu32 %zmm9,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm6,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $704,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm6,%zmm6 + vmovdqu32 %zmm6,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm11,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $768,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm11,%zmm11 + vmovdqu32 %zmm11,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm13,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $832,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm13,%zmm13 + vmovdqu32 %zmm13,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm10,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $896,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm10,%zmm10 + vmovdqu32 %zmm10,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm15,%zmm16 + leaq 64(%rsi),%rsi + + cmpq $960,%rdx + jb .Less_than_64_16x + vpxord (%rsi),%zmm15,%zmm15 + vmovdqu32 %zmm15,(%rdi,%rsi,1) + je .Ldone16x + vmovdqa32 %zmm12,%zmm16 + leaq 64(%rsi),%rsi + +.Less_than_64_16x: + vmovdqa32 %zmm16,0(%rsp) + leaq (%rdi,%rsi,1),%rdi + andq $63,%rdx + +.Loop_tail16x: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb 
%al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail16x + + vpxord %zmm16,%zmm16,%zmm16 + vmovdqa32 %zmm16,0(%rsp) + +.Ldone16x: + vzeroall + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.L16x_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_16x,.-ChaCha20_16x +.type ChaCha20_8xvl,@function +.align 32 +ChaCha20_8xvl: +.cfi_startproc +.LChaCha20_8xvl: + movq %rsp,%r9 +.cfi_def_cfa_register %r9 + subq $64+8,%rsp + andq $-64,%rsp + vzeroupper + + leaq .Lsigma(%rip),%r10 + vbroadcasti128 (%r10),%ymm3 + vbroadcasti128 (%rcx),%ymm7 + vbroadcasti128 16(%rcx),%ymm11 + vbroadcasti128 (%r8),%ymm15 + + vpshufd $0x00,%ymm3,%ymm0 + vpshufd $0x55,%ymm3,%ymm1 + vpshufd $0xaa,%ymm3,%ymm2 + vpshufd $0xff,%ymm3,%ymm3 + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm1,%ymm17 + vmovdqa64 %ymm2,%ymm18 + vmovdqa64 %ymm3,%ymm19 + + vpshufd $0x00,%ymm7,%ymm4 + vpshufd $0x55,%ymm7,%ymm5 + vpshufd $0xaa,%ymm7,%ymm6 + vpshufd $0xff,%ymm7,%ymm7 + vmovdqa64 %ymm4,%ymm20 + vmovdqa64 %ymm5,%ymm21 + vmovdqa64 %ymm6,%ymm22 + vmovdqa64 %ymm7,%ymm23 + + vpshufd $0x00,%ymm11,%ymm8 + vpshufd $0x55,%ymm11,%ymm9 + vpshufd $0xaa,%ymm11,%ymm10 + vpshufd $0xff,%ymm11,%ymm11 + vmovdqa64 %ymm8,%ymm24 + vmovdqa64 %ymm9,%ymm25 + vmovdqa64 %ymm10,%ymm26 + vmovdqa64 %ymm11,%ymm27 + + vpshufd $0x00,%ymm15,%ymm12 + vpshufd $0x55,%ymm15,%ymm13 + vpshufd $0xaa,%ymm15,%ymm14 + vpshufd $0xff,%ymm15,%ymm15 + vpaddd .Lincy(%rip),%ymm12,%ymm12 + vmovdqa64 %ymm12,%ymm28 + vmovdqa64 %ymm13,%ymm29 + vmovdqa64 %ymm14,%ymm30 + vmovdqa64 %ymm15,%ymm31 + + movl $10,%eax + jmp .Loop8xvl + +.align 32 +.Loop_outer8xvl: + + + vpbroadcastd 8(%r10),%ymm2 + vpbroadcastd 12(%r10),%ymm3 + vpaddd .Leight(%rip),%ymm28,%ymm28 + vmovdqa64 %ymm20,%ymm4 + vmovdqa64 %ymm21,%ymm5 + vmovdqa64 %ymm22,%ymm6 + vmovdqa64 %ymm23,%ymm7 + vmovdqa64 %ymm24,%ymm8 + vmovdqa64 %ymm25,%ymm9 + vmovdqa64 %ymm26,%ymm10 + vmovdqa64 %ymm27,%ymm11 + vmovdqa64 %ymm28,%ymm12 + vmovdqa64 %ymm29,%ymm13 + vmovdqa64 %ymm30,%ymm14 + vmovdqa64 %ymm31,%ymm15 + + vmovdqa64 %ymm0,%ymm16 + vmovdqa64 %ymm1,%ymm17 + vmovdqa64 %ymm2,%ymm18 + vmovdqa64 %ymm3,%ymm19 + + movl $10,%eax + jmp .Loop8xvl + +.align 32 +.Loop8xvl: + vpaddd %ymm4,%ymm0,%ymm0 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm7,%ymm3,%ymm3 + vpxor %ymm0,%ymm12,%ymm12 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm3,%ymm15,%ymm15 + vprold $16,%ymm12,%ymm12 + vprold $16,%ymm13,%ymm13 + vprold $16,%ymm14,%ymm14 + vprold $16,%ymm15,%ymm15 + vpaddd %ymm12,%ymm8,%ymm8 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm15,%ymm11,%ymm11 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm11,%ymm7,%ymm7 + vprold $12,%ymm4,%ymm4 + vprold $12,%ymm5,%ymm5 + vprold $12,%ymm6,%ymm6 + vprold $12,%ymm7,%ymm7 + vpaddd %ymm4,%ymm0,%ymm0 + vpaddd %ymm5,%ymm1,%ymm1 + vpaddd %ymm6,%ymm2,%ymm2 + vpaddd %ymm7,%ymm3,%ymm3 + vpxor %ymm0,%ymm12,%ymm12 + vpxor %ymm1,%ymm13,%ymm13 + vpxor %ymm2,%ymm14,%ymm14 + vpxor %ymm3,%ymm15,%ymm15 + vprold $8,%ymm12,%ymm12 + vprold $8,%ymm13,%ymm13 + vprold $8,%ymm14,%ymm14 + vprold $8,%ymm15,%ymm15 + vpaddd %ymm12,%ymm8,%ymm8 + vpaddd %ymm13,%ymm9,%ymm9 + vpaddd %ymm14,%ymm10,%ymm10 + vpaddd %ymm15,%ymm11,%ymm11 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm6,%ymm6 + vpxor %ymm11,%ymm7,%ymm7 + vprold $7,%ymm4,%ymm4 + vprold $7,%ymm5,%ymm5 + vprold $7,%ymm6,%ymm6 + vprold $7,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpaddd %ymm6,%ymm1,%ymm1 + vpaddd %ymm7,%ymm2,%ymm2 + vpaddd %ymm4,%ymm3,%ymm3 + vpxor 
%ymm0,%ymm15,%ymm15 + vpxor %ymm1,%ymm12,%ymm12 + vpxor %ymm2,%ymm13,%ymm13 + vpxor %ymm3,%ymm14,%ymm14 + vprold $16,%ymm15,%ymm15 + vprold $16,%ymm12,%ymm12 + vprold $16,%ymm13,%ymm13 + vprold $16,%ymm14,%ymm14 + vpaddd %ymm15,%ymm10,%ymm10 + vpaddd %ymm12,%ymm11,%ymm11 + vpaddd %ymm13,%ymm8,%ymm8 + vpaddd %ymm14,%ymm9,%ymm9 + vpxor %ymm10,%ymm5,%ymm5 + vpxor %ymm11,%ymm6,%ymm6 + vpxor %ymm8,%ymm7,%ymm7 + vpxor %ymm9,%ymm4,%ymm4 + vprold $12,%ymm5,%ymm5 + vprold $12,%ymm6,%ymm6 + vprold $12,%ymm7,%ymm7 + vprold $12,%ymm4,%ymm4 + vpaddd %ymm5,%ymm0,%ymm0 + vpaddd %ymm6,%ymm1,%ymm1 + vpaddd %ymm7,%ymm2,%ymm2 + vpaddd %ymm4,%ymm3,%ymm3 + vpxor %ymm0,%ymm15,%ymm15 + vpxor %ymm1,%ymm12,%ymm12 + vpxor %ymm2,%ymm13,%ymm13 + vpxor %ymm3,%ymm14,%ymm14 + vprold $8,%ymm15,%ymm15 + vprold $8,%ymm12,%ymm12 + vprold $8,%ymm13,%ymm13 + vprold $8,%ymm14,%ymm14 + vpaddd %ymm15,%ymm10,%ymm10 + vpaddd %ymm12,%ymm11,%ymm11 + vpaddd %ymm13,%ymm8,%ymm8 + vpaddd %ymm14,%ymm9,%ymm9 + vpxor %ymm10,%ymm5,%ymm5 + vpxor %ymm11,%ymm6,%ymm6 + vpxor %ymm8,%ymm7,%ymm7 + vpxor %ymm9,%ymm4,%ymm4 + vprold $7,%ymm5,%ymm5 + vprold $7,%ymm6,%ymm6 + vprold $7,%ymm7,%ymm7 + vprold $7,%ymm4,%ymm4 + decl %eax + jnz .Loop8xvl + + vpaddd %ymm16,%ymm0,%ymm0 + vpaddd %ymm17,%ymm1,%ymm1 + vpaddd %ymm18,%ymm2,%ymm2 + vpaddd %ymm19,%ymm3,%ymm3 + + vpunpckldq %ymm1,%ymm0,%ymm18 + vpunpckldq %ymm3,%ymm2,%ymm19 + vpunpckhdq %ymm1,%ymm0,%ymm0 + vpunpckhdq %ymm3,%ymm2,%ymm2 + vpunpcklqdq %ymm19,%ymm18,%ymm1 + vpunpckhqdq %ymm19,%ymm18,%ymm18 + vpunpcklqdq %ymm2,%ymm0,%ymm3 + vpunpckhqdq %ymm2,%ymm0,%ymm0 + vpaddd %ymm20,%ymm4,%ymm4 + vpaddd %ymm21,%ymm5,%ymm5 + vpaddd %ymm22,%ymm6,%ymm6 + vpaddd %ymm23,%ymm7,%ymm7 + + vpunpckldq %ymm5,%ymm4,%ymm2 + vpunpckldq %ymm7,%ymm6,%ymm19 + vpunpckhdq %ymm5,%ymm4,%ymm4 + vpunpckhdq %ymm7,%ymm6,%ymm6 + vpunpcklqdq %ymm19,%ymm2,%ymm5 + vpunpckhqdq %ymm19,%ymm2,%ymm2 + vpunpcklqdq %ymm6,%ymm4,%ymm7 + vpunpckhqdq %ymm6,%ymm4,%ymm4 + vshufi32x4 $0,%ymm5,%ymm1,%ymm19 + vshufi32x4 $3,%ymm5,%ymm1,%ymm5 + vshufi32x4 $0,%ymm2,%ymm18,%ymm1 + vshufi32x4 $3,%ymm2,%ymm18,%ymm2 + vshufi32x4 $0,%ymm7,%ymm3,%ymm18 + vshufi32x4 $3,%ymm7,%ymm3,%ymm7 + vshufi32x4 $0,%ymm4,%ymm0,%ymm3 + vshufi32x4 $3,%ymm4,%ymm0,%ymm4 + vpaddd %ymm24,%ymm8,%ymm8 + vpaddd %ymm25,%ymm9,%ymm9 + vpaddd %ymm26,%ymm10,%ymm10 + vpaddd %ymm27,%ymm11,%ymm11 + + vpunpckldq %ymm9,%ymm8,%ymm6 + vpunpckldq %ymm11,%ymm10,%ymm0 + vpunpckhdq %ymm9,%ymm8,%ymm8 + vpunpckhdq %ymm11,%ymm10,%ymm10 + vpunpcklqdq %ymm0,%ymm6,%ymm9 + vpunpckhqdq %ymm0,%ymm6,%ymm6 + vpunpcklqdq %ymm10,%ymm8,%ymm11 + vpunpckhqdq %ymm10,%ymm8,%ymm8 + vpaddd %ymm28,%ymm12,%ymm12 + vpaddd %ymm29,%ymm13,%ymm13 + vpaddd %ymm30,%ymm14,%ymm14 + vpaddd %ymm31,%ymm15,%ymm15 + + vpunpckldq %ymm13,%ymm12,%ymm10 + vpunpckldq %ymm15,%ymm14,%ymm0 + vpunpckhdq %ymm13,%ymm12,%ymm12 + vpunpckhdq %ymm15,%ymm14,%ymm14 + vpunpcklqdq %ymm0,%ymm10,%ymm13 + vpunpckhqdq %ymm0,%ymm10,%ymm10 + vpunpcklqdq %ymm14,%ymm12,%ymm15 + vpunpckhqdq %ymm14,%ymm12,%ymm12 + vperm2i128 $0x20,%ymm13,%ymm9,%ymm0 + vperm2i128 $0x31,%ymm13,%ymm9,%ymm13 + vperm2i128 $0x20,%ymm10,%ymm6,%ymm9 + vperm2i128 $0x31,%ymm10,%ymm6,%ymm10 + vperm2i128 $0x20,%ymm15,%ymm11,%ymm6 + vperm2i128 $0x31,%ymm15,%ymm11,%ymm15 + vperm2i128 $0x20,%ymm12,%ymm8,%ymm11 + vperm2i128 $0x31,%ymm12,%ymm8,%ymm12 + cmpq $512,%rdx + jb .Ltail8xvl + + movl $0x80,%eax + vpxord 0(%rsi),%ymm19,%ymm19 + vpxor 32(%rsi),%ymm0,%ymm0 + vpxor 64(%rsi),%ymm5,%ymm5 + vpxor 96(%rsi),%ymm13,%ymm13 + leaq (%rsi,%rax,1),%rsi + vmovdqu32 %ymm19,0(%rdi) + vmovdqu 
%ymm0,32(%rdi) + vmovdqu %ymm5,64(%rdi) + vmovdqu %ymm13,96(%rdi) + leaq (%rdi,%rax,1),%rdi + + vpxor 0(%rsi),%ymm1,%ymm1 + vpxor 32(%rsi),%ymm9,%ymm9 + vpxor 64(%rsi),%ymm2,%ymm2 + vpxor 96(%rsi),%ymm10,%ymm10 + leaq (%rsi,%rax,1),%rsi + vmovdqu %ymm1,0(%rdi) + vmovdqu %ymm9,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm10,96(%rdi) + leaq (%rdi,%rax,1),%rdi + + vpxord 0(%rsi),%ymm18,%ymm18 + vpxor 32(%rsi),%ymm6,%ymm6 + vpxor 64(%rsi),%ymm7,%ymm7 + vpxor 96(%rsi),%ymm15,%ymm15 + leaq (%rsi,%rax,1),%rsi + vmovdqu32 %ymm18,0(%rdi) + vmovdqu %ymm6,32(%rdi) + vmovdqu %ymm7,64(%rdi) + vmovdqu %ymm15,96(%rdi) + leaq (%rdi,%rax,1),%rdi + + vpxor 0(%rsi),%ymm3,%ymm3 + vpxor 32(%rsi),%ymm11,%ymm11 + vpxor 64(%rsi),%ymm4,%ymm4 + vpxor 96(%rsi),%ymm12,%ymm12 + leaq (%rsi,%rax,1),%rsi + vmovdqu %ymm3,0(%rdi) + vmovdqu %ymm11,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vmovdqu %ymm12,96(%rdi) + leaq (%rdi,%rax,1),%rdi + + vpbroadcastd 0(%r10),%ymm0 + vpbroadcastd 4(%r10),%ymm1 + + subq $512,%rdx + jnz .Loop_outer8xvl + + jmp .Ldone8xvl + +.align 32 +.Ltail8xvl: + vmovdqa64 %ymm19,%ymm8 + xorq %r10,%r10 + subq %rsi,%rdi + cmpq $64,%rdx + jb .Less_than_64_8xvl + vpxor 0(%rsi),%ymm8,%ymm8 + vpxor 32(%rsi),%ymm0,%ymm0 + vmovdqu %ymm8,0(%rdi,%rsi,1) + vmovdqu %ymm0,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa %ymm5,%ymm8 + vmovdqa %ymm13,%ymm0 + leaq 64(%rsi),%rsi + + cmpq $128,%rdx + jb .Less_than_64_8xvl + vpxor 0(%rsi),%ymm5,%ymm5 + vpxor 32(%rsi),%ymm13,%ymm13 + vmovdqu %ymm5,0(%rdi,%rsi,1) + vmovdqu %ymm13,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa %ymm1,%ymm8 + vmovdqa %ymm9,%ymm0 + leaq 64(%rsi),%rsi + + cmpq $192,%rdx + jb .Less_than_64_8xvl + vpxor 0(%rsi),%ymm1,%ymm1 + vpxor 32(%rsi),%ymm9,%ymm9 + vmovdqu %ymm1,0(%rdi,%rsi,1) + vmovdqu %ymm9,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa %ymm2,%ymm8 + vmovdqa %ymm10,%ymm0 + leaq 64(%rsi),%rsi + + cmpq $256,%rdx + jb .Less_than_64_8xvl + vpxor 0(%rsi),%ymm2,%ymm2 + vpxor 32(%rsi),%ymm10,%ymm10 + vmovdqu %ymm2,0(%rdi,%rsi,1) + vmovdqu %ymm10,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa32 %ymm18,%ymm8 + vmovdqa %ymm6,%ymm0 + leaq 64(%rsi),%rsi + + cmpq $320,%rdx + jb .Less_than_64_8xvl + vpxord 0(%rsi),%ymm18,%ymm18 + vpxor 32(%rsi),%ymm6,%ymm6 + vmovdqu32 %ymm18,0(%rdi,%rsi,1) + vmovdqu %ymm6,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa %ymm7,%ymm8 + vmovdqa %ymm15,%ymm0 + leaq 64(%rsi),%rsi + + cmpq $384,%rdx + jb .Less_than_64_8xvl + vpxor 0(%rsi),%ymm7,%ymm7 + vpxor 32(%rsi),%ymm15,%ymm15 + vmovdqu %ymm7,0(%rdi,%rsi,1) + vmovdqu %ymm15,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa %ymm3,%ymm8 + vmovdqa %ymm11,%ymm0 + leaq 64(%rsi),%rsi + + cmpq $448,%rdx + jb .Less_than_64_8xvl + vpxor 0(%rsi),%ymm3,%ymm3 + vpxor 32(%rsi),%ymm11,%ymm11 + vmovdqu %ymm3,0(%rdi,%rsi,1) + vmovdqu %ymm11,32(%rdi,%rsi,1) + je .Ldone8xvl + vmovdqa %ymm4,%ymm8 + vmovdqa %ymm12,%ymm0 + leaq 64(%rsi),%rsi + +.Less_than_64_8xvl: + vmovdqa %ymm8,0(%rsp) + vmovdqa %ymm0,32(%rsp) + leaq (%rdi,%rsi,1),%rdi + andq $63,%rdx + +.Loop_tail8xvl: + movzbl (%rsi,%r10,1),%eax + movzbl (%rsp,%r10,1),%ecx + leaq 1(%r10),%r10 + xorl %ecx,%eax + movb %al,-1(%rdi,%r10,1) + decq %rdx + jnz .Loop_tail8xvl + + vpxor %ymm8,%ymm8,%ymm8 + vmovdqa %ymm8,0(%rsp) + vmovdqa %ymm8,32(%rsp) + +.Ldone8xvl: + vzeroall + leaq (%r9),%rsp +.cfi_def_cfa_register %rsp +.L8xvl_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ChaCha20_8xvl,.-ChaCha20_8xvl diff --git a/contrib/openssl-cmake/asm/crypto/ec/ecp_nistz256-armv8.S b/contrib/openssl-cmake/asm/crypto/ec/ecp_nistz256-armv8.S new file mode 100644 index 000000000000..5e97928ee122 
--- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/ec/ecp_nistz256-armv8.S @@ -0,0 +1,4288 @@ +#include "arm_arch.h" + +.section .rodata +.globl ecp_nistz256_precomputed +.type ecp_nistz256_precomputed,%object +.align 12 +ecp_nistz256_precomputed: +.byte 0x3c,0x4d,0x27,0xcc,0xf5,0x4a,0x4f,0x8f,0xe8,0xc8,0x04,0x68,0x09,0x4a,0x5b,0x80,0x9d,0x7a,0xe8,0x31,0x08,0x76,0x68,0x19,0x9f,0x08,0xb4,0x1f,0x32,0x43,0x89,0xd8,0x34,0xd3,0xf5,0xb7,0xb5,0xee,0x42,0x3e,0x91,0x01,0x06,0x7c,0xbf,0xd9,0x97,0x12,0xd3,0x1a,0xc9,0x04,0x8d,0x53,0x83,0x14,0x28,0xf0,0x8e,0x19,0xcc,0x91,0xe5,0x80 +.byte 0x14,0xd6,0xc1,0x8d,0x61,0x66,0x3b,0xa7,0x20,0x1e,0xe4,0x77,0xd7,0x66,0x05,0xfb,0x5c,0xa9,0x9a,0x7a,0xb2,0x30,0x50,0x28,0x87,0x80,0xfe,0xcd,0xe1,0xb3,0xff,0xa3,0x45,0x3c,0x7e,0x9b,0x08,0xc0,0xc1,0x9f,0x2e,0xad,0x7d,0x89,0x79,0x90,0x60,0xc6,0xac,0x17,0x64,0x59,0x4d,0xcf,0x56,0x7a,0xca,0x82,0xaa,0x6e,0x04,0x2f,0x1f,0x8b +.byte 0xa9,0xdd,0xeb,0x91,0x5c,0x77,0x17,0x99,0x4e,0xc2,0x45,0x69,0x2e,0xcf,0x60,0xc6,0x3c,0xad,0x65,0x33,0x35,0x6f,0xe4,0xd0,0x37,0x1f,0xe2,0x2c,0x66,0x98,0x55,0xe3,0x66,0xa2,0xc6,0x21,0xce,0x63,0x59,0x2e,0xd2,0x2b,0x8a,0x5a,0xcd,0xee,0xa7,0xad,0xf6,0x8c,0x3f,0x44,0x6c,0x12,0x30,0x8d,0xca,0xea,0x46,0x8a,0x4c,0x96,0xf9,0x96 +.byte 0x18,0x10,0x4e,0x46,0xc4,0x3e,0xa0,0x94,0x26,0x9d,0x62,0xd2,0x4b,0xb0,0xbc,0x0b,0xd5,0x56,0xa5,0xd2,0xc1,0x2f,0x2d,0x15,0xd8,0xed,0x97,0x17,0xcb,0x32,0x67,0xc5,0x0f,0x7c,0xde,0xa8,0x8c,0x4d,0xa0,0xb8,0x2e,0xed,0x24,0xd5,0xd5,0x49,0xca,0x77,0x1f,0x48,0x3b,0x83,0x54,0xb2,0xe7,0x7e,0x7a,0xa7,0x5c,0xed,0x7f,0xa1,0x9f,0x05 +.byte 0xd4,0xd4,0x90,0x0d,0xae,0x37,0x4e,0xd1,0x8f,0xd1,0x0a,0xa7,0x63,0x5b,0xb7,0x65,0xcb,0xc8,0xba,0x29,0xec,0x35,0x53,0xb2,0xac,0x32,0xf4,0xb7,0x6a,0xb1,0x69,0xcf,0x56,0x14,0x7f,0xd6,0xc5,0xca,0x88,0x1d,0x49,0xcf,0xfd,0x1f,0xcc,0xb1,0x13,0x30,0x42,0xd0,0x1c,0x6e,0x38,0x8e,0xf9,0x40,0xe7,0xe8,0xd6,0x28,0x1a,0x75,0x31,0xf3 +.byte 0x30,0x46,0x3f,0xb5,0x8a,0x47,0x35,0x4c,0x6e,0xdb,0x26,0x1a,0x25,0xa3,0xd8,0x0b,0x1d,0x51,0x12,0x91,0x4c,0x11,0x76,0x83,0x19,0xad,0x2a,0x3e,0xb4,0x1c,0x3c,0xfc,0x14,0x20,0x84,0x58,0x7b,0xc3,0x94,0x68,0x60,0x5c,0x3f,0x7c,0x26,0xb5,0x75,0x41,0x0b,0xc2,0xec,0xf3,0x96,0x5b,0xbb,0x41,0x32,0x00,0x4e,0x68,0xeb,0xf1,0xd9,0x96 +.byte 0xe7,0x00,0xac,0xb0,0x1b,0x39,0x46,0xf1,0xc9,0x18,0x7d,0xb7,0xc4,0x42,0xbc,0x8b,0x09,0x3e,0xa9,0x97,0x2e,0xc6,0xf8,0x38,0xa3,0xe4,0x2c,0x52,0x5d,0x24,0xf7,0xc5,0x15,0xab,0x16,0x5e,0x46,0x2c,0xd8,0xd7,0x4d,0xb3,0xf2,0xfd,0xe4,0x75,0x3c,0x34,0x95,0xb9,0x8c,0x92,0x35,0x42,0x8b,0xc4,0xc8,0x6c,0xd4,0x1e,0x67,0x35,0xd3,0x6d +.byte 0x79,0x85,0xff,0x74,0xbe,0x40,0x07,0x27,0x75,0x2c,0xea,0x04,0xcc,0xa2,0x72,0x80,0x97,0x5f,0xfe,0x8a,0x56,0x0f,0xf4,0x6d,0xa4,0x61,0x04,0x4b,0x5e,0xb4,0xe2,0xd8,0x87,0xb6,0xfd,0x3d,0x00,0x8a,0xa9,0xe4,0x62,0x5f,0x4f,0xec,0x1e,0x40,0x28,0x6b,0x21,0x0f,0x50,0x26,0x97,0xa0,0x25,0x8f,0x3e,0xf2,0x69,0xdc,0x36,0xe5,0xb8,0xdb +.byte 0x01,0x7d,0xfb,0x73,0x7d,0x3e,0xf7,0x55,0x41,0x39,0xe0,0x33,0x0d,0xe3,0x4b,0x6b,0x7b,0x3e,0x6e,0xdc,0x7d,0x9a,0x6e,0x35,0xb0,0x38,0x13,0x92,0x80,0xa1,0xe6,0xbf,0x03,0x9d,0xb7,0x7f,0x55,0xce,0x46,0x3c,0x22,0xc7,0xfa,0xfb,0x18,0xba,0x06,0xa0,0x09,0x78,0x3f,0xc0,0x79,0x5f,0xe6,0x6a,0x29,0xaf,0xd1,0xc7,0x84,0xa7,0xed,0xb9 +.byte 0xb6,0x82,0x81,0xc1,0x53,0xee,0x00,0x34,0xa8,0x81,0xdf,0x5a,0xd3,0x07,0x7e,0x2e,0x17,0x40,0xa1,0x2b,0xf4,0x2a,0x1f,0x9a,0x67,0x75,0x73,0xa8,0x58,0x65,0x17,0xdf,0xf1,0x84,0x76,0xc5,0x8d,0x48,0x93,0xe1,0x28,0xa5,0x73,0x10,0x6e,0x9e,0x39,0x03,0x69,0x52,0xdf,0xf9,0x46,0x7c,0x5b,0xf3,0x5b,0x9a,0x63,0xd9,0x4f,0xf5,0x8e,0x73 +.byte 
0xed,0x33,0x7d,0x23,0xb9,0x6c,0x3c,0x9b,0xa7,0xcf,0x7f,0x34,0x6f,0x97,0xe2,0xfe,0x0a,0x8b,0xe1,0x86,0x83,0x91,0x2e,0xdd,0x6b,0xb1,0xbf,0xa6,0x92,0x4f,0x30,0x79,0x68,0x91,0x3e,0x06,0x17,0xe9,0x0b,0x25,0x07,0xa6,0x88,0x91,0x6c,0x6e,0xc8,0xd8,0xdc,0x68,0x5e,0x45,0xf2,0x55,0xef,0x56,0x38,0x29,0xd0,0x89,0x40,0x58,0x51,0x9f +.byte 0x5f,0xa4,0x08,0xc6,0x94,0x34,0xd2,0x6f,0x59,0x0f,0x6e,0xca,0x85,0x7f,0x56,0x3f,0xac,0x8f,0x25,0x0f,0x47,0xe3,0x9e,0x40,0xed,0xd8,0xae,0x30,0x0d,0xb4,0x47,0x40,0x4b,0xa3,0x23,0x1b,0x7f,0x0f,0xff,0xdf,0x6f,0x1d,0x87,0xb2,0x94,0xa0,0x36,0xbb,0x53,0x13,0x1e,0xaf,0x92,0xf8,0x07,0x95,0xc7,0xe4,0xa8,0x41,0xa9,0xed,0xf0,0x08 +.byte 0xfc,0xc1,0x4a,0xed,0x9a,0x4f,0x13,0xc5,0xed,0x8a,0x95,0xf5,0x69,0xf7,0xee,0x75,0xb6,0x4d,0xba,0x8f,0x65,0x23,0xe8,0x50,0x9e,0x7a,0xd7,0x28,0x3a,0x49,0xe7,0x4c,0x7c,0xc6,0x64,0xbd,0x8c,0x17,0x14,0x0b,0xb5,0xe3,0xb4,0xab,0x0b,0x9a,0xa9,0x29,0x84,0xaa,0xba,0x69,0xc4,0x2e,0xbf,0xca,0x57,0x0d,0xd3,0x36,0x21,0x61,0x00,0x13 +.byte 0x95,0xe3,0xf8,0xa6,0x64,0x74,0x02,0xb5,0xbf,0x86,0x07,0xde,0x67,0x48,0x23,0xe0,0x24,0x96,0x3a,0x86,0xb2,0xfa,0xa7,0x75,0xb4,0x26,0x42,0xcb,0x96,0x4e,0xf7,0x90,0xae,0xa5,0xe4,0xd0,0x45,0x31,0xe7,0x0f,0xe0,0xcb,0xbf,0x94,0x94,0x33,0x4f,0x65,0x04,0xfb,0xc0,0xc4,0x3f,0x51,0xa5,0xf3,0xea,0xc8,0xd5,0x23,0x66,0xe0,0x48,0x09 +.byte 0xba,0x6a,0x27,0x50,0xec,0xae,0xd2,0x2a,0xe6,0xf9,0xe4,0xde,0x35,0x6e,0xcc,0x82,0x76,0xfc,0x36,0x16,0xe1,0x9f,0xc7,0x0d,0xc1,0xc9,0x6a,0x23,0xbe,0xa1,0x3c,0xfd,0xce,0xa7,0x2e,0x91,0x36,0x23,0x5a,0x20,0xdf,0x55,0xc5,0x91,0x32,0x5c,0x62,0x49,0xe7,0x8b,0x0b,0x0e,0x9c,0x2e,0xee,0x1f,0xfe,0xca,0x00,0xfc,0x55,0xd7,0x9c,0x0a +.byte 0x75,0xaa,0xb0,0x46,0x90,0x55,0x2b,0x46,0xab,0x98,0x9d,0xab,0x0e,0x12,0x03,0x58,0xf1,0x4a,0x68,0x59,0x74,0xc9,0x37,0x6d,0x6f,0xe6,0xd3,0x73,0xf1,0xa3,0xdd,0xbe,0x85,0xca,0x74,0xc6,0xb6,0x51,0x6f,0x83,0x6f,0xa1,0x80,0x00,0x00,0x78,0x0a,0xa7,0xff,0xa7,0xe2,0x2e,0x5f,0x4f,0x31,0xbb,0x1b,0x99,0x21,0x33,0x59,0x6e,0x03,0x38 +.byte 0x10,0xd9,0x98,0xf2,0x0c,0xad,0x08,0x6b,0x00,0x49,0xb5,0x5e,0x11,0x60,0x70,0x49,0xff,0x79,0xac,0xba,0x30,0x3d,0x69,0x9f,0xaf,0xfb,0xd7,0xeb,0xe2,0xcd,0x0d,0x97,0xb9,0x94,0xc8,0x6e,0x06,0x3b,0x64,0x80,0x71,0x8f,0x81,0xb0,0x58,0xe0,0xc7,0xbd,0x27,0x6a,0xd4,0xb7,0xd9,0x6c,0xc1,0x44,0x38,0xe1,0x36,0xbc,0x0a,0x33,0x26,0x01 +.byte 0x25,0x90,0xbc,0x0a,0xc2,0xa3,0xbb,0xfc,0xeb,0x0b,0x1a,0x38,0x98,0x26,0x93,0xf5,0x2d,0x29,0x41,0x83,0x3b,0xba,0x40,0x46,0xf3,0xf6,0xfd,0x53,0xb9,0x7a,0x60,0x01,0x8a,0x8d,0xb4,0x57,0xd8,0xf3,0x36,0x72,0x22,0x2f,0x59,0xd3,0x7f,0x25,0xf2,0x05,0x61,0xfa,0x18,0x28,0xac,0xd5,0x14,0x00,0xaf,0x8b,0x7c,0x39,0xb5,0xa2,0xcb,0x1e +.byte 0x62,0x14,0xcb,0x10,0x76,0x17,0x23,0x2c,0xc8,0x25,0xac,0x37,0x9e,0x83,0x81,0x83,0xfe,0x2e,0x2c,0xd2,0x3f,0xf8,0x58,0x2b,0xf1,0x7f,0x4f,0xe1,0x17,0xc7,0xf7,0xad,0x57,0x67,0xc2,0x57,0x77,0x2e,0xfb,0xf2,0xce,0xa9,0x74,0x81,0x47,0xf8,0x5a,0x88,0x76,0xb1,0x43,0x75,0xc8,0xc4,0xc8,0x60,0x1e,0xd7,0xd1,0x1c,0xce,0x89,0x82,0xc6 +.byte 0x77,0x8d,0x87,0xe8,0xd0,0x5b,0x0c,0xf0,0x44,0x48,0x8d,0xee,0x55,0xc6,0xe4,0x2c,0x2c,0x41,0x75,0x5d,0x5a,0xd2,0xa3,0x1d,0x32,0x85,0x08,0xcf,0x03,0x3a,0x3c,0xfe,0x65,0x75,0xef,0xd2,0xa6,0x22,0x16,0x66,0x39,0x30,0x05,0xe3,0x57,0xab,0x71,0x6d,0x28,0xd5,0x2f,0xc6,0xa8,0x25,0x46,0x14,0xfd,0x7e,0xa2,0x67,0x7e,0x20,0x91,0xc2 +.byte 
0x2b,0x03,0xdd,0xac,0xaa,0x1a,0xb5,0x2a,0x04,0xd6,0x15,0x9d,0x3f,0x54,0x24,0x7c,0x75,0xab,0x77,0xd9,0x6c,0x85,0xa2,0xf9,0x33,0xeb,0xeb,0xc0,0x27,0xcd,0x9d,0x58,0xae,0xa3,0x34,0x10,0xae,0x85,0x7d,0x4c,0x15,0x4c,0x90,0x46,0xe0,0x5b,0xec,0xa7,0xb2,0x68,0x85,0x01,0xed,0xf9,0x4a,0x85,0xe3,0xb6,0xea,0xe2,0x53,0xc0,0x32,0x83 +.byte 0x73,0x05,0x77,0xac,0xb5,0x96,0xaa,0xf0,0x9c,0x2c,0xa4,0xd2,0xd4,0xbf,0x74,0x2f,0x39,0x47,0x22,0x99,0x50,0x06,0x5f,0xcb,0x99,0xc5,0xc9,0x2e,0x70,0xd6,0x68,0x6a,0xc4,0x73,0x41,0xcb,0x8b,0xfd,0x23,0x98,0x11,0x59,0xad,0x20,0x8a,0x0d,0xaf,0xaa,0xd0,0xe2,0xeb,0x32,0x8b,0x6f,0x0e,0x43,0x12,0xe3,0x27,0x8f,0xf6,0xa4,0x76,0x0b +.byte 0xfb,0x22,0xad,0xda,0x1c,0x0a,0x3e,0x90,0xc0,0x7d,0xf3,0x09,0xbc,0x17,0x33,0xef,0xf1,0xf2,0x84,0x80,0x2a,0x0b,0x82,0xd7,0x95,0xc7,0xd2,0x08,0x4a,0xf4,0xf5,0x6d,0x09,0x06,0x8e,0xe4,0x74,0x63,0x8f,0x09,0xca,0xe2,0xd9,0x0e,0x1e,0x03,0x20,0x1b,0x4c,0xfb,0x1d,0x5a,0x2e,0x28,0xeb,0x84,0x82,0x6f,0x97,0x6f,0xcd,0x7a,0xc3,0xa7 +.byte 0x79,0x73,0x66,0x0c,0x94,0xd5,0xf4,0x8f,0x2c,0x73,0x1f,0x24,0xbc,0x17,0xee,0xd5,0xb0,0xa6,0xb8,0x04,0x6d,0x6a,0xd0,0x61,0xe3,0x1a,0x49,0x97,0x94,0xc5,0x8e,0xbc,0xac,0x5b,0x0b,0x0a,0xc5,0x74,0x06,0x89,0xee,0xc2,0xb7,0x5f,0x1b,0xa1,0x6b,0x1a,0xff,0xed,0xda,0x90,0x91,0xc1,0x0d,0x6a,0x06,0xd6,0xcb,0x02,0x71,0x17,0x95,0x7d +.byte 0xc6,0x3b,0x7e,0x6b,0xc8,0x73,0x03,0x0d,0x6b,0x8f,0x73,0x56,0x59,0x2e,0x09,0x23,0x4e,0xda,0xfc,0x4e,0xfc,0xa4,0x42,0x15,0x2e,0x10,0x6a,0x97,0x48,0x3c,0xb4,0xa4,0x0c,0x64,0x21,0xc3,0xeb,0x6c,0xac,0x27,0x4f,0x43,0x94,0x91,0x78,0xdc,0xfd,0xad,0x2b,0xa7,0x43,0x42,0xb0,0x51,0xdd,0x63,0xcc,0xcd,0xb7,0x15,0xfa,0x13,0x8d,0xc7 +.byte 0x55,0x3a,0x74,0x17,0x23,0x36,0x3e,0x23,0xe1,0x42,0x90,0xe1,0xb7,0xc7,0xda,0xb7,0x57,0xeb,0xc3,0xfb,0x62,0x58,0xbf,0x31,0x2a,0xfb,0xc7,0xdb,0x3d,0xfc,0x87,0x32,0xb1,0x3e,0xe5,0x3d,0x94,0x3d,0x86,0x32,0x61,0xfe,0x19,0xd2,0x32,0x31,0x8b,0x43,0xdb,0xab,0xa4,0xe5,0x34,0xc8,0x30,0xae,0x8c,0x02,0x53,0x99,0x35,0xb4,0x56,0x38 +.byte 0x37,0xcf,0xff,0xb0,0x05,0x21,0x12,0x65,0xc4,0xb3,0x9c,0x83,0x95,0x12,0xd3,0x03,0x7a,0x80,0x97,0x5b,0x67,0x33,0x27,0xfc,0x43,0xf2,0xf7,0xaa,0x60,0xb6,0xfc,0x55,0x44,0x30,0xa3,0x4a,0xa3,0x60,0x31,0xf7,0x01,0xfa,0xb0,0x8d,0x82,0x29,0xa7,0x03,0xb7,0x7e,0x3f,0xe5,0x66,0x26,0xb7,0x51,0xcf,0x8d,0xdd,0x6f,0x83,0x39,0xfc,0x9b +.byte 0xa5,0x3d,0xb6,0x41,0x89,0x54,0xc3,0xb2,0xf0,0x24,0x64,0xcb,0x53,0xfd,0x0a,0x91,0x6c,0x6f,0x28,0xfe,0xc1,0xe9,0x17,0x2e,0x65,0x55,0x2e,0xf2,0x48,0x52,0xb1,0x69,0xf0,0xdd,0x42,0xd5,0xdf,0x7c,0x36,0x75,0xdb,0x5b,0x3d,0xa9,0x6d,0xa4,0xeb,0x47,0x4f,0x2b,0x5c,0xd0,0x30,0xee,0xa7,0x74,0x6a,0x64,0x8a,0xbc,0x9b,0xe5,0x82,0x56 +.byte 0x76,0xe4,0x3f,0xf5,0x05,0x59,0x19,0x1e,0x80,0x47,0xf1,0x77,0xac,0x32,0x43,0x80,0x0a,0x1b,0x28,0xb6,0xf4,0xe8,0x7c,0x2f,0xeb,0xa8,0x4b,0x6a,0x59,0xb5,0xf8,0x77,0x68,0xd4,0x86,0x6c,0x87,0xdc,0xc4,0x00,0x4f,0xce,0xdb,0xf6,0x34,0xc3,0x74,0x02,0x08,0xdb,0x0d,0x34,0x8d,0xea,0x49,0x4a,0x30,0x5f,0x1b,0xcd,0xa6,0x3a,0x34,0x94 +.byte 0x5f,0x32,0x6a,0x62,0x96,0x4b,0x51,0x89,0x30,0xc9,0x90,0xdf,0x77,0x73,0x0e,0x3c,0x5c,0xbd,0x5c,0xee,0xd9,0x77,0xea,0x23,0x42,0xaa,0xa5,0x6b,0xf9,0x8c,0xc4,0x70,0x68,0xdd,0x0b,0x65,0xa3,0xc7,0xe4,0x7b,0x0a,0x89,0x85,0x25,0x7d,0x84,0x99,0x39,0xe6,0xb8,0xbe,0x7f,0x31,0x0f,0x84,0x0c,0x98,0x72,0xab,0x4c,0x44,0xb0,0xa4,0x83 +.byte 
0x90,0xbb,0x93,0x73,0x07,0x07,0xba,0x63,0x5b,0x61,0x70,0xe1,0x84,0xae,0xaa,0xd6,0xa3,0x5a,0x54,0xd1,0xea,0xc7,0x2c,0x7b,0x67,0x4b,0x8a,0x7f,0x66,0x28,0x8d,0x22,0xec,0x82,0x64,0x69,0x63,0xf0,0x53,0x2d,0x10,0x9c,0x9c,0x34,0x4f,0xc6,0x96,0x40,0xdb,0xce,0x0e,0xf7,0x3a,0x8a,0xee,0x3f,0x32,0x5f,0x2b,0x0c,0x4a,0xbc,0x63,0xfb +.byte 0x18,0xf6,0x26,0x57,0xc9,0x13,0x13,0xb7,0xe0,0xcc,0x3e,0x4e,0x73,0xfa,0xe2,0x54,0xc1,0x67,0xfe,0xe2,0xec,0xfd,0xaf,0xf9,0x96,0x99,0x9f,0xe9,0xe2,0xd0,0x94,0x39,0x33,0xc9,0xca,0x35,0x27,0xad,0x58,0x46,0x98,0x64,0x17,0x5f,0xe9,0xce,0x4b,0xc8,0xab,0x0d,0xd2,0x88,0xec,0xbb,0x5c,0xba,0xc1,0x30,0x4c,0xd4,0x99,0x0d,0x07,0x95 +.byte 0x0a,0xa5,0xeb,0xa6,0x10,0x4b,0x4d,0x77,0x14,0x76,0x88,0x43,0x7f,0x6b,0x5d,0x9b,0x87,0x1d,0x6b,0x5d,0xb9,0x04,0xa9,0xc7,0x28,0x18,0x70,0xa1,0x99,0xbc,0x99,0xf5,0xf1,0x71,0xa9,0x3a,0xb6,0xe5,0x98,0x98,0x8f,0x7a,0x6c,0xda,0x1a,0x63,0x0e,0xf1,0xe8,0x10,0xa3,0x7c,0x64,0x7e,0xde,0x2a,0x59,0x1b,0x04,0xca,0x69,0x8e,0xba,0x2f +.byte 0x56,0xe1,0xa7,0xab,0x4f,0xe4,0x9d,0x49,0x33,0x9e,0x4e,0x5b,0xe1,0x58,0xc4,0x3f,0x99,0x5a,0x69,0x00,0xe5,0x5f,0x85,0xcb,0x62,0x80,0x5e,0x3d,0x88,0x0a,0x32,0x42,0xc1,0xf9,0x6a,0xa0,0xeb,0x65,0x2f,0x17,0x62,0x25,0x96,0x50,0xa2,0x6e,0xd6,0xdf,0x09,0xb7,0x1e,0x68,0xb2,0x10,0x2b,0xf3,0x9e,0xb2,0x67,0x75,0x9b,0xe3,0x76,0xfe +.byte 0x95,0xbe,0x83,0xcb,0xba,0x77,0x5b,0x2d,0x5f,0xdd,0x94,0xbb,0x0e,0x5d,0x83,0xa2,0xe7,0x48,0x4c,0x84,0x86,0x41,0x47,0x4b,0x96,0x24,0x89,0xa8,0x20,0x04,0xa5,0xef,0x8e,0xb6,0xeb,0xcd,0x3c,0x77,0xc5,0x65,0x5c,0xff,0xa6,0x0d,0x2b,0x58,0x21,0x5a,0x11,0xe2,0x24,0x64,0x1c,0xd6,0x18,0x9a,0xac,0x3f,0x42,0x0e,0xeb,0x32,0x3e,0xed +.byte 0xce,0x61,0xc9,0xe4,0xe7,0xd3,0x3f,0x53,0xa4,0x80,0x2b,0x1c,0xc0,0x99,0x63,0x52,0x93,0x5e,0xdc,0x78,0xe2,0x35,0x9e,0xb2,0xb4,0x1d,0x09,0xd1,0x5c,0x1c,0x4e,0xdb,0x3a,0x5d,0x8c,0x94,0x7d,0xfe,0x63,0xf2,0xa3,0xe9,0x61,0x73,0x78,0xc1,0xd9,0x17,0x5e,0x9a,0x73,0x58,0xc3,0xe7,0xa0,0x1f,0x2a,0x62,0x15,0xf8,0xdb,0xbb,0x38,0x80 +.byte 0x57,0xd3,0x1f,0x4c,0x4a,0x20,0x30,0xa9,0x7a,0x78,0x61,0xd9,0x90,0xb7,0x4f,0xd6,0x46,0x72,0xe7,0x41,0xb2,0xbb,0xfb,0x50,0xfe,0xe1,0xba,0x3e,0x73,0x2f,0x81,0x6d,0x2b,0x0b,0x90,0xbd,0x8a,0x3b,0x23,0x88,0xa2,0x7d,0x62,0x87,0x96,0xc9,0xcc,0x66,0x28,0x89,0xa7,0x29,0x41,0xd2,0xc5,0x5b,0xdb,0xc4,0x0c,0xbb,0x19,0x4e,0xd5,0x12 +.byte 0x53,0x48,0x5c,0xf2,0x9b,0x62,0xd0,0xa3,0x77,0x40,0x85,0x12,0x2b,0x2d,0x52,0x1b,0x31,0xbd,0xe9,0x1c,0xd4,0x87,0xa4,0xd7,0xc9,0x14,0xb7,0x39,0x66,0x8c,0xfe,0x3e,0x83,0x00,0x01,0xae,0x44,0x2d,0x7d,0xa1,0xda,0x66,0xb0,0x66,0xcb,0x62,0x55,0x9f,0x92,0x80,0x4e,0x8d,0x7f,0x70,0x95,0xc2,0xf2,0x1b,0xe9,0x35,0xf8,0x42,0x04,0x65 +.byte 0xf2,0x36,0x4c,0x96,0x30,0xd3,0x47,0x9d,0xb7,0x2b,0x76,0xac,0x75,0xb5,0xb8,0xf1,0x7d,0xa2,0x36,0xef,0x9d,0xa7,0x60,0x51,0x8d,0xcf,0x00,0x3d,0xdb,0xcc,0xe9,0xe2,0xc4,0x7b,0x3a,0xeb,0x2b,0xc3,0xd8,0x0b,0xb0,0x58,0x41,0xa0,0x47,0xab,0x07,0xf5,0x7c,0x9e,0x0b,0x7a,0x16,0x8f,0xb4,0xca,0x09,0xed,0x84,0xa1,0xfa,0xdc,0x7c,0x3c +.byte 0xdd,0x2f,0xb0,0x2d,0xeb,0x93,0x28,0xf5,0x1e,0x0c,0x1a,0x0c,0x35,0x27,0x40,0xf2,0x22,0x66,0x2d,0x82,0xf2,0x94,0x03,0xa5,0x4b,0x84,0x92,0x1d,0x98,0xd5,0xd9,0x09,0x6a,0xfd,0x65,0xe5,0xa1,0x0e,0xe2,0xd9,0xb6,0xd1,0xba,0xbf,0xc7,0x42,0x22,0x39,0x83,0xbf,0x37,0xf6,0x80,0xc2,0xea,0xdf,0xb9,0x33,0xa0,0xaf,0xd7,0xe3,0x70,0x9a +.byte 
0x5c,0xf8,0x1a,0x47,0x2b,0xb5,0xdd,0x15,0xe3,0x08,0xc8,0x37,0xe3,0xc2,0x25,0x87,0x0e,0x3c,0xc5,0xae,0x61,0xa4,0x4a,0x56,0x50,0x08,0x58,0x68,0xa3,0x4a,0x28,0x08,0xef,0x92,0xd5,0x13,0x50,0x09,0x76,0x34,0x47,0xae,0xa8,0x7f,0xa5,0x2b,0x13,0xb7,0x5a,0x96,0x65,0x62,0xf2,0xaa,0xb4,0x4b,0x2a,0xad,0xea,0x2c,0x0d,0x1e,0x97,0x82 +.byte 0xe4,0x6f,0xfe,0xf4,0x88,0x14,0x7b,0xba,0x45,0xbe,0x61,0x56,0xd2,0x37,0x1b,0x65,0xb8,0x0b,0x77,0xcb,0x3c,0xfe,0x9f,0xe3,0x39,0xc5,0xfb,0x2a,0x18,0x9b,0x60,0x99,0xd5,0x6f,0x52,0xfe,0xd8,0x04,0x88,0x1c,0x9a,0x50,0xe5,0x3b,0x33,0x3f,0xca,0xc5,0x5b,0x9c,0x5f,0x35,0x13,0x65,0xa6,0x21,0x78,0x19,0xeb,0xff,0x35,0x70,0x81,0xaf +.byte 0x19,0x23,0x61,0xd6,0xeb,0xff,0xa6,0x9e,0x5d,0x3f,0x7f,0x89,0x2e,0x22,0xa4,0x0b,0x9c,0x4f,0xa9,0xff,0xbb,0x23,0x29,0xa1,0xf4,0x8a,0xb7,0x4b,0xfb,0xbf,0xeb,0x0a,0x47,0x87,0x78,0x2b,0x20,0x38,0x82,0xab,0x7e,0x2c,0xdc,0x08,0x2b,0xb4,0xae,0xd8,0x64,0x44,0x1a,0xdf,0x21,0x62,0x27,0xf2,0x61,0x63,0x37,0xad,0xd4,0x06,0x4e,0xae +.byte 0xba,0xeb,0x08,0xfa,0xe5,0xad,0x5d,0xcf,0xce,0x38,0xe5,0xca,0x74,0x83,0x42,0x4b,0xe8,0x8f,0xfb,0xff,0x83,0x4d,0x27,0x88,0x43,0x62,0xdd,0x80,0xa2,0x06,0x98,0x48,0x58,0x6f,0x54,0x16,0x6f,0xbf,0x81,0x36,0xc8,0xf3,0xea,0x4b,0xf7,0x5a,0x7b,0xb7,0xf4,0xa4,0x5e,0x22,0x52,0xe7,0x9e,0xb1,0xb6,0x7a,0xa8,0x22,0xee,0x68,0x82,0x8f +.byte 0xe4,0xcb,0xad,0x71,0xef,0x53,0xf2,0x7d,0xed,0x91,0x9e,0xf6,0x90,0x9e,0x54,0x19,0x30,0xaf,0x4a,0x17,0xc0,0x6a,0x9c,0x49,0x12,0x8b,0x6f,0xc7,0x47,0x1e,0xa2,0x64,0x28,0x1f,0x0c,0xd3,0x3e,0x59,0x66,0x8c,0x2e,0x11,0x52,0x6c,0x69,0x66,0x10,0xfb,0x27,0xe6,0x1c,0xae,0x6f,0x44,0x87,0x86,0x0d,0x3e,0xd3,0xa0,0x80,0xef,0x30,0xb9 +.byte 0xb8,0xd7,0x47,0x84,0x68,0x2b,0xf2,0x32,0x7b,0x89,0x93,0xd2,0x83,0x56,0x35,0xc3,0xbf,0x5c,0x24,0xec,0xad,0x2d,0xa4,0x49,0x63,0x89,0xc6,0xf9,0x24,0x51,0x1c,0x9b,0xd1,0xcb,0x30,0x82,0xda,0xb3,0xa7,0xe1,0x4d,0x96,0xd0,0x44,0x44,0x1d,0x4e,0xd7,0x7d,0x7a,0x51,0x2e,0x2f,0xc4,0x9f,0xdb,0x06,0x53,0xfc,0x51,0x56,0xe5,0xb9,0x6b +.byte 0x4a,0x2c,0x3e,0x62,0xc5,0x9c,0x42,0xe3,0xaf,0x3a,0x0f,0x0e,0x74,0x29,0x66,0x70,0x75,0x2a,0x06,0xd4,0x0f,0x0c,0xfd,0xea,0xcc,0x39,0xd0,0xa7,0x47,0x75,0x92,0x44,0x09,0xa2,0x3c,0x4e,0xad,0xaa,0xc4,0xc6,0xf9,0x35,0x82,0x23,0x25,0x43,0x94,0x26,0x14,0xde,0xf1,0xb9,0xb8,0xe0,0x75,0xe0,0x48,0x70,0x8a,0xc6,0x3c,0x72,0x98,0x72 +.byte 0x8b,0x15,0x58,0x17,0x73,0x29,0x67,0x21,0x56,0xc4,0x25,0x17,0x68,0xbe,0xd7,0x36,0x05,0x4b,0x58,0xa2,0x1b,0x64,0xe5,0x11,0x96,0x5a,0x3b,0xa6,0x90,0xb6,0x2d,0x7e,0x55,0xbb,0x31,0x93,0xe7,0xcc,0x2e,0x74,0xb6,0x9b,0x4d,0x04,0xc5,0x45,0x9b,0x0b,0x26,0xef,0x61,0x23,0x3d,0x7e,0xee,0x01,0x57,0xfa,0x77,0x12,0x47,0x64,0xac,0x8f +.byte 0x25,0xbe,0x8e,0x2e,0x68,0x11,0x95,0xf0,0x1a,0xd2,0x3d,0x66,0xc1,0xdb,0x97,0x9e,0xbb,0xba,0xc1,0x66,0xa4,0xb5,0x71,0x01,0xee,0xf5,0xbb,0x1e,0x9f,0x41,0xfc,0x40,0x74,0x26,0xf7,0xc6,0x2c,0x9c,0x1c,0x59,0xce,0xcf,0x18,0x17,0x81,0x5d,0xd4,0xe3,0xd8,0x46,0x62,0x9e,0x97,0xb1,0xca,0xac,0x01,0x3e,0xf8,0x96,0xa2,0xee,0xe0,0xf8 +.byte 0xf3,0x2d,0xe9,0xd2,0x1f,0x9f,0x41,0xbb,0x2f,0xe5,0x64,0x6d,0x5b,0xe7,0x47,0x0e,0x83,0x7b,0x08,0x5e,0x29,0x35,0x2f,0x75,0x31,0x44,0x4c,0xb7,0x61,0xa4,0x03,0x2e,0x15,0x94,0x7a,0xa0,0x46,0x31,0x7b,0x43,0xd9,0x14,0xa3,0x34,0x0c,0x83,0x93,0x75,0x8e,0x3a,0x1c,0xc3,0xe1,0x36,0x18,0x96,0x7a,0xfb,0x77,0xad,0xbb,0xe9,0x0d,0x4b +.byte 
0x21,0x04,0x2e,0xdd,0x7a,0x63,0xc9,0x60,0xb1,0x9b,0xad,0xde,0x1f,0x65,0x8a,0x58,0x18,0x84,0x95,0xa9,0xac,0x3a,0xac,0xcb,0xb7,0xa9,0xeb,0x0c,0x7c,0x3a,0x98,0x9a,0x3f,0x56,0x23,0x51,0x58,0x59,0x4e,0xf5,0x57,0x60,0xe6,0x9d,0xf8,0xf7,0xed,0x9d,0x81,0x14,0x68,0xbe,0xaf,0x19,0xe5,0xb5,0x9b,0x5f,0xe4,0x51,0x44,0x4b,0x23,0x42 +.byte 0xdd,0x92,0x1a,0xe5,0x7e,0xef,0x77,0xbe,0x88,0x77,0x1e,0x8a,0xbd,0x2a,0x77,0xb1,0x0d,0x1b,0xe3,0x8a,0x7f,0x15,0x71,0x93,0xc9,0x5f,0x78,0x2d,0x77,0x9b,0x0c,0xad,0x76,0x3c,0x6b,0xe2,0x15,0x8e,0xe1,0x5e,0x1d,0x90,0xa5,0xd6,0xc7,0x55,0x5d,0x52,0xf7,0xcc,0x82,0x9b,0xdc,0x1d,0x80,0xa4,0xc7,0xbe,0x7c,0x4f,0xda,0x81,0x91,0x78 +.byte 0x88,0x0e,0x31,0xde,0x87,0x4c,0xdc,0x84,0x9a,0x65,0x89,0xfa,0x22,0x3e,0xde,0x3b,0x7f,0x7f,0x9b,0x3f,0x3e,0xda,0x13,0x31,0x59,0x7b,0x08,0x48,0x39,0x37,0xfd,0x1a,0x4f,0xa3,0x12,0xba,0xe5,0xd6,0xfa,0xa3,0x59,0x0b,0x3b,0x7d,0xde,0xc0,0x51,0xce,0x92,0x6b,0x3d,0x4b,0xd2,0xa4,0x68,0xc2,0x32,0x2d,0x01,0xbd,0x66,0x98,0x8f,0xa0 +.byte 0x86,0xfb,0x08,0x36,0xa9,0xd4,0x3b,0x7b,0x01,0x2d,0xaa,0x8c,0x64,0x19,0xa6,0x62,0x24,0x92,0x5e,0xc5,0x02,0x17,0x8e,0xf0,0x88,0xe9,0xd1,0x8b,0x69,0xda,0xed,0x9c,0x60,0x32,0xab,0xc0,0xbc,0x84,0x64,0x6e,0x32,0xb2,0xcd,0x24,0xf6,0xb2,0x9d,0xf5,0xf5,0x71,0xe2,0x01,0xbc,0x77,0x6a,0x5b,0x26,0x56,0xf7,0x04,0x84,0xff,0x7c,0xa4 +.byte 0xe8,0xa8,0x82,0x6c,0x40,0x24,0x93,0x3c,0x6e,0x7d,0x0d,0x22,0xd0,0xe4,0xef,0xc4,0x4e,0x26,0x66,0x61,0x75,0xe9,0x06,0x69,0x06,0xfd,0x97,0x68,0x96,0x67,0xec,0x96,0x09,0x73,0xe4,0x0a,0x3e,0xaa,0xb8,0x25,0x77,0x00,0x91,0x7a,0x2e,0xc8,0x81,0x75,0x78,0xb7,0xa5,0x27,0x55,0xf2,0xcf,0x9a,0xab,0xab,0x51,0x0a,0x65,0x47,0xbf,0x10 +.byte 0xd2,0x19,0x78,0x6b,0x35,0xf4,0xef,0x12,0x2b,0x5f,0x0c,0x28,0x7c,0xe8,0x64,0x55,0x2f,0x26,0x85,0x91,0x7a,0x9d,0x48,0x76,0x12,0x14,0x2d,0x4a,0x8a,0xd6,0xfa,0x7b,0xf9,0xc7,0x24,0x45,0xf6,0xbd,0x47,0xab,0xc6,0x4b,0x9e,0x39,0x77,0x57,0x04,0xa8,0x4d,0x43,0x99,0x5c,0xb1,0x3d,0xc2,0x4e,0xc5,0x17,0x66,0xc4,0xb6,0xdd,0x92,0x80 +.byte 0x85,0x3b,0x07,0x63,0x16,0x5f,0x67,0x76,0x9b,0xb5,0x8e,0xca,0x97,0xbb,0xf4,0x20,0xd0,0x4d,0x7b,0xd0,0xa3,0x74,0x6f,0x8a,0x68,0xc7,0x31,0x78,0x1b,0x72,0x45,0xa4,0xc4,0xf8,0xf8,0x26,0xa8,0x4d,0x08,0x2f,0x7b,0x3d,0xa0,0x2a,0xb5,0x65,0x27,0xc2,0x36,0x13,0x2d,0x8d,0x83,0xeb,0xf4,0x08,0x26,0x41,0x8b,0x32,0xf3,0x09,0x70,0x70 +.byte 0x5d,0x8a,0xcc,0xb8,0xe9,0xf7,0x08,0xdf,0x5f,0x4a,0xb8,0x8a,0xb7,0x1b,0xad,0xe2,0xc3,0x39,0x59,0xe0,0x7f,0xd0,0x66,0x7b,0x99,0x5a,0xde,0x52,0xe2,0x1f,0x47,0xc2,0x63,0x74,0x7a,0xa5,0x88,0xc3,0x24,0x70,0x4a,0x7d,0xdd,0xa4,0xe6,0xf8,0xfd,0x5c,0xfa,0x8c,0x4c,0x0f,0x52,0x95,0xf3,0x2c,0x76,0x47,0x7a,0xe8,0xdb,0xe0,0x9b,0x49 +.byte 0x88,0x5b,0x87,0x5a,0xd1,0x07,0x24,0x06,0x83,0x3b,0x25,0x23,0xe7,0xaa,0x79,0xef,0x74,0x02,0x12,0xfe,0x47,0x5c,0x77,0x73,0xf7,0x2e,0x4b,0x58,0x3b,0x60,0x7b,0x91,0x2f,0x0d,0xb4,0x6d,0x00,0x80,0x19,0xaa,0x88,0xbc,0xb2,0x7b,0xd9,0xb7,0xdd,0x32,0x47,0x62,0xf5,0x0f,0x46,0x95,0x4c,0x6c,0x01,0x67,0xfb,0xe4,0x2b,0xac,0x95,0x84 +.byte 0x25,0x0a,0xe5,0x4c,0x2d,0x4a,0x6e,0x77,0xfd,0xeb,0xe1,0x53,0xc9,0x2e,0x70,0x01,0x32,0x05,0x6d,0xc5,0xc9,0x5d,0x90,0xca,0x56,0xd1,0xd8,0x40,0x2a,0x51,0x4d,0x95,0xc3,0x57,0x8b,0xdd,0x62,0x9c,0x69,0xd1,0x03,0x89,0x95,0x38,0x2c,0xc1,0x6d,0x41,0xf2,0xc3,0xa2,0x9c,0x43,0xea,0xf1,0x02,0x00,0x56,0x46,0xbb,0x87,0x35,0x40,0x0e +.byte 
0x18,0x51,0x29,0x39,0xbb,0x6d,0x15,0xf2,0xcd,0x54,0x23,0x95,0x69,0xdc,0x0a,0xb2,0x26,0xd9,0x25,0xe1,0xf1,0x07,0x7b,0x5e,0xc3,0x30,0x68,0x5f,0x2a,0xce,0x91,0x92,0x03,0x0c,0x62,0x11,0x43,0x80,0xe5,0x12,0xec,0xe3,0x4f,0x90,0xfe,0x38,0x6e,0xe9,0x7e,0x94,0x83,0x26,0x59,0x3f,0x3f,0x81,0xc6,0x94,0x98,0x09,0x80,0xff,0x01,0x44 +.byte 0xff,0x77,0x6a,0x4c,0x76,0x91,0xd9,0x12,0x59,0x9a,0x00,0x7c,0x87,0x06,0x17,0xf7,0x12,0xc7,0xee,0x04,0xd5,0x8d,0x68,0xc5,0x8d,0x80,0x10,0xcc,0x14,0x45,0xe8,0xd7,0x43,0x10,0x01,0x9e,0x61,0xc2,0xc0,0x66,0xfe,0xcf,0x5f,0x9f,0xcb,0xa3,0xf8,0xc7,0x07,0x41,0xe3,0xf2,0xda,0x6e,0x01,0x76,0xc6,0x49,0x49,0x01,0xc7,0xcf,0x6a,0x20 +.byte 0x71,0xc5,0xf0,0xb1,0xa0,0xc9,0xed,0xec,0x66,0x71,0x93,0xf5,0xc0,0x27,0x42,0xed,0xd5,0x6f,0x20,0xe1,0x86,0x3e,0xd0,0x5d,0x94,0x17,0x43,0xb4,0x98,0x0d,0x8a,0x31,0x6c,0x59,0xa9,0x0b,0xb3,0xa4,0x0b,0x46,0x0b,0xa8,0x79,0x62,0x3a,0x3d,0xbf,0xef,0x94,0xd3,0x31,0xf2,0xa1,0x55,0xe8,0x92,0x44,0x37,0x62,0x82,0x1b,0x60,0x87,0x67 +.byte 0x85,0x78,0xd5,0x84,0x73,0xa4,0xea,0x56,0x08,0x78,0x68,0x7f,0xfb,0x15,0x20,0x64,0xeb,0x6c,0xf7,0x5e,0xc0,0x79,0x83,0x59,0x7b,0xed,0x2d,0xa9,0x37,0x46,0xf3,0x62,0xb1,0xa1,0x2b,0x48,0x58,0xd9,0x0c,0x03,0xf7,0xf3,0x47,0xeb,0xd7,0x03,0x9b,0x85,0xd3,0xd7,0xd7,0x7e,0xfb,0x1a,0x25,0x83,0xda,0x06,0xa0,0x04,0x0d,0x6b,0x90,0x29 +.byte 0x2a,0xfc,0xcd,0x96,0xe9,0x17,0x4f,0xdd,0x2c,0x90,0xdf,0xf1,0xe3,0x08,0x0a,0xb8,0x0c,0x59,0x2a,0x83,0x62,0x94,0x00,0xd3,0x80,0x1a,0x31,0xd7,0x17,0x70,0xc7,0xa2,0x20,0x17,0x65,0x88,0xae,0x11,0x25,0xc9,0xba,0x76,0xa7,0x61,0x60,0xd1,0x59,0x50,0x22,0xdd,0xaa,0xcf,0x9d,0xc1,0x36,0x7d,0xf9,0x7b,0x69,0xc0,0x98,0xba,0x40,0xd5 +.byte 0xd6,0x46,0x93,0x92,0x7d,0x37,0x3f,0x3a,0x04,0x9a,0x84,0xaf,0x8e,0x61,0x04,0x26,0x54,0x33,0x84,0xc0,0xac,0x21,0x51,0xd7,0x9a,0x93,0x6e,0xf2,0x09,0x87,0xc5,0x35,0xa8,0x96,0xb0,0x64,0x90,0x35,0x52,0xed,0x0e,0xbc,0xdb,0xa6,0x06,0x3e,0xe7,0xea,0x57,0x4b,0xd7,0xc5,0x1c,0x76,0x3d,0x0d,0xc3,0x1f,0x8e,0x4f,0x12,0xdb,0x3a,0x21 +.byte 0x2a,0x69,0xc2,0x94,0xda,0x4c,0x91,0xcc,0xa8,0x36,0x89,0xd7,0x78,0xa8,0x74,0x79,0x63,0x92,0xeb,0x39,0x3b,0x84,0x8c,0xe5,0xc6,0x26,0xf0,0xef,0xcc,0xc1,0x72,0x4b,0x8e,0xcd,0xe4,0xd9,0x00,0x80,0xbc,0xdf,0xe2,0x61,0x53,0x04,0x81,0xb0,0x13,0xc5,0x6c,0x77,0x74,0xa3,0x0c,0x5b,0xef,0xef,0xea,0xc7,0x5b,0xeb,0xbf,0xee,0x54,0xd7 +.byte 0x7a,0x69,0x6e,0x39,0xc2,0xed,0x08,0x44,0x82,0x08,0x16,0x8b,0xf1,0x74,0x5f,0xeb,0x60,0xd5,0x46,0x63,0x80,0x39,0xe9,0x91,0x0a,0x17,0x8b,0xd4,0x09,0xdc,0xa6,0xab,0x6a,0xbc,0xf8,0xe9,0x09,0x19,0xc1,0x83,0x9f,0xdf,0xad,0x6c,0x31,0x94,0xb9,0xc5,0x77,0x83,0xd1,0xd8,0x76,0xeb,0x12,0x3c,0x00,0x31,0xea,0xac,0x97,0x39,0x16,0xd5 +.byte 0x81,0xfa,0x6d,0x10,0x5b,0x3e,0x20,0xe1,0x88,0x5c,0x4b,0xf3,0x04,0xd4,0xc3,0xb9,0xec,0xe5,0xb0,0x13,0xf5,0x09,0x5c,0xe8,0x27,0xe2,0xde,0x9b,0xac,0x2e,0xf2,0xe5,0x2c,0x33,0x4b,0x4f,0xec,0xc7,0x08,0xf9,0xc2,0xd3,0x1b,0x4d,0x81,0x69,0x14,0xa1,0xc5,0x0f,0xb2,0x57,0x8b,0xcc,0xca,0x3b,0xc9,0x9c,0x1f,0xee,0x06,0x4d,0xc7,0x62 +.byte 0xcb,0x8f,0x49,0x81,0xfb,0xa5,0x68,0x81,0x36,0x38,0x33,0x6b,0x9e,0x58,0xd4,0x24,0x67,0xf1,0x30,0xd6,0x08,0x61,0x5a,0x7f,0x2e,0x4e,0xf1,0xd6,0x64,0x75,0x72,0xb0,0xdf,0xcd,0xae,0x04,0x41,0xbd,0x04,0x2c,0x96,0x36,0x34,0x32,0xec,0xbd,0xd0,0xbf,0x8e,0xe8,0x47,0xe3,0x22,0xdd,0x79,0x53,0xcc,0x6a,0x25,0xf1,0x5e,0x63,0x09,0x98 +.byte 
0xc5,0x6d,0x0a,0xe3,0x30,0xd6,0x52,0x70,0x21,0xb2,0xef,0x15,0x66,0x4a,0x2d,0x2b,0x5c,0xcb,0x39,0x1b,0x91,0x10,0xa6,0x02,0x22,0xd0,0xcc,0x32,0x50,0x5c,0x70,0x72,0xd1,0x03,0xb3,0x2d,0x2e,0x33,0xed,0xae,0x7a,0x07,0x3f,0x70,0x38,0x35,0xfc,0xcf,0xdb,0xfe,0x7b,0x26,0xd9,0x38,0x1e,0x52,0x07,0x2f,0x72,0x81,0xcc,0xd3,0x21,0x00 +.byte 0x63,0x48,0x38,0x44,0xb8,0x35,0xf2,0x4f,0xe5,0x33,0x8c,0xb3,0x07,0x0c,0xac,0x3d,0x73,0xe8,0xe3,0xb3,0x43,0xc5,0xb4,0x32,0xf4,0x41,0xdf,0x7b,0x06,0x3a,0xb8,0x67,0x17,0xc5,0xec,0x46,0x30,0xc0,0xa4,0x29,0x40,0xe4,0x8a,0xa3,0x14,0x84,0xa6,0x84,0xc7,0x5d,0x4b,0x57,0x37,0x9c,0x42,0xe6,0xa4,0x20,0xf7,0x5d,0xef,0x21,0xe2,0x80 +.byte 0x54,0x6d,0xf5,0xb5,0xbe,0xa3,0x95,0xcf,0x98,0xf8,0x38,0x46,0xa2,0x90,0x57,0x09,0x8f,0xb0,0x6d,0x01,0x5f,0x95,0x5a,0x78,0xf6,0xfd,0x01,0x0f,0xfd,0xa5,0xe2,0xcf,0x54,0xa3,0x2b,0xc1,0x30,0xbe,0x6d,0x1a,0xd3,0xdb,0x5a,0x17,0x43,0x46,0x93,0x81,0x0c,0x85,0x04,0x13,0xda,0xb4,0xde,0x81,0x48,0x5c,0xbc,0x42,0x9e,0x6d,0x6c,0x82 +.byte 0xff,0xa5,0x51,0xb1,0xd3,0xd2,0x3d,0x82,0x82,0xb4,0x96,0xb1,0x38,0x5d,0xc9,0x55,0xcb,0x9f,0xe5,0x47,0xd4,0x52,0x0f,0x76,0x54,0xec,0x39,0xb6,0x40,0xc3,0xc5,0xaa,0xc2,0x30,0x02,0xa0,0x68,0xc3,0x22,0x63,0x5a,0x8c,0x62,0x6d,0x40,0xc5,0xde,0x06,0x29,0x44,0x5d,0x2b,0x18,0x0a,0xa5,0x43,0x47,0xfe,0x5f,0x0f,0x63,0xa4,0x3c,0xa1 +.byte 0x62,0xcb,0x70,0x1d,0xf8,0x0e,0xc9,0xbe,0x27,0x0e,0x87,0x81,0x69,0x4c,0xea,0xbe,0xf9,0x9b,0xda,0xb6,0x9b,0xd0,0xdd,0xa0,0x1e,0x60,0x38,0x88,0x85,0x25,0x53,0xee,0x2c,0x77,0x53,0x82,0xb0,0x88,0x19,0x87,0x2a,0x77,0x7b,0x37,0x4b,0x4c,0xf4,0x96,0x5f,0x73,0xa1,0xbb,0x5c,0xfc,0x7e,0xbb,0xed,0x6f,0xb7,0x6f,0x9d,0x55,0xde,0xd3 +.byte 0xac,0xb9,0x8e,0x36,0x0f,0x3d,0xea,0x87,0xcd,0x19,0x33,0x1d,0xa8,0xee,0xfc,0xcd,0xe5,0x53,0x7b,0xdf,0x37,0x49,0x2d,0x73,0xf5,0x36,0xdd,0x42,0xc6,0x88,0x0d,0xf5,0xf2,0xba,0x2e,0x81,0xed,0x88,0x27,0x8d,0xe5,0x3f,0x83,0x5e,0xde,0x63,0x8f,0x67,0x2b,0x85,0xf3,0x2a,0x9b,0x26,0x3e,0x2b,0xe2,0x29,0xc5,0x5e,0x21,0x04,0xfe,0x5b +.byte 0xb9,0xd8,0xa7,0x7b,0xdf,0xcf,0x61,0xd6,0xaf,0x9b,0x17,0xcb,0xaf,0x8f,0x71,0xb3,0xc2,0x9d,0x9a,0x55,0x1d,0x3e,0x1d,0x17,0x25,0xc8,0x44,0x71,0x29,0x2f,0xc8,0x01,0x3b,0xe4,0xc4,0x2e,0xcc,0x3b,0xdb,0x34,0xbb,0xc0,0xcc,0xb6,0x07,0xe3,0x86,0x4c,0x62,0x02,0xe8,0xc3,0x11,0x85,0x6c,0x18,0x80,0xa3,0xbd,0x02,0x30,0x68,0x36,0xa3 +.byte 0xb6,0xc6,0xbd,0x82,0x43,0x40,0xed,0xa1,0xcf,0xc5,0xce,0xe4,0x27,0x8a,0xeb,0x8c,0x59,0xea,0x4a,0x81,0xd9,0x35,0x87,0x7d,0x6d,0xb2,0x8f,0x67,0x37,0x1f,0x11,0x60,0x0d,0xed,0x34,0xd5,0xa0,0x7b,0x46,0x71,0x68,0x19,0x69,0xd3,0x65,0x1d,0x47,0xf1,0x7e,0x16,0xd8,0xec,0xbb,0x52,0xc3,0x7b,0x62,0x5a,0xb3,0x60,0x67,0x2e,0xfd,0x57 +.byte 0xf2,0xfb,0x3d,0x63,0xe6,0x82,0x20,0xff,0x31,0x90,0x1d,0x5e,0x4f,0x04,0x9a,0xf8,0xb2,0x0c,0x84,0xff,0x7d,0xe2,0xec,0x4b,0x09,0xbb,0xdf,0xae,0xc5,0xaf,0xcb,0x8b,0xb5,0x5d,0xa8,0x53,0x78,0xf9,0xb9,0x43,0x71,0xa6,0xc2,0x10,0xfa,0xad,0xda,0xba,0x46,0x13,0x72,0x97,0xef,0x6f,0xe3,0x4f,0x5f,0xf9,0xec,0x25,0xdb,0xcd,0xca,0x33 +.byte 0x7e,0x50,0x73,0x5b,0xd0,0x9f,0xea,0xd5,0xd9,0x29,0xe8,0x1b,0xc1,0xf8,0x40,0xbf,0x50,0xdb,0x8e,0x39,0x0b,0xb7,0x6c,0xf1,0x34,0x0b,0x1f,0x88,0x27,0x4b,0xea,0x1d,0xb2,0x36,0x07,0x4b,0x22,0xa9,0xd0,0xf8,0xf2,0x13,0x8e,0x97,0x9d,0xd9,0x53,0xd3,0xdc,0x63,0x40,0x11,0xc7,0x74,0x9e,0xd9,0x83,0x01,0xae,0x36,0xcb,0x35,0x9a,0x0c +.byte 
0xb5,0x15,0x0a,0xf5,0x41,0xa5,0x6c,0x72,0x40,0x80,0xf0,0x15,0xc0,0x80,0x23,0x0b,0xab,0x98,0xfc,0xab,0x81,0xe0,0x8b,0x61,0x91,0x18,0xd2,0x23,0x71,0xed,0x32,0x80,0x26,0x86,0x96,0xe9,0x90,0x5e,0x43,0xd2,0x89,0x8f,0x89,0x57,0x73,0xca,0xe1,0x42,0xa9,0xa9,0xed,0xdd,0xc5,0x9f,0xf7,0x00,0x0d,0xa3,0xe5,0xc8,0x6f,0x0c,0x14,0xa4 +.byte 0x9d,0x5a,0x14,0xaf,0x96,0x3a,0xb2,0x64,0xa7,0xac,0x20,0xa9,0x01,0x4c,0xec,0x64,0xc6,0x9b,0xfd,0x04,0xc5,0x2e,0xe7,0xdd,0xa5,0x8e,0xe7,0xe7,0x76,0x53,0x59,0x95,0x14,0x07,0xed,0xe9,0x96,0xd0,0x2d,0xc8,0x9d,0xa2,0x11,0xe3,0x02,0x20,0x68,0x09,0x25,0x69,0x07,0x88,0xdb,0x26,0x36,0xf5,0x8e,0xc3,0xf0,0x70,0x8c,0xeb,0xe6,0xcd +.byte 0xad,0xf3,0x49,0x6e,0x8a,0x54,0xa6,0xdd,0x97,0x8e,0x37,0x28,0x3a,0x6d,0xc4,0xdd,0x99,0x85,0xf7,0x96,0x63,0xb4,0xa2,0xdf,0xff,0x81,0x17,0xa1,0x22,0xb1,0x43,0x5b,0x29,0xdb,0x92,0x91,0xc9,0xc6,0x8d,0x29,0x1d,0x6e,0xe3,0x44,0x3e,0xe4,0x20,0xd5,0xf4,0x4a,0xfa,0xae,0xf6,0x2c,0xff,0x80,0xc9,0xce,0x7f,0x13,0x1e,0xd7,0x24,0xa2 +.byte 0xb3,0x90,0xb8,0x20,0x18,0xe5,0x6c,0x0e,0xf5,0xc6,0x26,0xd6,0xe9,0xe8,0x55,0xe4,0x3f,0x49,0x13,0xe2,0xca,0xef,0x9b,0xc0,0x8f,0x24,0x50,0x37,0xef,0x21,0xff,0x79,0xb7,0x5d,0x86,0x03,0xfb,0x85,0x75,0x74,0xbf,0xc5,0x3a,0x30,0xcc,0x00,0xc3,0x0d,0x4f,0x91,0xd6,0x31,0x19,0xd6,0xcd,0x0e,0x1c,0x53,0x88,0x75,0xb8,0xf9,0x68,0x7a +.byte 0xa4,0x3e,0x8d,0xed,0xba,0x05,0xb4,0x6c,0xe0,0x45,0x9c,0x41,0x34,0x24,0x82,0xaf,0x9a,0xcf,0x9e,0xd2,0x27,0x5c,0x7f,0xb3,0xcb,0xe5,0xad,0xb4,0x8e,0x74,0x9d,0xe4,0xba,0x55,0xb3,0xd3,0x32,0xbc,0x62,0x11,0xb3,0xa4,0x82,0xf0,0xd8,0xfc,0x79,0x03,0x70,0xae,0x7f,0x7f,0xc8,0x50,0xb5,0xbe,0x47,0x14,0x31,0xd7,0x16,0x65,0x52,0x3b +.byte 0xbb,0x42,0x38,0x23,0x77,0x4d,0x38,0x0b,0x0a,0x61,0x94,0xac,0xa3,0xc9,0xd7,0x99,0x4f,0x34,0x3a,0x88,0xe8,0x1d,0x0b,0x97,0x48,0x6d,0x5c,0x61,0x4c,0x3f,0xc2,0x7c,0x6c,0x63,0x00,0xdd,0x59,0xae,0xcd,0x17,0x0a,0x21,0x27,0x98,0x15,0x23,0x6d,0x84,0x7e,0x24,0xd4,0x7f,0x1b,0x3a,0x98,0x52,0xc3,0x60,0x33,0xd6,0xc1,0xfe,0x68,0xa8 +.byte 0x49,0x3d,0x7e,0x53,0xee,0x0d,0xed,0x89,0x9a,0x9a,0xe6,0xa1,0x47,0xc7,0xba,0xf3,0x73,0x5b,0xef,0x33,0x51,0x8c,0x1f,0x84,0xa6,0xef,0x77,0x94,0x2d,0xd6,0xda,0x8f,0x85,0x8c,0xd3,0xb6,0x02,0x68,0x9e,0x57,0xb6,0xd9,0x1a,0x8c,0xb5,0xf4,0x61,0x39,0x29,0xb5,0xb7,0x0d,0x0d,0xa6,0x81,0x87,0x54,0xc0,0xca,0x67,0x09,0xca,0x20,0xf3 +.byte 0x37,0x7e,0x03,0x3e,0x31,0x8c,0x51,0x89,0x06,0x81,0xf6,0x7b,0x8b,0xe3,0x4f,0xd0,0xb8,0x0c,0x34,0x7c,0xd6,0xfc,0x25,0xf8,0x00,0xa6,0x10,0x15,0x0d,0xeb,0x22,0x72,0x03,0x79,0x1c,0x84,0x1d,0x3d,0x10,0xaf,0x43,0x6d,0xd7,0xed,0x10,0x2c,0x14,0x26,0xd4,0xa1,0xee,0x6c,0x7f,0x52,0xe4,0x83,0xcc,0x5f,0x1a,0x4b,0xd0,0xc8,0xfb,0x27 +.byte 0x17,0x2c,0xf6,0x90,0x02,0xb4,0xb0,0x63,0x7c,0x14,0xec,0x9e,0x08,0x60,0xec,0x45,0x85,0xc6,0x76,0x42,0x4f,0x1c,0x5f,0x48,0x7f,0x87,0xef,0x8c,0x04,0x23,0x3c,0xda,0x39,0xbc,0xec,0x09,0xda,0xeb,0x9b,0x72,0x7a,0xb4,0x20,0x1c,0xb2,0xdd,0x2e,0x63,0x72,0xd7,0xb1,0xfe,0x5b,0x21,0x28,0xfb,0xeb,0x45,0x31,0x89,0xe5,0x3e,0xa0,0x85 +.byte 0xa6,0x96,0xdb,0x42,0xd5,0xb4,0x27,0x78,0x10,0xa0,0xcb,0x69,0x68,0x1e,0x76,0xed,0xbc,0x3c,0xa1,0x04,0x10,0x81,0x2a,0x4f,0x52,0x78,0x1e,0xae,0x5a,0x47,0x69,0x81,0xee,0xd3,0x14,0x1a,0x68,0x19,0x75,0x92,0x72,0x47,0x61,0x70,0xcf,0x96,0x35,0xa6,0xbb,0x00,0xaf,0x3e,0x90,0x86,0x22,0x9b,0x72,0x8a,0xa1,0x05,0xe2,0xfb,0xdc,0x30 +.byte 
0xd5,0xdd,0x46,0x1f,0xf6,0x33,0x43,0xd1,0x59,0xc4,0x93,0x89,0x36,0x6a,0x7b,0x76,0xa7,0x40,0x6c,0xb1,0x9c,0xce,0x3a,0x8c,0xb6,0xd5,0xd1,0x0a,0x78,0xf6,0x08,0xfb,0xf5,0x9c,0xee,0x74,0x0d,0x39,0x51,0x6d,0x0e,0xa6,0xe9,0x22,0xd8,0x30,0xdf,0x16,0xf7,0xe3,0xbd,0xbb,0xe6,0x45,0xb8,0x9c,0xb5,0x49,0xf0,0xe8,0x7c,0xce,0x25,0xf8 +.byte 0x46,0xc0,0x59,0xc2,0xbc,0xdd,0xea,0x3e,0xeb,0x2e,0xf5,0xfd,0xd9,0x05,0x8a,0x2f,0xa3,0xa4,0x63,0xa6,0x50,0x08,0xce,0x2a,0x69,0xe7,0x58,0x57,0xa1,0xb2,0x44,0x41,0x04,0xfc,0x61,0xb1,0xb8,0x19,0x27,0x14,0x71,0x2f,0x55,0x64,0x28,0xa0,0xcc,0x47,0x0c,0xd4,0xed,0xfd,0x07,0x99,0xc6,0x9e,0xdc,0x5f,0x19,0x03,0x1a,0x00,0xda,0xf6 +.byte 0x2c,0x95,0xb0,0xd2,0xaa,0xfb,0xbc,0x1a,0xf3,0x62,0xaf,0x9c,0x38,0xde,0x61,0x30,0xd5,0x56,0x82,0x4b,0xf6,0xeb,0x34,0xc0,0xdc,0x51,0x97,0x89,0x80,0x47,0x9d,0x2a,0xae,0x0e,0x92,0x48,0xd2,0x9d,0x5a,0x67,0xef,0x33,0xa3,0xbe,0xdd,0x80,0x64,0x9c,0xc1,0xaf,0xf9,0x1a,0x4b,0x55,0x67,0x88,0x37,0x37,0xff,0x98,0xe3,0x9e,0xa9,0x4e +.byte 0x1f,0xa1,0x32,0x70,0xa3,0xbb,0xdc,0x6e,0xb3,0x6d,0xfe,0x8f,0x74,0x89,0xed,0xe1,0x13,0x3c,0x8f,0x08,0x75,0x84,0x84,0xee,0xac,0xcc,0xa5,0x47,0x9f,0x3e,0xb9,0xed,0x26,0x20,0xf7,0x7b,0xfb,0x8a,0x48,0x58,0x51,0x24,0xf9,0xeb,0x66,0x6d,0xd6,0x83,0x24,0xff,0x9f,0x0d,0x38,0x9c,0xf9,0x24,0x99,0x12,0x49,0xb6,0xdd,0xce,0x44,0xe7 +.byte 0x31,0x3d,0x4b,0x23,0x8a,0xd5,0x62,0xa2,0xdb,0x78,0x56,0x3a,0x62,0xc8,0x59,0x5f,0xcc,0x58,0x76,0x19,0x5d,0x48,0x4a,0xc2,0x87,0x21,0xc3,0x3d,0x3a,0x38,0xbd,0x20,0xfd,0xc3,0xa6,0xab,0x32,0xb8,0xc8,0xd1,0x5c,0xa5,0xb4,0x64,0x60,0xd2,0x87,0xb7,0xe9,0xc2,0x2b,0xb2,0x75,0x04,0xf4,0x6e,0x96,0x99,0x5d,0x08,0xff,0xa3,0x45,0x8a +.byte 0xad,0x7c,0xee,0x94,0x4e,0x45,0x86,0xad,0x0a,0x7a,0x5c,0x8f,0xff,0x28,0xb3,0x3c,0xf8,0x5e,0xb3,0x1e,0x5c,0xe0,0x22,0xf7,0x4e,0xe4,0xdf,0x1f,0xd2,0xa2,0x37,0x4a,0x87,0xa6,0x16,0x80,0x0c,0xc3,0x75,0x18,0xe4,0x76,0x8f,0xc3,0x1b,0xee,0xb1,0xe4,0x4b,0xeb,0x6f,0x15,0x48,0x60,0xaf,0x8e,0x0e,0xeb,0xbe,0x26,0xa3,0xbd,0x2a,0xb5 +.byte 0x6d,0x8b,0xd1,0xa1,0x0f,0x8e,0xaa,0xaa,0xb8,0x8d,0x84,0xe7,0x65,0x40,0x60,0x3d,0x59,0xb7,0x1c,0xef,0x08,0x0e,0x6f,0x21,0xb4,0xe6,0x10,0xda,0x59,0x9a,0x0f,0xe6,0xba,0xfd,0xed,0x7f,0xc1,0xe3,0x7a,0xb7,0x21,0x5d,0xcf,0x1c,0xbd,0xd2,0x59,0xc0,0x31,0xa5,0x8a,0x39,0x86,0x9e,0x7e,0x6a,0xcb,0x87,0x6f,0x01,0xba,0xa4,0x06,0x6b +.byte 0x3b,0x5d,0x68,0x85,0x11,0xd2,0x2a,0x3c,0x8e,0x3a,0x8c,0x8b,0x59,0xa0,0x4a,0xfb,0x76,0x85,0xe6,0x47,0xc3,0xf4,0xc4,0xe6,0xcc,0x7b,0xff,0x71,0x03,0xd1,0xc2,0x01,0xe4,0x5e,0x49,0x31,0xa6,0x0e,0x17,0x9b,0x42,0xdc,0x75,0xd6,0xfe,0x09,0x0b,0x6d,0x21,0x46,0xfe,0x40,0xcd,0x7c,0xdb,0xca,0xc9,0xba,0x64,0x83,0xd3,0xf7,0x0b,0xad +.byte 0xff,0xfd,0xe3,0xd9,0x49,0x7f,0x5d,0x48,0xaa,0xac,0xe5,0x74,0x2a,0x14,0x6f,0x64,0x21,0x81,0x09,0xcd,0x2d,0x19,0xf5,0x56,0x85,0xa8,0xec,0x98,0x65,0x46,0x99,0xec,0xbe,0xe3,0x86,0xd3,0x41,0x8b,0xe4,0x76,0x9b,0x5b,0x98,0x33,0x9e,0xdb,0xc9,0xde,0x89,0xfa,0x60,0x58,0xa8,0x2f,0x7a,0xca,0x30,0x91,0xc8,0x26,0x14,0x9c,0xd6,0x6d +.byte 0xc2,0x3c,0xca,0xe0,0x9a,0x13,0x72,0x63,0x5e,0x20,0xfd,0xa0,0xca,0xb2,0xed,0x37,0xc5,0xd4,0x4e,0xec,0x1f,0x74,0x25,0x37,0xe2,0xbe,0xb1,0x7f,0x52,0x26,0x28,0x4f,0x02,0xe5,0x6a,0x27,0xf3,0xc4,0x9c,0x69,0x09,0xac,0xff,0x77,0x9c,0xa4,0x1d,0xe7,0xa1,0x7c,0x37,0x70,0x3b,0x3c,0xc4,0x16,0x8f,0x5d,0xe5,0x05,0xa9,0x2c,0x91,0x2e +.byte 
0x87,0xb0,0xa9,0x2e,0x32,0x73,0x5c,0x15,0x1e,0xbe,0x01,0xc9,0xd8,0x2e,0x26,0xf4,0x05,0x2d,0xe0,0xc0,0x38,0x81,0x61,0xf4,0x37,0x08,0xa0,0xc0,0x28,0x0a,0xb6,0xd4,0xcc,0x2c,0xc6,0xd4,0xda,0x48,0x49,0xcf,0x76,0x91,0x23,0x51,0x91,0xe7,0x50,0x94,0xae,0xb7,0x15,0x26,0xaa,0x82,0xd0,0x97,0xe8,0x5e,0xaa,0xfc,0xaa,0x60,0x62,0x81 +.byte 0x80,0xfd,0xfd,0xaf,0x65,0xcc,0x29,0x27,0x95,0xad,0x56,0xb9,0x85,0x66,0x49,0x62,0xb3,0x1a,0xf4,0x54,0xc7,0x5d,0x7f,0x73,0xe0,0xd2,0xc8,0x18,0x95,0x62,0x2f,0x5c,0x96,0xfb,0x63,0x15,0x46,0x07,0x5f,0x3e,0x52,0x18,0xf8,0x5d,0x45,0x0b,0xb6,0xf7,0xc5,0x3d,0x16,0xaa,0x0b,0x8f,0x9d,0x16,0xc8,0x93,0x13,0xd2,0xba,0x7a,0x52,0x1a +.byte 0x7a,0x73,0xc4,0xca,0xfb,0x04,0xaf,0x6f,0x3e,0xfa,0xff,0x29,0x09,0xe2,0x74,0x35,0xc1,0xfc,0x21,0xcf,0x5f,0xf7,0x82,0x55,0x75,0x27,0xc9,0x91,0xc5,0xbf,0xe6,0x68,0xb6,0x0f,0x10,0x0e,0x91,0x30,0xb7,0x05,0xca,0x59,0x4a,0x7f,0xb0,0xf6,0xaf,0xf1,0x5d,0xc9,0xc5,0x06,0xc5,0xf4,0xe1,0x75,0x16,0x9a,0x2c,0xc0,0x3f,0xc1,0x98,0x91 +.byte 0xb7,0xe6,0xb1,0xf2,0xf9,0xfa,0x6d,0x27,0x98,0x33,0x8b,0x73,0x7a,0x57,0x12,0x6f,0x80,0x11,0x28,0x17,0x7d,0xf1,0x26,0xaa,0x05,0xf1,0x6e,0x86,0x98,0xe7,0xf6,0x9f,0x9c,0x06,0x8f,0xec,0xd7,0x2d,0xb0,0x83,0xdf,0x23,0x80,0x34,0xd3,0xd7,0xf7,0xd5,0x0d,0x52,0x18,0xcd,0xc7,0xe7,0x15,0xc9,0x1b,0xae,0x58,0xcf,0xc5,0xdd,0x25,0x2a +.byte 0xff,0xa5,0xf3,0x6d,0x20,0xfd,0xda,0xfd,0x78,0x30,0x14,0x1f,0xb3,0x47,0xe3,0x2d,0x54,0x87,0xdc,0x30,0xbe,0x41,0xc0,0x48,0x52,0x82,0x49,0x78,0xad,0xfd,0x24,0xad,0xd6,0xc1,0x14,0x1e,0xa0,0xc1,0x3d,0x82,0x59,0x01,0x9b,0xc3,0xf4,0xf7,0x26,0xce,0x92,0x50,0x13,0x47,0xe0,0xf3,0xfa,0xd9,0x61,0x19,0x80,0x12,0xee,0x73,0x45,0x5b +.byte 0x34,0xfc,0xb2,0x84,0xb2,0x3f,0xdc,0x77,0x8e,0x2d,0xb3,0x62,0xb9,0x03,0x2d,0xb6,0x2a,0x17,0xcd,0xfb,0x54,0xc2,0x5e,0xb9,0xcf,0xd6,0x05,0xe2,0xac,0x3f,0xce,0x50,0x0f,0xa1,0x3e,0x67,0x68,0x46,0x0c,0xab,0xa1,0xdc,0x2a,0x26,0x1f,0x22,0x1b,0xa7,0xc9,0x3b,0x6c,0x97,0x5d,0x5c,0x7d,0x1a,0x46,0x4a,0x99,0x92,0x85,0x87,0x35,0x6c +.byte 0x78,0x9d,0xb0,0x39,0xd6,0x3b,0x52,0x60,0xb4,0xba,0xcc,0x2e,0xe9,0xe1,0x91,0x51,0xc1,0x52,0xc7,0x5d,0x84,0x95,0x54,0x25,0xdd,0xcd,0x40,0x35,0xa1,0xc8,0x7e,0xff,0x82,0x55,0x9f,0x64,0xef,0xa7,0xc1,0x79,0x57,0xc7,0x44,0xa8,0x1c,0x06,0xaa,0x2a,0x05,0x65,0x6c,0xdc,0x90,0x7d,0x2e,0x53,0x3c,0x56,0xe1,0x30,0xdf,0xcb,0x75,0x3d +.byte 0x36,0x88,0xfd,0x72,0x2d,0xc7,0x8e,0x2f,0x11,0x5a,0x2e,0xa9,0xd6,0x37,0x4b,0x31,0x4e,0x6e,0xa0,0x4a,0xd9,0xa9,0x48,0x18,0x50,0xb1,0x28,0xf6,0x74,0x03,0x44,0xa7,0x06,0x55,0x86,0x1a,0x1b,0x07,0x79,0xc4,0x25,0xba,0x5d,0xce,0xa2,0x96,0x7d,0x62,0xa7,0x21,0xf0,0xa7,0xc2,0x91,0x03,0x38,0x37,0x0b,0x20,0x40,0x88,0x7b,0x28,0xf4 +.byte 0xf3,0xc2,0xb0,0x4b,0xf6,0xef,0x2f,0xd9,0xb5,0x81,0x17,0x95,0x42,0x98,0x7f,0x18,0xd4,0x7e,0xa1,0x85,0xbf,0x62,0xdc,0x40,0xe4,0xd3,0xcc,0x78,0x01,0xec,0x12,0xcc,0x04,0x5b,0xfe,0xdb,0x39,0x7c,0x1e,0x56,0x7c,0x72,0x57,0xb9,0xdf,0x9d,0x43,0xd4,0xe3,0x1f,0xbf,0x69,0xfb,0x43,0x23,0xd8,0x75,0x81,0xe8,0x39,0x0f,0xe4,0xe9,0x51 +.byte 0xea,0xb7,0xa7,0xc6,0x17,0xc6,0x75,0x4c,0xa8,0x17,0x41,0x1c,0x55,0x8e,0x8d,0xf3,0x64,0xbc,0xc3,0x33,0xa7,0xc1,0xbe,0xa2,0x89,0x75,0xd6,0xda,0xad,0x44,0xd5,0xdd,0x18,0xe2,0xfc,0x1d,0xa1,0xbc,0x1a,0xb8,0x40,0x1a,0x4f,0x44,0x4b,0x56,0xe9,0xf4,0xa8,0x16,0xe6,0xc9,0x40,0x90,0x9b,0x49,0xae,0x62,0x12,0x3d,0x50,0x2e,0x7b,0x60 +.byte 
0x6f,0x04,0x01,0x2c,0x83,0x2a,0xd2,0x92,0x63,0xa2,0xe2,0x39,0x9a,0xc4,0x1e,0x5a,0x53,0x3f,0x4d,0x69,0xfa,0x0a,0x22,0x13,0x80,0xa4,0x6e,0xfb,0x09,0xcb,0x35,0xd7,0x12,0xa4,0xcd,0xfc,0x0b,0x06,0xa6,0x5e,0xc6,0x4a,0x22,0x56,0x5d,0x7f,0x70,0xd0,0xf8,0xe6,0x96,0x77,0xce,0xd9,0x69,0x6c,0x06,0xac,0xaa,0x94,0x6d,0x57,0x1b,0x28 +.byte 0xb4,0x07,0x50,0x19,0xd1,0x86,0xba,0xe6,0xe6,0x31,0x74,0x1d,0x3d,0xe8,0xe2,0x7b,0xfe,0xc9,0x41,0x89,0x20,0x5b,0x6a,0xc0,0x18,0x16,0xee,0x35,0xfa,0x56,0x35,0x3e,0x53,0x99,0xfb,0x8d,0xae,0x75,0x4f,0xc5,0x8d,0xff,0x23,0xd5,0x42,0xf4,0x81,0x5c,0x8b,0x71,0x7a,0x22,0xb0,0x6b,0x45,0x86,0xa6,0xc6,0xdb,0xa6,0x83,0x01,0x28,0xde +.byte 0x38,0xaa,0x6e,0xf8,0x5a,0xf2,0xcc,0x3c,0xc5,0x65,0x78,0x37,0xe8,0x8a,0x59,0xf3,0xfe,0x8b,0xcd,0xf6,0x31,0x46,0xdc,0x72,0x19,0xf7,0x73,0xac,0x5c,0xf1,0xe3,0xfd,0x85,0x51,0xec,0x92,0x3a,0xf3,0xd7,0xb2,0x95,0x53,0x79,0x48,0xd3,0x29,0x84,0xec,0xc5,0x0a,0x71,0x15,0x52,0x69,0x6a,0xe1,0xab,0x69,0x94,0xc2,0x51,0xdf,0x27,0xd8 +.byte 0xb1,0x05,0xc4,0x12,0xea,0x1e,0xda,0x6e,0xf2,0xf5,0x8a,0xa8,0x72,0x74,0x5a,0xe5,0x45,0x5b,0x5f,0xf9,0xb0,0x56,0x5c,0x85,0xf7,0x63,0x8d,0x1d,0xbf,0xe9,0x7c,0x97,0xe9,0x37,0xb3,0x5b,0x4b,0x57,0xfc,0xf4,0x58,0x84,0x26,0x55,0x07,0xc7,0x0a,0xfe,0x5a,0x58,0xd0,0xd8,0x19,0xf4,0x02,0xad,0x2c,0x4e,0xbd,0xe1,0x07,0x48,0x3b,0xc4 +.byte 0xd6,0x23,0x3a,0x63,0xc3,0xf5,0x17,0x46,0x03,0xa4,0x9a,0x10,0xf9,0xac,0x70,0x9c,0x13,0x10,0x94,0xda,0x17,0xc5,0xbb,0x87,0x0f,0x9b,0x4f,0x54,0x55,0x6b,0x57,0x2d,0x12,0x0b,0xa7,0x9c,0x77,0x6d,0x67,0xb0,0x03,0xdf,0xc6,0xa2,0x76,0x96,0x0c,0xac,0x30,0xbc,0xa2,0x55,0x23,0x01,0xae,0x51,0x50,0xd4,0xab,0xd0,0xee,0x75,0xf1,0x96 +.byte 0x75,0xf5,0x2e,0xae,0x52,0x31,0x0b,0x0a,0x8a,0xdb,0x4c,0x4d,0x4c,0x80,0xfc,0xd7,0x68,0x05,0x54,0x47,0xa5,0xc4,0xb1,0x63,0x87,0x43,0x1b,0xe1,0x0b,0x4f,0xff,0x0c,0x02,0xf7,0x00,0xd4,0x8d,0x6e,0xa1,0x21,0x91,0x62,0xec,0x55,0xd5,0x72,0x70,0x59,0x7a,0xa4,0x0e,0x78,0x7a,0x87,0x1f,0x71,0x35,0x3b,0xf7,0x1f,0x66,0x8c,0x90,0xf9 +.byte 0x6d,0x1f,0x74,0x47,0x41,0xf5,0x21,0x98,0x0d,0x42,0x61,0x21,0x0b,0x62,0x59,0xc7,0x5e,0x58,0x37,0xfb,0xee,0xbb,0xa0,0x45,0xa8,0x84,0xae,0x41,0x29,0xc9,0x88,0x64,0x69,0x75,0xc1,0x5f,0x63,0x7c,0x00,0x1c,0x35,0x61,0x9e,0xad,0x19,0xd7,0xd8,0xf1,0x64,0x57,0x10,0x87,0x73,0xa8,0x8b,0x39,0x9b,0x1c,0x1a,0xc2,0x1b,0x01,0x1a,0x41 +.byte 0x26,0x58,0x93,0x8f,0xed,0xf9,0xe7,0xfe,0xcc,0x27,0x1b,0x6b,0xb8,0x28,0x5a,0x0b,0x04,0xa0,0x94,0x23,0x4b,0x21,0x5f,0xb3,0xc9,0xb6,0x7b,0x36,0x5a,0x67,0x6b,0xd2,0xc2,0x53,0x97,0x5d,0xa5,0x43,0xd3,0x79,0x83,0xe2,0x3b,0xe0,0xaf,0x5f,0xbd,0xf3,0xb0,0xfc,0x04,0x95,0x06,0x17,0x0c,0xe2,0x68,0xe8,0xf3,0x90,0xc7,0x2b,0x7b,0xcc +.byte 0xaa,0xce,0xf5,0x0b,0x3c,0x3f,0x10,0xa7,0x31,0x9d,0xf0,0x1e,0x3e,0x74,0x57,0xbd,0x87,0xe7,0x37,0xd0,0x37,0x09,0xae,0x03,0x96,0xb1,0xad,0x8f,0x2d,0x72,0xdc,0x0f,0xdf,0xd9,0xfb,0xcc,0xb8,0x48,0x62,0xf7,0xad,0x05,0x4d,0xc6,0xe5,0x92,0xe3,0x95,0xa0,0x74,0x7a,0xa6,0x84,0x13,0x68,0x17,0xaa,0x8f,0x40,0x2a,0x8d,0x2b,0x66,0xdc +.byte 0xf8,0xf6,0x6d,0x7c,0x7e,0x40,0x22,0x05,0x16,0x20,0xbc,0xe5,0xc2,0x87,0xe2,0xd5,0xbd,0x47,0xd5,0x69,0x95,0x12,0x25,0x1c,0xaa,0x9d,0xb5,0x73,0x08,0xaf,0xfb,0x46,0xa5,0x11,0x2c,0x93,0xc6,0xfc,0xc0,0x5e,0x0e,0x99,0x1c,0x80,0x5f,0xe5,0xc8,0x52,0x73,0x35,0x4d,0xbc,0x70,0xeb,0x40,0xc9,0x47,0x8a,0x8f,0x19,0xd9,0xa9,0xec,0x4b +.byte 
0x88,0x53,0x56,0x08,0x4a,0xa2,0x32,0x1f,0xe2,0xbb,0x68,0x35,0xfd,0xf2,0x0e,0x0f,0x7f,0xc8,0xf1,0x59,0xac,0x97,0x8f,0x84,0x69,0xb6,0xb9,0x5f,0x84,0xe9,0xf2,0xf9,0x09,0xf6,0xf1,0x31,0xd7,0x1a,0xa8,0x25,0x32,0x5f,0xb1,0xa7,0x84,0x15,0xfa,0x07,0xa8,0x53,0xce,0x2a,0x26,0xe0,0x4d,0x07,0x4f,0x45,0x63,0x76,0xfd,0xe3,0xb4,0x4e +.byte 0x81,0x5e,0xe6,0x01,0x9c,0xf5,0x82,0x2d,0x71,0x0f,0x98,0xb4,0x72,0x06,0xbc,0x89,0x89,0x60,0x5f,0xd9,0x92,0xcf,0xb9,0x41,0xe3,0x13,0xaa,0xe4,0x80,0xb5,0x75,0xf4,0x9a,0x1b,0xc2,0xa3,0xa4,0xa9,0x0f,0x15,0xdc,0x26,0xdd,0x20,0x10,0x27,0xbd,0x06,0x77,0x12,0xa5,0xb3,0xde,0x9f,0xbf,0xc4,0xb6,0x1d,0x76,0xdc,0x16,0x00,0x2e,0xe2 +.byte 0x00,0x4d,0xb3,0x62,0x57,0x73,0x1e,0x90,0xe2,0xaa,0x4c,0x47,0xdf,0x6b,0x2d,0x66,0x2f,0x82,0x55,0x91,0x26,0x33,0xb9,0x3a,0xc7,0xf1,0x0a,0xda,0x9b,0x6b,0x05,0x82,0x0f,0x0e,0x30,0x74,0x0b,0xea,0x0f,0x49,0x55,0x3b,0xe7,0x42,0x48,0xca,0x82,0x3e,0x8c,0xbc,0xe2,0x88,0x43,0x44,0x0d,0x37,0x9b,0xd1,0xfc,0xf1,0x45,0x46,0x0e,0xe1 +.byte 0xec,0x91,0x39,0x96,0x7d,0xbc,0xd5,0xb1,0x11,0x55,0x54,0x49,0x4f,0x18,0xed,0xec,0x58,0xdb,0xb3,0x7d,0x64,0x8d,0xfc,0x65,0x1f,0xf0,0xe0,0xc0,0x41,0xc0,0x19,0xeb,0x16,0x16,0x71,0x36,0x88,0xcf,0x75,0x3d,0x9c,0xe6,0xa0,0x84,0x54,0x26,0x64,0x95,0x9a,0xe1,0x0b,0x51,0xcf,0x9a,0x55,0x60,0x4d,0x9d,0x1d,0x37,0x71,0xa8,0x94,0x0a +.byte 0x20,0xeb,0xf2,0x91,0x14,0xfc,0x12,0xb0,0x1e,0xe3,0x5e,0x3a,0xbb,0x22,0xde,0x20,0xb1,0x58,0xef,0x0b,0xb1,0xc2,0x2f,0xea,0xd8,0xdb,0x1d,0x3a,0x67,0x7b,0xbd,0x26,0xfa,0x4a,0x3c,0x3d,0xbd,0x87,0x4c,0xba,0x57,0xdf,0xfb,0x1d,0xf7,0x26,0x5f,0x52,0x4e,0xdd,0x9b,0x38,0x62,0xed,0x48,0xc1,0xae,0x7f,0xa8,0x13,0x05,0x09,0xff,0xc0 +.byte 0xd3,0x49,0x75,0x1f,0x6a,0xe0,0x79,0x94,0xc1,0xe9,0xe3,0xf5,0x33,0x40,0xd4,0x6b,0xfe,0x4d,0x6e,0x84,0xb9,0x20,0x68,0x2b,0x6c,0xb3,0xf1,0xb1,0x1c,0xfd,0x93,0x14,0x7f,0x35,0x9b,0xd5,0x07,0x15,0x87,0x56,0xb9,0x45,0x22,0x64,0x73,0xdb,0x34,0x35,0xca,0x15,0x4e,0xa2,0xa2,0xe2,0x7a,0x6e,0x14,0x46,0xf5,0xf1,0x70,0xd3,0x3a,0x2e +.byte 0x38,0x9d,0xf6,0xc6,0x29,0xd5,0x7f,0xc7,0x77,0x2c,0x33,0x55,0x1c,0xc2,0xf1,0xaf,0x8e,0x4d,0x1b,0x22,0x36,0x35,0x93,0x47,0xa5,0x59,0xb4,0x94,0x0f,0x2d,0x66,0x24,0x6f,0x57,0xa4,0x95,0xf3,0xd7,0xf3,0x59,0x9d,0xc0,0xda,0xa7,0xf7,0xf2,0x8d,0x93,0xc9,0x90,0x91,0x9e,0x12,0x3f,0x34,0x01,0x90,0x8b,0x13,0x09,0x3d,0x2f,0xa8,0x31 +.byte 0xfa,0x39,0x4a,0x7d,0x0d,0x34,0xa3,0xf1,0x75,0xdb,0xa2,0xd2,0x5c,0xf1,0x72,0xfd,0x7f,0x7b,0x15,0x92,0xf0,0x71,0xd6,0xa0,0x74,0x53,0x61,0x67,0xa4,0x8b,0x72,0x3a,0x66,0x0a,0xce,0xc9,0x1c,0x5b,0x4d,0xaa,0x0a,0x3a,0x91,0x0a,0xbb,0xef,0x6e,0x8d,0x00,0xc0,0xa1,0x89,0xa9,0xbd,0x5a,0x2d,0xf8,0x7c,0x1f,0xb2,0x5a,0x73,0x33,0xe7 +.byte 0xb3,0xfd,0xd4,0xe3,0x81,0x69,0x30,0xc1,0xf8,0x97,0x7b,0xf3,0x63,0xaa,0xd5,0x5a,0x98,0x95,0xb3,0x65,0x2d,0xf9,0x68,0x2e,0x2c,0x26,0xe6,0x77,0x8f,0x76,0x7a,0x02,0xc7,0x50,0x28,0x40,0xcf,0x44,0x66,0x18,0x54,0x52,0xef,0x79,0x26,0xc2,0x76,0x5b,0x71,0x92,0x49,0xba,0xe1,0xd7,0xf2,0xdd,0x57,0xe0,0x78,0x6e,0xb6,0xdd,0x0d,0x20 +.byte 0x85,0xf9,0x34,0x9e,0x65,0x6b,0x9f,0x41,0x24,0xe2,0xb1,0x2a,0xef,0x8b,0xd2,0x19,0x81,0x73,0x56,0x5a,0x84,0xd3,0x46,0xf8,0x74,0xe3,0x1f,0x3d,0xd9,0x16,0x86,0x38,0xf6,0x7c,0x04,0xab,0x9a,0x64,0x0e,0x48,0x06,0x4c,0x61,0xcd,0x2d,0x4d,0xef,0x6f,0xd6,0x7d,0x31,0x1c,0x56,0x65,0xc4,0xf1,0xa7,0x15,0xac,0xa4,0xe2,0x8b,0x83,0x5e +.byte 
0x64,0x36,0x2e,0x77,0x94,0x2e,0x2e,0xa3,0x62,0xcf,0x6e,0x7a,0x6d,0x39,0xaf,0xf7,0x96,0x88,0x31,0x14,0x58,0x46,0x30,0x0c,0x36,0x3a,0x4c,0x53,0xe0,0xa7,0x24,0x76,0x84,0x0f,0xfb,0x7e,0x55,0xa0,0x0f,0x63,0xfc,0xd6,0x1f,0x58,0x68,0xb5,0xcc,0x77,0x4f,0x16,0x91,0xa7,0xfd,0x62,0xb3,0x88,0x13,0x7c,0xcb,0x63,0x6d,0xe4,0x38,0x4c +.byte 0x6e,0x3b,0xf7,0xe3,0x8d,0x52,0x84,0x61,0x19,0x12,0x51,0xbe,0xed,0x32,0x3d,0x77,0xdd,0xa1,0xc3,0x59,0x65,0x79,0xa1,0x6b,0xbc,0x65,0x6c,0xe3,0x7e,0x60,0x49,0xbd,0xcf,0x6f,0x61,0x97,0x98,0xbe,0x74,0x38,0xd1,0x09,0xc1,0x59,0xe5,0x7f,0xfe,0xbf,0xfd,0x60,0x1b,0x96,0x00,0x46,0x56,0x4d,0x81,0x4c,0x70,0x59,0x39,0x66,0x13,0x58 +.byte 0xe7,0x62,0x3a,0xfc,0x1b,0xe5,0xf9,0x03,0xd4,0x4b,0xab,0x1d,0x56,0x22,0x4a,0x09,0xa5,0xdd,0xac,0x39,0xbe,0x27,0x39,0xb3,0xe8,0xad,0xe0,0x07,0x86,0x10,0xce,0xa9,0x4e,0x8b,0x47,0x8d,0xb8,0x63,0x2f,0x61,0x1a,0x8b,0xd4,0xd3,0xfe,0x73,0x82,0x5a,0xd6,0xa9,0x46,0x56,0xa7,0x81,0xe9,0xda,0xb9,0x17,0xa7,0xc8,0x0f,0x24,0x16,0x6a +.byte 0x12,0xfe,0xc3,0x65,0x85,0x77,0xab,0x89,0x44,0x1b,0xa3,0x8b,0xfd,0x07,0xf4,0x77,0xaa,0xe1,0x71,0x33,0x74,0x93,0xdc,0x90,0x53,0x39,0x47,0x8c,0xea,0x18,0xe1,0x6a,0xed,0x8c,0x56,0x08,0x2f,0xa1,0x1f,0x22,0xf2,0xc0,0x12,0xcd,0xb7,0xdf,0xb6,0x3c,0xd6,0x22,0x6c,0x5b,0x00,0x0f,0xdb,0x66,0x5b,0x54,0x35,0x48,0x37,0x8c,0x79,0x74 +.byte 0xd1,0xb0,0x15,0x01,0x22,0x3a,0x7c,0x17,0x8c,0x20,0x06,0x9b,0x13,0x6e,0xee,0xbf,0xb4,0xac,0x01,0x61,0xb9,0x28,0x65,0x8e,0x53,0x12,0x4f,0xe0,0x5f,0xfc,0xdb,0x40,0x6c,0xa2,0x19,0x64,0x49,0x7a,0xc7,0xc5,0xc8,0x53,0x6e,0xd5,0x68,0xe1,0x61,0xe5,0x87,0xc2,0x99,0x59,0x4c,0x27,0xc8,0xd0,0xd0,0x10,0xce,0x9f,0x09,0xff,0xf5,0xa8 +.byte 0xf8,0x79,0xf6,0x0f,0x73,0xda,0x8a,0x36,0x8e,0x48,0x7e,0xbd,0x98,0x76,0x57,0xfa,0x5c,0xec,0xa5,0x3d,0x30,0xfe,0xa3,0xe5,0x27,0x87,0xcf,0x26,0xfe,0x61,0xe4,0xed,0xd1,0xfb,0xfc,0x91,0x5d,0xb6,0x70,0x2c,0x2c,0x59,0x14,0xd5,0x1d,0x9a,0xb9,0x2c,0xef,0x24,0x7b,0x10,0x8d,0x99,0x63,0xaa,0x82,0xf0,0x1c,0xe8,0xa0,0x00,0xa5,0xa7 +.byte 0xf8,0xc0,0x35,0x9e,0x12,0x18,0xaf,0x42,0x9d,0xe5,0x2b,0x72,0x6c,0x31,0xd8,0x8f,0x6c,0xde,0x2e,0x37,0xa6,0x73,0x06,0xe7,0x90,0x43,0x79,0x99,0x64,0xd1,0x17,0xa1,0x43,0x6d,0xd4,0x90,0x50,0xf2,0xcc,0x0b,0x73,0x49,0x9e,0x14,0x7c,0x49,0x92,0x05,0x0e,0x8c,0xda,0xb7,0x18,0xf0,0xcc,0xea,0xe4,0x32,0x58,0xc7,0xbd,0x8e,0xca,0x35 +.byte 0x52,0x9f,0xec,0x5d,0xa0,0x6c,0x83,0x61,0x07,0x74,0x37,0x4a,0x10,0xa0,0x98,0x83,0x3a,0x65,0x17,0x63,0xd0,0x22,0x96,0xb5,0xed,0xbb,0xbb,0x1c,0x18,0x8a,0x49,0x3d,0x0f,0xcc,0x24,0xb3,0x9b,0xb6,0x23,0x2e,0x9d,0x97,0xe7,0x31,0xf8,0x36,0x6d,0x7b,0xa1,0xf1,0x02,0xde,0x7c,0xad,0x77,0x5d,0x85,0x7c,0x39,0x61,0xc7,0xd7,0x3f,0x70 +.byte 0x1c,0xe1,0x0e,0x49,0xf4,0xcd,0xab,0xfd,0x4d,0x2f,0xc7,0xb7,0x53,0xfc,0xed,0xeb,0x41,0x2a,0x80,0x40,0xf3,0x47,0xf8,0x15,0xa0,0x4c,0x8b,0x34,0xf6,0x6a,0xb8,0x30,0x09,0x4d,0xe6,0x60,0xb7,0x24,0x6b,0x4c,0x26,0xdf,0x83,0x37,0xc7,0x96,0xba,0x35,0xda,0x29,0x4e,0xca,0x52,0xf7,0x41,0xd3,0x98,0x27,0xb2,0x9e,0xec,0xcc,0x12,0xdc +.byte 0x77,0xfd,0x11,0xbd,0xbd,0xbb,0x5e,0x0c,0x37,0x29,0xd2,0x4f,0x7d,0x5c,0x97,0xad,0x72,0x93,0x4a,0xfa,0x17,0x07,0x07,0x26,0xee,0xa7,0x29,0x2e,0xdb,0xf6,0x60,0x65,0x2d,0x85,0xbe,0x27,0x4d,0xf7,0x2b,0xb4,0x81,0xf5,0x3a,0x1d,0xae,0x25,0x8b,0x60,0xc2,0x75,0x3a,0xfd,0xf9,0x4d,0x90,0x7a,0x8a,0x3a,0xf6,0xa9,0xf0,0x11,0xd2,0xb9 +.byte 
0xdb,0x23,0x40,0x9d,0x33,0xc3,0xbf,0x60,0x95,0x9c,0x6f,0xa9,0x82,0x42,0xe5,0x67,0x52,0x36,0xea,0x68,0x64,0x24,0x85,0x46,0x7e,0x2a,0x1a,0x6a,0x4b,0xa8,0xb0,0xa0,0x9c,0xb8,0x4a,0xb6,0x2e,0xb2,0x6b,0xf4,0x63,0x9f,0x54,0xb5,0x6f,0x1b,0xf5,0x71,0x7e,0xf8,0xef,0xb2,0x92,0xe2,0xcf,0x65,0xb4,0x02,0x9b,0x75,0x4b,0xf9,0x6b,0xa1 +.byte 0x24,0x3b,0xea,0x7f,0x31,0x08,0xd4,0xdc,0xab,0x12,0xc0,0xca,0x64,0xee,0xfa,0x61,0x1c,0x0f,0x24,0xc3,0x8c,0xbd,0xc8,0xd2,0x42,0xf7,0x1f,0x2e,0xd3,0xd1,0x51,0x86,0xfb,0xa2,0x95,0xc5,0x8c,0x5b,0x61,0x14,0xc9,0xe4,0x07,0xa1,0xf7,0x39,0x11,0x40,0x68,0xd6,0xe2,0x38,0x96,0x6f,0x99,0xf1,0xd2,0xfb,0x8e,0xb8,0x3d,0xf2,0x8a,0x4e +.byte 0x3e,0x54,0xd9,0x0e,0xd1,0xc9,0x31,0x04,0xa4,0xee,0xbe,0x51,0xcf,0x5f,0xd1,0xc8,0x13,0x96,0x9d,0x9b,0xdf,0x32,0xa9,0x38,0x8f,0xbc,0x7e,0x22,0x1a,0x52,0x5f,0x14,0x61,0xeb,0x78,0xf4,0x01,0xe9,0x5c,0x18,0x1c,0xb5,0xe1,0x80,0x06,0x3e,0x8e,0x72,0x33,0xf9,0xaa,0x49,0xec,0x5b,0x7a,0x04,0xf2,0x9b,0x48,0x8a,0x58,0x14,0x4b,0x7e +.byte 0x4d,0x26,0x0b,0xe0,0xf0,0x69,0xa3,0x36,0x75,0x3e,0x73,0xec,0x53,0x20,0x35,0x8e,0xfa,0x40,0xf0,0xcd,0x70,0xe1,0xe4,0x64,0x89,0x14,0x55,0xd7,0x20,0xe8,0xbd,0xc2,0x85,0xa8,0x4d,0x51,0x96,0x27,0x54,0x50,0xc7,0xa1,0x9c,0x35,0x52,0x1f,0x8b,0x6f,0xa2,0x62,0x36,0x94,0x02,0xb1,0x01,0xc6,0x4e,0x53,0x83,0x65,0x98,0x25,0x6d,0x26 +.byte 0x6d,0xef,0x4e,0x7a,0xe0,0x56,0x6a,0x6c,0x23,0xe8,0xa6,0x97,0xc1,0xf2,0xb1,0x2d,0x03,0x29,0xef,0xa0,0x6d,0x86,0x8d,0x5a,0x00,0x83,0x14,0xed,0xd4,0x1e,0x79,0xc4,0xb4,0x42,0xfd,0x53,0xaa,0xab,0xd7,0xa3,0xf9,0x7d,0x15,0x26,0xab,0x81,0xc4,0x7a,0x96,0x14,0x94,0x71,0xe1,0x7f,0xc1,0x67,0x5f,0x5f,0x11,0xb4,0x72,0x03,0xf8,0x9b +.byte 0x2f,0x82,0xa3,0x4e,0xda,0xfd,0x2a,0x31,0xf1,0x74,0x6d,0x96,0x7a,0x9c,0xf9,0x01,0xd9,0x55,0x8e,0x52,0xe4,0xae,0x22,0x14,0x7b,0xc0,0x5a,0xc4,0x31,0x23,0x9a,0x2e,0x9d,0x86,0x86,0xd5,0x66,0xc8,0x8b,0xdb,0x49,0x5f,0xca,0x57,0x51,0x50,0x75,0x3f,0xeb,0xb1,0xe5,0x84,0x42,0x8f,0x0f,0xca,0x86,0xcf,0xb0,0x17,0x06,0x06,0x46,0x8c +.byte 0x4a,0x84,0xde,0x28,0x84,0x24,0x7f,0x33,0x48,0xe8,0x89,0x87,0x1f,0x02,0x07,0x4f,0x36,0xa9,0xdc,0x8a,0x42,0xb6,0xc7,0x9c,0x47,0xd4,0xd4,0x2d,0xc0,0x17,0xb0,0xe6,0x23,0xb7,0xae,0x0d,0x9f,0x38,0x0a,0xdf,0x7f,0x73,0xbf,0x93,0x19,0x05,0x23,0xbf,0xc0,0x53,0x2d,0xcd,0x3e,0x73,0x01,0x78,0xa7,0xdc,0x6c,0x85,0x1d,0x25,0xc5,0x54 +.byte 0x68,0x95,0xc1,0x20,0x65,0xd9,0x01,0x85,0x7d,0xc9,0xba,0x63,0x43,0x7a,0x23,0xbb,0x95,0x3a,0x76,0x2d,0x75,0x1e,0xac,0x66,0x3e,0x20,0x30,0x8d,0x37,0x64,0x3c,0xc7,0x6f,0x36,0xb8,0x34,0x60,0xd2,0xb4,0x54,0x07,0x52,0x6c,0xfa,0x04,0xfe,0x2b,0x71,0x03,0x03,0x97,0xfc,0x4a,0xf9,0x4d,0x44,0x1a,0xf9,0xd7,0x4b,0xe5,0xe1,0xf9,0xb9 +.byte 0x41,0xa0,0x5b,0xa2,0x69,0x48,0xba,0xeb,0xcc,0x4e,0x55,0x4b,0xbd,0x41,0x09,0xa8,0x90,0x5c,0xc6,0xe3,0x20,0x0c,0x8f,0xfc,0x7e,0x0e,0x4f,0x3d,0x47,0x65,0x40,0x1e,0x79,0x9a,0xe0,0x8f,0x8f,0xe9,0xcb,0xaa,0x04,0xb8,0xd9,0x91,0x30,0x2a,0x4c,0x17,0x44,0xc0,0x03,0x4c,0x37,0xd3,0xdb,0x20,0xe5,0x8e,0x70,0x87,0x57,0x4f,0x8a,0xcf +.byte 0xee,0x64,0xbc,0xef,0x0f,0x9e,0xcf,0x95,0x5e,0x11,0x4f,0x7a,0x35,0x53,0x8c,0x85,0x6a,0xff,0x72,0x1b,0x35,0x51,0x89,0xf8,0x94,0x65,0x97,0xec,0xfe,0xbd,0x00,0x29,0x3d,0xe8,0x96,0x23,0xa4,0xe3,0xcf,0x81,0xb2,0x8f,0x73,0x4c,0x05,0xc3,0xcc,0x37,0x22,0x97,0xa0,0xda,0x49,0xb2,0xbd,0x07,0x2b,0x26,0xa0,0x6f,0x6b,0x1f,0xa6,0x15 +.byte 
0xe3,0x6e,0x12,0xa4,0x51,0x1b,0x72,0x22,0x08,0xfe,0xf7,0x93,0x1a,0x9f,0x62,0x12,0xd4,0x11,0x1f,0xd1,0x80,0xeb,0xa4,0xb1,0xf4,0x37,0x3b,0x60,0xd8,0x2b,0x53,0xae,0x69,0xf8,0x48,0x38,0xf4,0x20,0x28,0xe1,0xfb,0x6a,0xec,0x6e,0x11,0x2e,0x2c,0x59,0x62,0x23,0x8a,0x82,0xc4,0x33,0x7b,0xdc,0x33,0x99,0x41,0x29,0x4f,0xa1,0x6e,0x3a +.byte 0x48,0x13,0x1c,0x1f,0xa3,0x1f,0xd2,0x02,0x79,0xe1,0xe4,0xb9,0x99,0xa4,0x50,0xea,0x53,0x96,0x4e,0x82,0x7c,0xee,0x65,0x07,0x26,0x87,0xf9,0x9d,0x45,0x17,0x37,0x61,0x7e,0x5f,0xb9,0xd2,0x55,0x3c,0x45,0xf7,0xec,0x33,0x08,0xa3,0x41,0x24,0x8f,0xb2,0x75,0x41,0xb6,0xa2,0x21,0xfe,0x94,0x7e,0x1e,0xe6,0x03,0x6e,0xf4,0xeb,0x23,0x59 +.byte 0x51,0x25,0x99,0x19,0x6d,0xf7,0xe3,0x22,0xd8,0x41,0x0f,0xd5,0xaf,0x0d,0xc6,0x3f,0x8e,0x36,0xee,0x90,0x23,0x67,0x03,0xcb,0xe3,0xaf,0xc4,0xf8,0x22,0x1f,0xd8,0x3e,0x94,0xdf,0x13,0xc9,0x4f,0x17,0x22,0x8c,0x93,0x6b,0x3f,0x60,0x1a,0xbd,0xfa,0x9f,0xe6,0x43,0x45,0xe1,0x0a,0x95,0x21,0x06,0x52,0xbd,0x58,0x56,0x84,0x56,0x36,0xf3 +.byte 0x55,0x58,0x46,0x62,0x6c,0xb3,0xa0,0x29,0x5a,0xfc,0xb4,0x87,0x5f,0x89,0xa5,0xab,0x6d,0x5a,0x44,0xc5,0xc8,0x50,0x83,0xe1,0x41,0xd4,0x97,0x6c,0x08,0xb1,0x43,0x33,0x0d,0x3a,0x8b,0x31,0xa1,0xae,0x77,0x71,0xb7,0x67,0x65,0xd7,0xa7,0xc9,0x6c,0x4a,0x9b,0x80,0xd5,0xbf,0xae,0x0f,0x9b,0xce,0x1a,0xa3,0x26,0xc6,0x19,0xa1,0x8d,0x12 +.byte 0xd9,0x09,0xae,0xac,0x9f,0x4b,0xab,0xaf,0xf6,0xc5,0x9e,0x26,0xe6,0x23,0xcb,0x3e,0x60,0x1e,0x3d,0xa1,0xec,0x59,0xca,0xf1,0x87,0x0e,0xaf,0x47,0x5f,0xab,0x17,0x99,0xbd,0x87,0x1c,0x1d,0x00,0xd6,0xb2,0x59,0x56,0xdd,0x49,0x20,0xb5,0x91,0xf8,0x0c,0xf1,0x80,0xc6,0x37,0x92,0xd7,0x2c,0x02,0x0d,0x47,0x1b,0x1b,0x6b,0x3f,0x60,0xd0 +.byte 0x21,0x9b,0x49,0x47,0x3c,0xaa,0x83,0x44,0x1b,0x92,0x8e,0xec,0x63,0x40,0xd6,0x9a,0x48,0x7c,0x5e,0x97,0xe4,0xf0,0x84,0x36,0x30,0x11,0x0b,0x7c,0x79,0x3b,0xff,0xdf,0x77,0xf6,0xc9,0xdb,0x49,0xdd,0x2a,0xe7,0xca,0x9a,0x5b,0xef,0xd4,0x84,0xe2,0x44,0x8b,0xef,0x4e,0x0d,0x13,0xd6,0xbb,0xba,0x29,0x02,0xae,0xfc,0x55,0x24,0xfa,0x4b +.byte 0x7d,0x71,0xc9,0xde,0x71,0x36,0xbc,0xac,0x31,0x5c,0xf8,0x20,0xdd,0xb8,0xae,0x03,0xd3,0xb0,0xdc,0x27,0x7f,0xc5,0xff,0xda,0x8a,0x36,0x2d,0x8f,0xae,0xbd,0xf8,0x92,0x28,0x8e,0x0c,0xc3,0xaf,0x4e,0x33,0xf0,0x71,0xdb,0xad,0x4d,0xc1,0xef,0x52,0x1c,0x84,0xdc,0x0d,0xf3,0xab,0xb9,0x0b,0xe0,0x18,0xa5,0x06,0xdc,0x78,0x41,0x73,0x35 +.byte 0x95,0x37,0x84,0xba,0xc1,0x4e,0x0a,0xe4,0x4d,0x05,0xfe,0x9d,0x74,0x68,0x4a,0x35,0xf0,0x15,0xaa,0x7b,0xfe,0x08,0x47,0xb2,0x84,0x65,0x1d,0x0d,0x9f,0xe7,0xe0,0x04,0xf9,0x1c,0xac,0x66,0xb3,0x75,0x96,0x8f,0x25,0xb6,0x29,0x53,0x52,0x50,0x7a,0x50,0xd1,0x89,0xc7,0x05,0xfb,0x3a,0xb0,0xfa,0x6b,0x96,0x9d,0xfc,0xb0,0xcd,0x68,0x21 +.byte 0x61,0xf6,0x65,0x64,0xa7,0xc6,0x56,0xbd,0xf0,0x9b,0x4a,0x9a,0xe2,0x8c,0xd8,0x88,0x70,0x82,0x0c,0x87,0x51,0x77,0x23,0xd8,0xd8,0xf8,0x4a,0xfe,0xf4,0x6d,0x3f,0x2a,0x36,0x0c,0x67,0x85,0x43,0x13,0x83,0xd5,0xe9,0x32,0xff,0x8c,0xec,0xd4,0x7f,0xd2,0x32,0x4d,0x4e,0xec,0x76,0x55,0xf9,0x0d,0xb7,0x57,0x6c,0xc4,0xd6,0x22,0xd3,0x6e +.byte 0x71,0x23,0x68,0x45,0x03,0x37,0x27,0x3d,0x56,0x89,0xbb,0x7c,0xf1,0xa8,0x09,0xd6,0xb2,0xc5,0xe6,0xf6,0x72,0x77,0x3e,0xb0,0x8a,0x3d,0x17,0xbd,0xd5,0x0d,0xdb,0x62,0xa7,0x07,0x66,0x35,0x19,0x12,0xff,0xcf,0xdd,0xb3,0x09,0xa3,0x58,0x5b,0x0d,0x87,0x76,0x33,0x28,0x98,0x91,0x48,0xac,0xa1,0x22,0x9f,0xda,0x36,0x03,0x8a,0xc1,0x5e +.byte 
0x6c,0x2e,0x42,0x8e,0x1a,0x7d,0x75,0x69,0xb2,0xcf,0xb0,0x14,0x80,0xa8,0x91,0xc2,0xbc,0x24,0x8f,0x25,0x9a,0x9e,0xa3,0x4d,0x46,0x55,0x53,0x05,0x0c,0xf8,0xdb,0xe0,0xee,0xe4,0x32,0xff,0x39,0x74,0x9a,0xa8,0xf7,0xa4,0x6e,0x5b,0x9a,0x89,0x33,0x40,0xf4,0xce,0x54,0x4a,0x18,0xdb,0x11,0xe4,0x83,0x69,0x52,0xef,0x12,0xc6,0x13,0x6e +.byte 0x2a,0x14,0xb9,0x8e,0x38,0x8d,0x6b,0xef,0x02,0xc8,0x66,0xf0,0x78,0xaa,0xa6,0x04,0xa3,0xa5,0x1d,0xdb,0xac,0x02,0x23,0x4c,0x2a,0xa5,0xbf,0x66,0xa4,0x47,0xa9,0x8e,0x50,0xd2,0xf8,0xf5,0x0d,0x0f,0xc9,0x07,0xd8,0x1a,0x94,0x84,0xcf,0xb3,0x56,0x53,0x5f,0x83,0x1d,0x30,0xb6,0x94,0x36,0xf4,0x16,0x72,0x8c,0x6d,0x49,0xe4,0x6d,0x93 +.byte 0xb1,0xa1,0x97,0x70,0x75,0x47,0x3a,0x7e,0xa6,0x39,0x1d,0xf5,0xcc,0x37,0xaa,0x90,0x53,0xe1,0x9b,0xcb,0x9a,0x97,0x7d,0x18,0x4a,0x3c,0x1f,0x05,0xf4,0xe3,0x6f,0x7a,0x19,0x84,0xbc,0x68,0xa4,0x6e,0x5a,0xb5,0x7a,0x51,0xda,0xf5,0x75,0x1e,0xfe,0xb0,0x73,0x43,0x39,0x98,0xb7,0x1e,0x17,0x36,0x35,0x15,0x64,0x90,0xb6,0x83,0x43,0x8f +.byte 0xcd,0xb6,0x8c,0xc4,0xe4,0xee,0x0e,0x1c,0xbd,0x3a,0xe6,0x6e,0x44,0x73,0x88,0x30,0xa0,0xf0,0x97,0xf5,0x5e,0x12,0xea,0xd9,0xd7,0xb5,0xc5,0x1d,0xc7,0xc8,0x55,0xbb,0x2c,0x64,0x43,0x50,0x15,0x71,0x02,0xd3,0xf9,0xb4,0xe7,0x2f,0x0f,0x98,0x9e,0x87,0x40,0x2a,0x61,0x06,0x44,0xc2,0x47,0xaf,0x44,0x4f,0xdd,0xa3,0xb0,0xb2,0x8d,0x8c +.byte 0x83,0x96,0xd3,0x2a,0x38,0xdf,0x87,0x5d,0x1c,0x64,0xc8,0x4f,0x3c,0x41,0xc7,0xf8,0x64,0x58,0xa6,0x9b,0xcb,0xcd,0x77,0xdb,0x38,0xe7,0x30,0xb6,0x91,0x88,0xd8,0x9d,0x29,0x71,0x12,0x9e,0xdf,0x20,0xd9,0x14,0xa3,0xa0,0xbd,0x0a,0x99,0x67,0x0a,0xe1,0xe9,0xba,0xd0,0x1b,0xba,0xc8,0x8d,0x76,0x10,0xe8,0x30,0xa1,0x93,0xf4,0x95,0x6a +.byte 0x12,0xd5,0x95,0x31,0x7f,0xdb,0x33,0xfc,0xbf,0x7a,0xbe,0xe4,0xfa,0x50,0x1b,0x24,0x75,0x9b,0xf8,0x81,0x34,0xc8,0xfb,0xda,0x3c,0x6f,0x3b,0x9a,0xb2,0x6f,0x94,0x0c,0xd9,0xc3,0x05,0xd6,0x96,0x10,0x27,0xdb,0xd6,0x88,0x72,0xe4,0x8f,0xfc,0xd3,0x52,0xf8,0x63,0xb2,0xce,0xf1,0x2a,0xbc,0x1c,0x23,0x9d,0xfb,0x27,0xdd,0x8d,0xe4,0xcc +.byte 0x63,0xcf,0xad,0xe6,0xe9,0x4f,0xb8,0x8a,0x20,0x47,0x75,0x73,0x3f,0x27,0x07,0x5d,0x8c,0x8c,0x6e,0x7a,0x91,0xe2,0xf6,0xd5,0x70,0xd8,0x00,0xe5,0x0f,0xde,0x78,0xd8,0xb4,0xd3,0x18,0x5a,0x24,0x43,0x91,0x0c,0xbe,0x8b,0x1b,0x88,0x48,0x7e,0x94,0x05,0xd0,0xec,0xd2,0x71,0x26,0xc7,0x70,0xeb,0x8a,0x83,0x01,0x52,0xdb,0xe5,0x76,0x31 +.byte 0x19,0x14,0x13,0x90,0x5b,0x5a,0x94,0x89,0xe2,0x4e,0x2d,0x17,0xf6,0xbc,0x67,0xee,0x51,0xd4,0x00,0x83,0xe5,0x18,0xa5,0x54,0x6c,0xd2,0x7a,0x1f,0xdb,0x6f,0xed,0x7f,0x07,0xbb,0x9f,0x3a,0xc2,0x8c,0x04,0xf9,0x9a,0x55,0xe3,0x70,0xf3,0x36,0xfd,0x44,0x05,0xd9,0xf3,0xe1,0x87,0x2c,0x29,0xec,0x30,0x8b,0xb7,0xde,0x27,0xa4,0xcd,0xdf +.byte 0x64,0x0b,0x62,0xdf,0x34,0xa0,0xf5,0xa1,0x69,0xc9,0x0b,0x00,0x81,0xf4,0x03,0x5e,0xef,0xb8,0x26,0x49,0x71,0x5e,0xcd,0x76,0xa2,0x38,0x25,0x1f,0x92,0xc3,0xbf,0xdb,0xb3,0x29,0x37,0x06,0xc5,0xc2,0x3b,0xd8,0xbd,0x55,0xf2,0x7f,0xd5,0xd5,0x34,0x32,0xf1,0xa0,0x92,0x9b,0x1c,0xee,0x6f,0x48,0x40,0x6b,0xd1,0x45,0x09,0x3f,0xaf,0xdc +.byte 0xe1,0xac,0x75,0x9a,0x33,0xf7,0x50,0x4f,0x2c,0x3c,0x30,0x69,0x69,0x84,0xcb,0xe9,0xca,0xdf,0x8d,0x02,0x5d,0x30,0x71,0x99,0x7b,0xd5,0xb2,0x55,0xdd,0x9c,0x2f,0xae,0x11,0x41,0x01,0x6b,0xf7,0x95,0xe3,0xda,0xe3,0xcc,0xa4,0x17,0xd0,0x50,0xf9,0x4c,0x31,0x2b,0x4e,0xf7,0x49,0xbb,0x75,0x8f,0x28,0x19,0x9f,0x89,0x7b,0x78,0x80,0x41 +.byte 
0x50,0x5a,0x5c,0x1e,0x82,0x93,0x9f,0x4f,0x61,0x96,0x29,0x0c,0x25,0xb3,0xe6,0xff,0x86,0x90,0x78,0x09,0x04,0xf9,0x2a,0x3d,0xa1,0xd5,0x68,0xa8,0x0d,0xd9,0x41,0x01,0xdc,0x41,0x01,0xff,0x20,0xc0,0x63,0x0b,0x4d,0xd5,0x80,0x78,0x82,0x05,0x51,0x62,0x09,0xf9,0x11,0xbd,0xde,0xc0,0x7d,0x3f,0xf2,0x30,0xfb,0x41,0x68,0x39,0xb0,0xc2 +.byte 0x2e,0x33,0x4e,0xa7,0x85,0x01,0x6b,0xd1,0xf9,0x78,0xef,0xe9,0x7c,0x0e,0xaf,0x13,0x1a,0xf5,0x97,0xde,0xf0,0xbb,0x67,0xf9,0x9b,0xab,0xee,0x86,0x73,0x9b,0x23,0x6c,0x56,0x0d,0xa0,0xda,0x4c,0xff,0x2b,0xc5,0x92,0xdb,0xee,0xbd,0xba,0x3a,0x54,0x21,0xc0,0x5c,0xfe,0x21,0xf1,0xbd,0xac,0xaf,0xa3,0x7a,0x52,0x62,0x15,0x8b,0x8f,0xb5 +.byte 0x82,0xc6,0x1a,0xfb,0x22,0xbc,0xa2,0x05,0x42,0xfe,0xb4,0x12,0x6b,0xad,0xa9,0x76,0xb7,0x6b,0x1c,0xd8,0x34,0x5c,0x7d,0xd5,0xa9,0x0d,0x91,0xf6,0xc1,0x47,0x69,0xbc,0x43,0x8f,0xb7,0xfc,0x84,0x2e,0xa0,0x8e,0x3f,0x52,0x3b,0xbd,0x1f,0x28,0x6b,0xc8,0x13,0x37,0xd6,0x44,0xe9,0x8d,0x08,0x92,0x96,0xe5,0x2c,0x57,0x34,0x59,0x21,0x04 +.byte 0xa8,0xaa,0x56,0x25,0xa4,0xc8,0xae,0x68,0x17,0x9e,0xa4,0xf4,0x42,0x64,0x57,0x4b,0x54,0x85,0x8a,0xd1,0x09,0x09,0x25,0x18,0x05,0xb0,0x09,0x9d,0xd9,0x75,0x21,0xd3,0x75,0x31,0xf8,0x35,0x46,0xc8,0xd4,0x47,0x9d,0x87,0xeb,0x40,0x95,0x19,0x24,0x7c,0x6e,0xe9,0xd5,0x14,0xaa,0xc3,0xbe,0x22,0x18,0xc1,0xa0,0x5f,0x34,0x98,0xc2,0x4d +.byte 0x3f,0xa6,0x09,0x57,0x1b,0x75,0xc6,0x89,0xee,0xf0,0xbd,0xbc,0x1a,0xd3,0xea,0x6e,0x82,0x06,0x90,0x4f,0xbb,0x61,0xac,0xbb,0x3e,0x8c,0x94,0xea,0x69,0x58,0x26,0x2e,0x17,0x78,0xad,0x14,0xa4,0x79,0x14,0xbd,0xc1,0x78,0xf9,0xbb,0x11,0x7e,0x8d,0xbf,0x3e,0xc8,0xc5,0x69,0xd7,0x5a,0x4c,0x4b,0x86,0x25,0x4c,0xe9,0x3a,0xc2,0xd9,0xf8 +.byte 0xbf,0x5e,0x46,0x4f,0xca,0xba,0x25,0x58,0x73,0x82,0x02,0x8a,0x41,0x9e,0x2d,0xa9,0x08,0xb4,0x60,0x2a,0x11,0x2c,0x2f,0x3d,0x5e,0x68,0xd8,0xa9,0x2e,0x1c,0xfa,0xdc,0xda,0xfb,0xfb,0xf3,0xb2,0x66,0xd3,0x57,0xe6,0x09,0xeb,0xe5,0xf4,0xed,0x2d,0xb7,0x3a,0xce,0x69,0x2d,0xb4,0x79,0x1a,0x99,0x9d,0xc8,0x99,0x9f,0x9b,0x78,0xd4,0x8a +.byte 0x73,0xd5,0x89,0x9f,0xda,0xdf,0xd0,0xca,0x6b,0x63,0x5a,0x1e,0xe0,0x2f,0x01,0xa4,0xd0,0x62,0xc0,0x5f,0x4e,0xd9,0xd3,0x47,0xe4,0x68,0x73,0x8c,0x87,0x50,0x91,0xec,0x8e,0x0b,0xa7,0xf0,0x4c,0x32,0x19,0xaa,0x00,0xbd,0xe4,0x20,0xab,0x5c,0x00,0xdb,0x18,0xc0,0xff,0xc1,0xc0,0x8f,0xa2,0x8c,0x47,0x91,0x86,0xde,0xa9,0x09,0xb5,0x86 +.byte 0xcc,0x1d,0x7f,0x4b,0x7d,0x16,0xf6,0x21,0xd0,0xf8,0xaa,0x16,0x20,0xa9,0xac,0x3e,0xef,0x56,0xee,0x0e,0x1d,0xd6,0x44,0x7d,0xa9,0x84,0x41,0x8d,0x69,0x69,0x92,0x74,0x87,0x3b,0x8a,0xbf,0x40,0x29,0x45,0xf9,0xa8,0x52,0x8c,0x99,0x95,0xe7,0x6a,0xcd,0x3f,0x74,0x2d,0xde,0x82,0x47,0x41,0xa6,0xd9,0x5a,0x30,0x6c,0x20,0x98,0x3f,0xfb +.byte 0x66,0x08,0x73,0x68,0xe1,0xcd,0xfd,0x3c,0x4f,0x33,0x6b,0x42,0xa4,0xab,0x78,0x22,0xb5,0xd9,0x6f,0x99,0xcb,0x85,0x6a,0x14,0xb9,0xd3,0x0f,0xfb,0xd7,0x07,0x7b,0xbe,0x6a,0xd9,0xba,0xde,0x98,0xac,0xd8,0xe5,0x40,0xcd,0x59,0x7f,0x88,0x3c,0x4e,0xfa,0xfe,0xbe,0x48,0x21,0xb5,0x40,0xd5,0xc8,0x1e,0x8a,0x56,0xd9,0xec,0x25,0xad,0x5e +.byte 0x31,0xf3,0xf2,0x3d,0x0b,0x56,0xb5,0x20,0x08,0xd3,0x02,0x81,0x93,0x29,0x3d,0xbd,0x0a,0x9c,0x26,0x74,0xdb,0x6b,0x7e,0xd1,0x4a,0x1a,0x1c,0x47,0x49,0x34,0xba,0x08,0x7a,0x6a,0xb3,0xd6,0x3b,0xd0,0x28,0x50,0xa1,0xd8,0x17,0x85,0x61,0xab,0x24,0x22,0xda,0xc8,0xb4,0x1b,0x07,0x2e,0x67,0x77,0x84,0xdc,0x6f,0xfd,0x51,0xa5,0xe8,0x34 +.byte 
0x63,0xbd,0xae,0xae,0xc7,0x84,0x1d,0x60,0xc8,0x8f,0xde,0x22,0xfd,0x85,0xb4,0x12,0xb4,0x04,0x5b,0xe7,0xb5,0x58,0xf8,0x56,0x66,0xa3,0xb7,0x1e,0x54,0xd0,0xdb,0x12,0xaa,0x9c,0x89,0x5b,0xfa,0xf4,0xe7,0xe2,0xf4,0x9c,0x08,0xa8,0xbe,0x6b,0xe3,0xce,0x6a,0x88,0xb5,0x74,0xb9,0x49,0xaa,0x7b,0xcd,0xbc,0x17,0x81,0x61,0xe2,0x28,0x6f +.byte 0x4b,0xe8,0xa4,0x55,0xc5,0x1e,0x69,0x21,0x8f,0xfd,0xa8,0xd0,0xb9,0x6f,0x1b,0xfe,0x8c,0x5e,0xf9,0x7d,0xd9,0xc2,0xbe,0x0f,0x6f,0xbd,0xa7,0x94,0x10,0x4e,0xe0,0x5a,0xbb,0xa3,0x40,0x9a,0x5a,0xad,0x10,0x97,0x92,0x3b,0xbd,0xa7,0x75,0x77,0xc6,0xa6,0xde,0x42,0x00,0x3b,0xf7,0xe4,0xf4,0xd7,0xdd,0xaa,0x31,0x1e,0x64,0xae,0x17,0x0a +.byte 0x25,0xa0,0x94,0x5f,0x3c,0xbc,0x3d,0x00,0x00,0xd3,0xba,0x7b,0x98,0x81,0xe1,0xdf,0xba,0x60,0x08,0x2a,0xe5,0x66,0x08,0x3e,0xfa,0x81,0x0a,0x89,0x4e,0xe5,0x3b,0xc3,0xdf,0x21,0x9b,0x54,0xa3,0xb3,0xc3,0xc1,0xce,0xb4,0xaa,0x06,0xee,0x2e,0x34,0x55,0xcc,0x8b,0x0f,0xcd,0x1d,0x1b,0xd9,0x9e,0x59,0xf0,0x93,0xc9,0xba,0x35,0x5c,0x99 +.byte 0xf6,0x86,0x9e,0xe9,0xf8,0x84,0x80,0x05,0x76,0x6f,0x8b,0x38,0xb6,0xe0,0xdf,0x0c,0xb3,0xc7,0x6e,0x62,0x53,0xe4,0x69,0x0a,0xc1,0xcf,0x5b,0x84,0x75,0x78,0x56,0x35,0xa5,0x26,0xc6,0xae,0x76,0x2e,0xc8,0x29,0x8d,0x16,0xd1,0x4f,0x27,0x36,0x22,0x41,0x31,0xfb,0xbe,0xd0,0xf9,0x0a,0x06,0xbf,0x59,0x6e,0x06,0x20,0x0d,0x52,0x66,0x63 +.byte 0x38,0x2a,0xb6,0x15,0x0f,0x51,0x14,0x0b,0xd1,0x63,0x40,0x2a,0xfe,0x88,0x51,0x53,0x5d,0x82,0x4e,0x1b,0x91,0x30,0x7a,0x09,0xec,0xb6,0x53,0x10,0x87,0xba,0x34,0x1f,0x8a,0xf7,0x85,0x31,0x77,0x76,0xba,0x55,0x07,0x6b,0x80,0x5d,0x14,0x23,0x50,0xef,0x07,0x91,0xc5,0x71,0x3a,0x55,0x44,0x9d,0xbf,0xe6,0xab,0xde,0x7c,0xdd,0xe0,0xcb +.byte 0xcc,0xc1,0x78,0xb4,0x8c,0xd1,0x35,0x73,0x80,0x9c,0x44,0xff,0xf8,0x8a,0xaa,0x9a,0x94,0xcf,0xc9,0x51,0xfc,0xa5,0x3d,0x86,0xd6,0x67,0x71,0x1b,0xdb,0x83,0xb2,0x67,0xb0,0x17,0xce,0x13,0x1b,0x7a,0x84,0xc8,0xaf,0x69,0x7e,0xf0,0xab,0xc5,0x8c,0x37,0x12,0x43,0x33,0x5f,0xaa,0xde,0xcf,0x4c,0x73,0x7f,0x6b,0x80,0x18,0x27,0x72,0x62 +.byte 0xe8,0x3d,0x1c,0x94,0x91,0xfa,0x33,0xef,0x13,0x94,0x7f,0xb6,0x53,0xe3,0xd7,0x73,0x05,0x3e,0xe8,0x45,0xde,0x1e,0x1d,0xa4,0x41,0x11,0x0a,0x7f,0x62,0x6e,0x9f,0x9f,0xec,0xe9,0x87,0xe0,0x5d,0xbb,0xbc,0x0b,0x37,0xa2,0xf3,0x68,0x8a,0x24,0xec,0x98,0xe5,0x5d,0xbf,0xa1,0x60,0x2b,0xc2,0x74,0x4b,0x8b,0x85,0x44,0x28,0x02,0xd5,0xb9 +.byte 0xae,0x00,0x37,0x1e,0x0b,0x46,0xe6,0x40,0xf1,0xdc,0xa0,0xfc,0xae,0x04,0x7f,0xb6,0x46,0xa3,0x22,0x79,0x92,0xda,0x89,0xa0,0x38,0xf0,0xa2,0x4a,0x76,0x79,0x0c,0x46,0x4d,0xa9,0xe6,0x75,0xff,0x01,0xb3,0xe4,0x13,0xc2,0x53,0xe9,0x6d,0x1f,0xdd,0x88,0xcf,0x10,0xf5,0x16,0xef,0x05,0x59,0x51,0x15,0x49,0x17,0xda,0xff,0x0e,0xb3,0xb9 +.byte 0xae,0x79,0xc6,0xb1,0x94,0x08,0x09,0x30,0x9f,0x2a,0xfd,0x55,0xc0,0x41,0x8c,0xe5,0x0e,0xee,0xc2,0xa0,0x05,0x36,0x66,0x8d,0x9a,0xcc,0xc9,0xeb,0x1d,0x34,0xc0,0x1a,0x29,0xc2,0xcd,0xb7,0x25,0xd3,0x83,0xf8,0x1e,0xa0,0xf4,0x50,0xd4,0x08,0x0d,0xcb,0x6a,0x2f,0xa5,0x8b,0x30,0x94,0x89,0xea,0x94,0x6c,0x00,0x7e,0x7f,0xb5,0x4d,0x61 +.byte 0xa7,0x9d,0x94,0xcc,0x14,0x8f,0x75,0x1f,0xef,0x2b,0xbe,0x37,0xdd,0x19,0x41,0x2e,0x90,0x36,0x27,0xa5,0xa9,0x6c,0x75,0x8c,0x2d,0xe3,0x97,0x74,0x91,0xf3,0xb8,0xcb,0xcb,0x74,0xba,0xf0,0x57,0x70,0x89,0xee,0x4d,0xc5,0xfe,0x3e,0x60,0xe3,0x5b,0x28,0x36,0x91,0x6f,0xcd,0x6c,0x33,0xb6,0x44,0x0c,0xce,0x81,0xe4,0xdb,0x84,0xbe,0x4e +.byte 
0xef,0xb8,0x75,0xf7,0x8b,0xb0,0xb7,0x0d,0x00,0x13,0x54,0x39,0xfd,0x9e,0x86,0x5c,0x59,0xd0,0x84,0x0f,0x97,0xc0,0xf8,0xfa,0x4a,0xcf,0x57,0xb8,0x24,0xf0,0xa8,0x40,0x70,0x9d,0xc4,0xe5,0xc7,0xc9,0xcb,0xb6,0xf4,0x0b,0xb5,0xcc,0xe0,0x90,0x2b,0x42,0x81,0xd6,0x59,0x2e,0x11,0xbd,0xe8,0xf5,0xef,0xa8,0x2b,0xdb,0x93,0x62,0x1e,0xef +.byte 0x3a,0x5f,0xf5,0x47,0x15,0x1f,0x03,0x6f,0x40,0x85,0xff,0x50,0x89,0x2e,0x72,0x8f,0x5c,0x0d,0x61,0x84,0x8d,0x8a,0x8f,0x2a,0x47,0x7c,0x97,0xfe,0x8a,0x97,0x6c,0xd5,0x1c,0x97,0xfa,0x59,0xbe,0x2c,0x0f,0x4d,0x85,0x7f,0x18,0xe3,0xea,0xe8,0xde,0x5a,0xf3,0x67,0xe1,0x71,0x7e,0x81,0xa3,0x74,0x0d,0xf4,0x3d,0x5a,0xec,0xc1,0xcf,0x6f +.byte 0x08,0x0f,0x5a,0x63,0x72,0x0b,0x46,0x5d,0x38,0x80,0xea,0xb7,0x12,0x5d,0xce,0x37,0x26,0xaa,0xd3,0x0d,0x93,0x4a,0x34,0x20,0xd5,0x51,0x54,0x1c,0x5e,0x53,0xa9,0xed,0x26,0x3c,0x29,0xaf,0xbe,0x73,0x34,0xa5,0xc3,0xbf,0x8c,0x8a,0xc3,0x30,0x89,0xaf,0xa9,0x2d,0x28,0x35,0x7d,0x6b,0x84,0x23,0x22,0xee,0x8c,0x82,0x04,0xbd,0x26,0x52 +.byte 0x26,0x73,0x76,0x05,0x35,0x0c,0xec,0xf7,0x54,0xb2,0x17,0x68,0xe9,0x68,0x67,0xbb,0x0d,0x98,0x19,0x32,0xa7,0xdb,0xf9,0xef,0x42,0xe7,0xc2,0xe2,0x39,0x9c,0xae,0xbb,0xdb,0x91,0x28,0x82,0x88,0x23,0x61,0x50,0x6d,0x61,0x39,0x73,0xf8,0x6a,0xee,0xf3,0xa9,0x2c,0x78,0x0d,0x5a,0xed,0xb1,0x08,0x8f,0x24,0xe5,0xb7,0xa4,0xdf,0x65,0x9a +.byte 0x72,0x3a,0x39,0x9c,0xf4,0x43,0xdc,0x8a,0xa3,0x3d,0xb5,0x1e,0x7b,0xe5,0x83,0x11,0x07,0xab,0x62,0x7e,0xac,0xab,0x52,0x94,0x0b,0xaf,0xdf,0x54,0x18,0xf1,0xc0,0x9f,0x1c,0x33,0x02,0xd9,0x62,0xc3,0xcc,0xaf,0x32,0x09,0x35,0x77,0xad,0x72,0xd6,0xb5,0x2d,0xaf,0xf9,0x39,0xfb,0x95,0xbb,0xf9,0x84,0x80,0x84,0xc8,0xc6,0x6d,0xb5,0x79 +.byte 0x25,0xf4,0x6c,0x71,0x26,0xda,0x74,0x86,0xad,0x52,0x47,0x8b,0x46,0x32,0xf6,0x2c,0x89,0xdb,0x93,0x1f,0x46,0x83,0x91,0x19,0xd2,0x0c,0x29,0x97,0x5f,0xa9,0x2b,0x87,0x0c,0x87,0x89,0xe6,0x63,0xa1,0x36,0xfb,0xfa,0xb4,0xb8,0x8e,0x5f,0xe9,0x8f,0x62,0xd2,0x81,0x1d,0x7b,0xc6,0x14,0x37,0x56,0x73,0x64,0x3d,0x0a,0xfd,0xe5,0x94,0x01 +.byte 0x09,0xc8,0x0d,0xa8,0x92,0xda,0x43,0xc4,0x41,0xca,0x3c,0x27,0x2c,0xbb,0xc4,0xb2,0x77,0x13,0xa6,0xb0,0x0e,0x97,0x6a,0xb2,0x83,0xe5,0x5e,0xa3,0xc0,0xe8,0x5e,0x0b,0xe6,0x00,0x04,0x6c,0x1b,0xac,0x84,0xab,0xd3,0xac,0x5f,0x39,0xc2,0xf8,0xfd,0x66,0xf7,0x97,0xd7,0xb9,0x6b,0xd8,0x2a,0x49,0xf7,0x67,0xd8,0xd5,0xa4,0x89,0x57,0xa6 +.byte 0x8f,0x7c,0xcf,0xaf,0xfe,0x3c,0x92,0xc8,0x23,0x2c,0x26,0x83,0x86,0x16,0x97,0x34,0x71,0x3e,0x82,0x2b,0xc7,0x75,0x5a,0x59,0xb3,0x44,0xdd,0x4e,0xd4,0x6d,0x1b,0x9f,0x3c,0x35,0xc4,0xe4,0xf2,0x95,0xb6,0x90,0x95,0xa7,0xc4,0x03,0x10,0x7d,0x3d,0xeb,0x74,0x29,0xaa,0x0c,0xd3,0x27,0xcd,0x3a,0x85,0x3c,0x88,0xd5,0x9a,0x46,0x84,0x8e +.byte 0x36,0xde,0xe3,0x6a,0x27,0xbf,0xc3,0xd0,0x3e,0xa3,0x0e,0x62,0x1f,0xdf,0x4c,0x02,0xa7,0x11,0x91,0xb0,0x6b,0x50,0xc1,0xe0,0x18,0x5a,0xc0,0x10,0xc7,0x1c,0xb6,0x36,0xac,0xe7,0x7d,0xad,0x34,0x63,0x4f,0x17,0xcc,0x41,0x30,0xec,0xd7,0x14,0xb9,0xfe,0x07,0x5c,0x3d,0xbe,0x08,0x77,0x5b,0xdf,0xa3,0x20,0x56,0x55,0xa2,0x8a,0xe7,0x0d +.byte 0xf6,0xfc,0x91,0x37,0xb8,0x92,0x6c,0xd9,0x5c,0xb0,0xc2,0xf7,0xc0,0x38,0xfa,0x54,0xc6,0xa1,0xd3,0x4d,0xae,0x49,0x0d,0xd1,0xc0,0xef,0xbe,0x27,0xce,0x23,0x8e,0xf2,0x9b,0x68,0x02,0x67,0x8f,0x53,0x9d,0xf6,0x23,0x57,0x85,0xdd,0x8d,0xd7,0xcb,0x47,0xf1,0xd8,0x17,0xd8,0x46,0x72,0x28,0x4b,0xac,0x94,0xd3,0x5d,0x53,0x4f,0x06,0x19 +.byte 
0xc6,0x0e,0x0b,0x9f,0x58,0xc6,0x3f,0xea,0x4e,0x83,0x5e,0xd3,0xcc,0x44,0x55,0xa3,0xc7,0x24,0x19,0xea,0x1b,0x18,0xc1,0x18,0x5f,0x21,0x67,0x73,0x32,0x4e,0x31,0x69,0x05,0x40,0x79,0x7c,0x05,0x13,0xdd,0x50,0xea,0xfa,0xc2,0x26,0xe2,0x33,0xff,0x34,0x0d,0xda,0x77,0x27,0xe0,0xe7,0xa6,0x7b,0x8e,0xcd,0xdb,0x92,0x48,0x3a,0x2d,0x52 +.byte 0xf5,0x59,0xca,0xc7,0x47,0xda,0xb7,0xc7,0x8c,0x37,0x5e,0x29,0x30,0xf5,0x57,0x74,0x8b,0x10,0xcb,0x20,0x31,0x4b,0x12,0xe3,0x84,0xd2,0xb2,0xc3,0xd0,0xe3,0x94,0x18,0xa2,0xdc,0x8f,0x4d,0xc3,0x0a,0x43,0x07,0x2c,0x6b,0x41,0x64,0xc0,0x35,0x8f,0x37,0x9b,0xd7,0x78,0xab,0xd0,0xdc,0x1f,0x77,0x55,0xab,0x71,0xc8,0x99,0x98,0x00,0x29 +.byte 0x1c,0xab,0x3c,0x5f,0x82,0x96,0xc2,0xc8,0x9b,0xd4,0x68,0x3f,0x3d,0xe6,0x5a,0x4c,0x1c,0x7b,0x51,0xa3,0x79,0xe8,0x0e,0x8a,0x78,0xdc,0x98,0x63,0x80,0x74,0x32,0x9d,0x7c,0x3a,0x79,0x54,0xa7,0x4c,0xa4,0x4e,0xfc,0xa5,0x8a,0xa4,0x19,0xce,0x84,0xbb,0x8a,0xb9,0x93,0x4a,0x2d,0x82,0x5d,0x1d,0xf8,0x2f,0x85,0xb3,0x90,0x32,0x61,0x6d +.byte 0x13,0x33,0xac,0xbc,0x5d,0x3a,0x54,0x45,0x04,0x50,0x30,0x30,0xc7,0x58,0xbe,0xed,0xdd,0xa1,0xae,0x6d,0xe5,0xde,0xed,0x63,0x9f,0xd4,0x2b,0x8d,0x1f,0x69,0xde,0xda,0x55,0x3f,0x3b,0xe7,0xc8,0x73,0xc0,0x68,0x18,0x6a,0xb3,0xfb,0xce,0xaf,0x46,0x0a,0xcc,0x81,0xa8,0x96,0x6d,0xb6,0xa4,0x74,0xf3,0x8c,0x95,0x2d,0xa1,0xfe,0x09,0xb8 +.byte 0xdb,0x3c,0xcd,0xdc,0x5b,0x0e,0x2d,0xff,0x89,0x8a,0xfd,0x7a,0xe9,0x69,0x0b,0xdd,0x4e,0x9b,0x94,0x64,0xe4,0xb6,0x5d,0x69,0xef,0x9c,0xf6,0xe6,0x44,0x73,0xd5,0x86,0x47,0x63,0x77,0x3e,0x74,0xaa,0xf3,0x6b,0x1f,0x37,0xbf,0xef,0xa2,0xff,0x86,0x61,0x78,0xc4,0xb5,0xbd,0x5a,0x43,0x49,0x80,0x16,0xf2,0x4c,0xec,0x1e,0x07,0x0f,0x41 +.byte 0x60,0x6f,0x3a,0xd2,0xab,0x85,0xc0,0x5c,0xfc,0x9f,0x48,0xad,0x5e,0xe0,0x7d,0x66,0x8e,0x46,0xf1,0xc3,0xb0,0xbc,0x5e,0x3b,0x10,0x7c,0xfc,0xa3,0x27,0xbd,0x8f,0xae,0xd9,0x61,0x39,0xbf,0xca,0x27,0xbb,0xe7,0xda,0x59,0xa8,0x63,0x38,0x16,0xd9,0xb5,0xa6,0xd9,0x1c,0x2b,0xa1,0x42,0xec,0x50,0xd7,0x63,0x09,0x22,0xe0,0x0c,0xb8,0xec +.byte 0x12,0x9b,0xdb,0x8a,0xd3,0x02,0xcf,0x32,0xa9,0x88,0xa4,0x31,0xc8,0xa9,0xf4,0x03,0xf2,0x9d,0xe1,0x41,0xf0,0x0f,0x23,0x65,0xa8,0x99,0x55,0x87,0xf2,0x17,0x66,0xf0,0x94,0xe8,0xe9,0xb6,0xfd,0x10,0xb9,0x55,0xf4,0xda,0x06,0x7a,0xbe,0xe2,0xd3,0xfa,0xb8,0xf7,0x85,0xdf,0xee,0x39,0xdc,0x0f,0xda,0x87,0xf5,0x66,0xd8,0x1b,0x5c,0x0c +.byte 0x13,0xe8,0xa2,0xcd,0xdf,0x47,0x33,0xd7,0xf4,0x5c,0x79,0xc7,0xf4,0x68,0xe4,0x2d,0xa1,0xde,0x5c,0x06,0x1c,0x85,0xf1,0x2a,0xf9,0x73,0x44,0xbc,0xd3,0x57,0x4f,0x0f,0xcd,0xcc,0x40,0xeb,0x9d,0x35,0x8e,0xdf,0x1d,0x4a,0x61,0xd0,0x66,0xb5,0x16,0xce,0x45,0xc0,0xbf,0x01,0xe3,0xb2,0x51,0xba,0x53,0x18,0x2a,0xff,0x19,0xea,0x41,0xa2 +.byte 0xac,0x0b,0x50,0xd3,0xc1,0x6a,0x9c,0xb0,0x34,0x6f,0xa0,0xcb,0xc7,0xc6,0x79,0x5d,0x17,0x3a,0x4c,0xa3,0x16,0xdc,0xac,0x10,0xf0,0x24,0xad,0x9a,0x5b,0xa9,0x7e,0x45,0xcd,0xe9,0xad,0x87,0x04,0xbc,0x2a,0x05,0x59,0xd1,0xdb,0x86,0x22,0x40,0xdf,0xb1,0xff,0x8d,0x3c,0xf8,0x6a,0xf3,0xcb,0x60,0xf9,0x35,0xa6,0x42,0x81,0xcb,0x0f,0x7c +.byte 0xf7,0x24,0x3b,0x0c,0x94,0x32,0xd9,0xec,0xcf,0xd1,0x31,0x3e,0x3e,0xeb,0xa9,0xf2,0x1f,0x2d,0xa7,0x89,0xf7,0x67,0x7d,0x90,0x9d,0x40,0xf2,0xdb,0x07,0x8f,0xb8,0x6f,0xfd,0x78,0x6e,0xd0,0x9e,0xd5,0x7d,0xb0,0x7d,0x65,0xdc,0x6e,0x50,0xec,0x7a,0x5c,0x2c,0x3e,0x6f,0x64,0xa3,0x10,0x34,0xf7,0x71,0xc8,0x82,0xb6,0x96,0xb8,0xb1,0x2a +.byte 
0xb4,0x03,0x95,0x75,0x90,0xac,0x6c,0x81,0x17,0x97,0x06,0xd0,0xb8,0xc5,0x98,0xc5,0x9e,0x46,0x07,0x13,0x02,0x9e,0x47,0x69,0xba,0x85,0x2d,0x09,0x86,0x50,0xe4,0x76,0xb1,0xa2,0xbe,0x8b,0x91,0x6b,0x3b,0x76,0xa3,0xb7,0xf5,0x7f,0xfe,0xf1,0xa4,0xf3,0xc3,0x53,0x64,0xef,0x97,0x86,0x96,0x8b,0xc4,0xae,0x06,0x8b,0xe8,0x3c,0xdc,0xff +.byte 0xfa,0xad,0xcb,0xcb,0x53,0x15,0xf2,0xcc,0x9f,0x48,0xf9,0x57,0x6a,0xcd,0xb2,0xee,0x46,0xc0,0xbf,0x82,0x58,0x60,0xda,0x2f,0xbd,0xde,0xc7,0x41,0xcb,0xf1,0x38,0x56,0x9d,0x38,0x38,0x3d,0xea,0x5e,0x38,0xf1,0xd0,0x02,0x35,0xee,0x4c,0x2f,0x1d,0x19,0xbd,0x08,0x01,0xc3,0x8f,0x75,0xe2,0xf3,0x93,0xbb,0x76,0x6b,0xd7,0x87,0x76,0x7f +.byte 0x3b,0x29,0x08,0x9f,0x3a,0xa5,0x44,0x96,0x5a,0xb3,0x78,0xa9,0xbe,0xf7,0x5d,0xda,0x06,0x37,0x98,0x5d,0xbe,0x6e,0xec,0x58,0x53,0xd1,0xa5,0xd7,0x7a,0x16,0xb1,0x59,0x98,0x42,0x37,0x76,0x1b,0xd6,0x2e,0xa7,0xdc,0x45,0xa6,0x9c,0x9c,0x99,0x24,0x0e,0x22,0xae,0x94,0x65,0xeb,0x4e,0x64,0xc3,0xb0,0xac,0x19,0x41,0xf1,0x62,0x65,0xb2 +.byte 0x35,0xf5,0x2f,0xdb,0xd2,0xf0,0x78,0x19,0x35,0x04,0x6f,0x9c,0xf4,0xaf,0x81,0x68,0x4f,0x8b,0x85,0xfa,0x31,0x23,0x06,0xeb,0x37,0x86,0x43,0x51,0xb3,0xd2,0x2a,0xd7,0xd5,0xa9,0x33,0xba,0xfd,0xb5,0x0e,0x6d,0x9a,0x91,0xf9,0xe7,0x27,0xb7,0xff,0xe6,0xe7,0x34,0xc5,0x1a,0xa3,0x45,0x3b,0x71,0x34,0x87,0x7e,0xe7,0xab,0x74,0xc5,0xff +.byte 0xeb,0x23,0x8f,0x3f,0x5d,0x1c,0x91,0x47,0xeb,0x3e,0x5f,0x5a,0xa6,0x5a,0xde,0xa9,0x5f,0xf4,0x8f,0x95,0xc6,0x25,0x3c,0xd5,0xaf,0xfd,0x4d,0x33,0x68,0xe1,0xa3,0x51,0x1b,0x07,0xad,0xb9,0xec,0xf1,0x50,0x51,0xbf,0xeb,0xe8,0x58,0x2a,0x50,0x0e,0x9d,0xc2,0x8a,0x83,0x8c,0xb0,0xb8,0xde,0x1d,0x7b,0x0f,0xff,0xfc,0xfc,0x31,0xe5,0x62 +.byte 0x40,0xc8,0x28,0x30,0x31,0xc9,0x82,0xab,0xbe,0x50,0xe5,0xfe,0x1f,0x49,0x17,0xf9,0xea,0x23,0xc7,0x6d,0x8d,0x63,0xc3,0x70,0x40,0x32,0x0b,0x48,0x7a,0xd9,0x03,0x52,0x1b,0xf4,0x90,0xd6,0x6d,0xd2,0xfc,0xec,0x24,0x7f,0x21,0x2e,0xd4,0xb5,0x60,0x44,0xd9,0x83,0xb0,0x3e,0x75,0x8a,0x6a,0x09,0xab,0xa8,0x4f,0x48,0x3c,0x2b,0x89,0x30 +.byte 0x29,0xdb,0x1a,0x8e,0x68,0xe4,0x89,0xed,0x10,0xe8,0x46,0xa7,0xf9,0x5f,0x7d,0x42,0xe0,0x8d,0xbc,0x3d,0x4d,0xd8,0x06,0x4a,0xf9,0xbb,0x97,0xa7,0xdb,0x24,0x0b,0xfc,0x49,0x92,0x5d,0x80,0xf8,0xed,0x57,0xc7,0x1e,0x82,0xed,0x41,0xb8,0xfd,0x71,0xb9,0xa5,0x11,0x52,0xdd,0x1e,0xa4,0xf1,0x02,0xc7,0x54,0x7c,0xdc,0x37,0x9f,0xfe,0x37 +.byte 0xe8,0xa5,0xcf,0xb0,0x3d,0x25,0x3f,0x24,0xfe,0xf2,0x63,0x97,0x3c,0x13,0xdc,0x31,0x78,0x07,0xf1,0x8e,0xee,0xc6,0x00,0xf8,0xfd,0x84,0x53,0x4d,0x92,0xa1,0xef,0xd0,0xb1,0x12,0x0a,0x12,0x91,0xeb,0x52,0xdd,0x6e,0x15,0x98,0xd2,0xe1,0x53,0x7a,0x0e,0x02,0x83,0xd3,0xd1,0xde,0x72,0x6e,0x5b,0x4b,0x8d,0x40,0xe3,0x2d,0x22,0x59,0x9d +.byte 0xee,0xbe,0x43,0x18,0x62,0x8c,0x77,0x18,0x91,0xf5,0x9e,0xbc,0x3e,0x8b,0x77,0xb6,0xdb,0x5c,0xcb,0xcd,0xdb,0x36,0xea,0xf5,0x1d,0x9b,0xa7,0x13,0xef,0xda,0xd0,0xe8,0xd8,0xb2,0x4c,0xc6,0x19,0x3d,0x77,0x2d,0x0d,0xad,0xe4,0x32,0x24,0xe9,0xd4,0x7f,0x72,0x1d,0xc6,0x6e,0x83,0x7d,0xb8,0x62,0x64,0x9d,0x9a,0xd7,0x13,0x93,0x92,0xf1 +.byte 0x37,0x98,0xcf,0x44,0x66,0xab,0xd1,0x61,0x6c,0x08,0xa7,0x41,0x4e,0x37,0xc1,0x67,0xfb,0x7c,0x22,0x8f,0xbd,0x93,0xb2,0x09,0x13,0xa0,0x48,0x60,0xaf,0xda,0x73,0x2b,0xa3,0x2a,0xf3,0x4d,0x8e,0x22,0x5b,0x7a,0x32,0xe6,0xca,0xff,0x0e,0xa1,0x0a,0x15,0x33,0x31,0x50,0x71,0x1c,0x85,0x26,0x9b,0x19,0xf2,0xe3,0x69,0x4e,0x2d,0xff,0x79 +.byte 
0x80,0xfe,0x2c,0x2f,0x7a,0x49,0x95,0xf3,0x0e,0x78,0xb1,0x0c,0x1c,0x45,0x59,0x68,0x2a,0x37,0xf2,0x48,0x6f,0xd9,0x32,0xf7,0xfc,0xdc,0xbe,0xe3,0xdd,0x61,0x17,0xc0,0x08,0x9d,0xbc,0x2d,0x8d,0x24,0x1c,0xbb,0x53,0xbe,0x37,0x59,0x30,0x87,0xa0,0x14,0xf5,0x08,0xcf,0xd1,0xcc,0x84,0xa7,0x0f,0x69,0xe0,0x77,0x8c,0x0d,0xdc,0x82,0xe5 +.byte 0x88,0x9a,0x58,0x05,0xe3,0x4f,0xdd,0x55,0x1e,0x6e,0x90,0xd5,0x3c,0xa6,0xa6,0x10,0x24,0xe5,0x58,0x97,0xdc,0x31,0x87,0x39,0xdc,0x3a,0xe6,0x24,0x64,0x23,0x45,0xd8,0x01,0x1b,0xf6,0x38,0x68,0x9e,0x62,0x53,0x00,0x97,0x71,0x04,0xb5,0x3b,0x54,0xdb,0xb5,0xcb,0x30,0x91,0x14,0xce,0x94,0xd5,0xe0,0x96,0x70,0x99,0xa5,0xed,0x69,0x32 +.byte 0xc7,0xb7,0x14,0xff,0xc0,0xde,0x19,0x5d,0x31,0xdb,0xa7,0xc0,0x7a,0x94,0xec,0x60,0xfc,0x52,0x71,0x69,0x9b,0xd8,0xbe,0x97,0x0b,0xb5,0x70,0xa7,0x47,0x11,0x37,0x84,0xda,0x3c,0x23,0xfe,0xf2,0x53,0xad,0x55,0x71,0x1e,0x70,0x9b,0x7b,0x61,0x97,0xf8,0x71,0xc4,0xad,0x72,0x98,0x43,0x0c,0x33,0x30,0x2c,0xb2,0xd6,0x21,0x8d,0xbb,0x1b +.byte 0x85,0x82,0x24,0x14,0x85,0x95,0x88,0xff,0x3f,0x8c,0x88,0x96,0xa0,0xf8,0xd7,0x36,0x78,0x37,0x6d,0x92,0x09,0x04,0x76,0x27,0xb9,0xd5,0xea,0x0f,0x07,0x9f,0xe1,0x49,0x0e,0xd1,0x9c,0x46,0xcd,0x2b,0x7a,0x57,0xb6,0x56,0x39,0xe5,0x59,0x6b,0x1b,0x39,0xbf,0x15,0x3b,0x56,0xf5,0xc2,0x08,0x96,0xf5,0x63,0x4c,0x31,0x33,0x65,0x8b,0x74 +.byte 0x4e,0xde,0xa8,0x20,0xe0,0x7c,0x27,0xee,0x91,0x74,0xe8,0x24,0xb3,0xcf,0xa3,0xd4,0xf1,0xb9,0x18,0x43,0x05,0x5d,0x13,0x36,0x82,0xd1,0xbf,0x16,0x89,0x48,0x83,0xf0,0xcc,0x5c,0xbb,0x75,0x7e,0x71,0xc0,0x73,0xd1,0xf5,0x00,0x38,0x7f,0x10,0x98,0xd6,0xb9,0x14,0xea,0xd3,0x3f,0x0f,0xe3,0x61,0x1a,0x5e,0x21,0xd0,0x11,0x58,0x68,0x47 +.byte 0xf2,0xe5,0xe9,0x65,0x9a,0xc1,0xf4,0xa0,0x98,0x8e,0x9f,0x7f,0xbe,0x7e,0xd0,0xb6,0x88,0x4e,0xce,0xc1,0x8b,0xd4,0xd3,0x93,0xb7,0xd8,0xf3,0x0b,0xf3,0x73,0xc9,0x08,0x2f,0xcf,0xd8,0xbd,0xa6,0x1d,0x7c,0xfa,0x44,0x82,0x9f,0x03,0xca,0x56,0x3b,0xbf,0x4d,0x1e,0xbc,0x06,0xc2,0x37,0xfb,0xde,0xd3,0xa9,0xe3,0xae,0x61,0xef,0x26,0x7d +.byte 0xbd,0x2f,0xee,0x2d,0xe1,0x65,0x71,0x77,0xab,0x9c,0x96,0x4f,0x00,0xe2,0xde,0xd7,0x05,0x54,0x00,0xb6,0xaf,0x12,0x0c,0x79,0x1a,0xed,0x20,0x72,0xc7,0x3b,0x3a,0x10,0x15,0x74,0xff,0xbd,0xf8,0xaa,0x8f,0x3a,0x83,0x39,0x24,0xfa,0x53,0x2d,0xc3,0x61,0xfc,0x12,0x6b,0x54,0x33,0xbf,0x83,0xc9,0x59,0x00,0xf0,0xdc,0xa8,0x64,0xbc,0xb5 +.byte 0xc3,0x96,0x60,0x3e,0x7b,0xe2,0x08,0x19,0x92,0x17,0x80,0x9b,0x0c,0x09,0x49,0x68,0x8b,0x15,0xe3,0xce,0x0c,0xfa,0x0c,0x8b,0xf0,0xdc,0x58,0xb0,0x7b,0x82,0x85,0xd2,0x56,0x1c,0xfb,0xb5,0xd0,0x0e,0x0a,0x55,0x61,0xda,0xd8,0x20,0xc1,0x79,0x70,0x3c,0x69,0x8e,0x49,0x5f,0x1c,0xdb,0x22,0xb8,0xdd,0x4c,0x4f,0xca,0xe9,0x0f,0x9a,0x4e +.byte 0xff,0x56,0xbc,0xcf,0x72,0x09,0xa6,0x41,0x38,0xf0,0x7d,0xe7,0x45,0x0a,0x71,0x2c,0x92,0xdd,0x21,0x17,0xb2,0x3b,0x31,0x3c,0x91,0x11,0x69,0x29,0x50,0x31,0xe6,0xa6,0x10,0xc7,0x35,0xe8,0x44,0xec,0x74,0xa3,0x7e,0xb6,0x34,0xe5,0xb7,0xba,0xdf,0x5b,0x2f,0x85,0x02,0x6c,0xb0,0x71,0xb1,0x43,0xff,0x0e,0x47,0x04,0x63,0x4d,0x5b,0x81 +.byte 0x81,0x28,0x8b,0x84,0x79,0xad,0x2a,0x45,0x00,0x1c,0x0c,0x9f,0xef,0x35,0xbb,0x6d,0xc5,0x6a,0x6b,0xef,0x2b,0xae,0x78,0x66,0x05,0x7a,0x61,0x4c,0xe9,0x5e,0xf7,0x95,0x66,0x7e,0x1a,0xa7,0xdf,0x4c,0x4d,0x7c,0x66,0xa5,0x38,0x84,0x86,0x8d,0x66,0xcc,0x7f,0x32,0xb2,0x9c,0xc5,0x0d,0x3d,0xb7,0xb1,0xa6,0xc5,0x80,0x68,0xaf,0x79,0x81 +.byte 
0x15,0x8f,0xec,0x50,0x5c,0x1b,0x57,0x31,0xd2,0xb9,0x16,0x66,0xf8,0x16,0xfd,0xcd,0xc7,0xa8,0x84,0x6f,0x35,0xea,0x3f,0xa4,0x72,0x8d,0xad,0xf4,0xd1,0x14,0x46,0xcc,0x06,0xed,0x71,0x39,0x07,0x99,0x28,0xc8,0xf9,0xc4,0xc2,0xec,0xde,0xb8,0x92,0xae,0xc5,0xf8,0xb2,0x49,0xc9,0x32,0x58,0xec,0x9f,0xb0,0x59,0xaf,0x49,0xef,0xe8,0x0d +.byte 0x4c,0x56,0x8d,0xf7,0x57,0xb0,0x09,0xbe,0xc2,0x6a,0x62,0xc4,0x87,0xf3,0x20,0x07,0xc9,0xe3,0x3b,0x31,0xcc,0x8d,0xcf,0x5d,0x18,0x00,0x2a,0x9f,0xde,0x80,0x1a,0x7e,0x95,0x93,0xd1,0xbd,0xe6,0xd4,0x69,0x37,0x96,0xbb,0x70,0xc5,0x3c,0x87,0x8f,0xff,0x95,0x97,0xfe,0x95,0x56,0x7b,0xba,0x03,0x3d,0x29,0x0f,0xdb,0xd0,0x65,0x4f,0xf8 +.byte 0xa8,0xf3,0x42,0x09,0xb5,0x81,0x34,0xc6,0xa9,0x60,0xb9,0xef,0x3e,0x9d,0xc5,0x42,0x1e,0x79,0x5d,0x2b,0xf2,0x46,0x0d,0xeb,0x88,0x84,0x8f,0xad,0x60,0x69,0x57,0x49,0x33,0xb4,0xdd,0xfe,0x10,0x65,0x65,0x51,0xaf,0x68,0xa0,0xce,0xbd,0xe1,0x6e,0x03,0xe1,0x5f,0xba,0x3f,0x36,0xca,0xed,0x20,0x95,0xfa,0xff,0x3c,0x65,0xa8,0xb1,0x6b +.byte 0xc5,0x91,0xa0,0xd5,0x36,0x38,0x1c,0x38,0xe9,0x1d,0x1b,0x67,0x4c,0x17,0xd3,0x29,0x92,0xa2,0x27,0x76,0x3d,0xe2,0x26,0x37,0x2a,0x2c,0xf6,0xee,0x64,0x40,0x8a,0x1c,0x2b,0xc1,0xd3,0x28,0xd0,0xcf,0x2d,0xc2,0x45,0xf4,0x37,0x5a,0x63,0xfb,0x18,0x67,0x01,0x0a,0xe8,0xe2,0x41,0xf7,0x15,0x47,0xa7,0xe9,0xc8,0x05,0xbc,0xc7,0x8f,0xf0 +.byte 0xc3,0xc5,0x9a,0x4e,0x0d,0x7b,0xf0,0x20,0x8c,0x21,0x49,0x99,0x0d,0xf7,0x34,0x84,0x35,0xfb,0x11,0x33,0xd6,0x46,0x14,0x3c,0xf1,0xb3,0x37,0xac,0x75,0x63,0xe7,0x1a,0x19,0xa4,0x49,0xf2,0x58,0x1d,0x56,0x55,0x64,0x46,0x25,0xff,0x7d,0x90,0x34,0x21,0x5d,0x00,0xa1,0xa8,0xaa,0xe0,0x93,0xe7,0xda,0x11,0x34,0x1d,0xa3,0x0c,0x67,0xae +.byte 0xf5,0x60,0x72,0x14,0xdf,0x08,0xf6,0x72,0x3e,0x48,0x41,0x3d,0x00,0x58,0xfb,0x0c,0x15,0x80,0x2d,0xd9,0x72,0x47,0xa6,0x20,0x6a,0x74,0x9e,0x06,0xb9,0xac,0x68,0x3a,0xe7,0xf1,0x19,0xb8,0x0b,0x66,0x07,0x4d,0xa0,0xb5,0xab,0xea,0x70,0xa1,0xdf,0x41,0x76,0x85,0x18,0x5b,0x6f,0x78,0x5a,0x5d,0x08,0xe0,0x1b,0xd8,0x06,0x73,0x1e,0x16 +.byte 0xcb,0xdb,0x02,0xf8,0x96,0x64,0x65,0xc5,0xc1,0x52,0xd4,0xd8,0xb3,0x1e,0xd4,0x09,0xfd,0xa7,0x30,0x41,0x5a,0xce,0x53,0x4d,0x11,0xc8,0xdd,0x13,0x50,0xd5,0x2e,0xa0,0xe6,0x48,0x49,0x31,0x4b,0x1d,0xce,0xfc,0x42,0xed,0x8f,0xc8,0xb3,0x0a,0xae,0x1d,0x4c,0x1e,0x4f,0x39,0xa4,0x37,0xc8,0x54,0xdf,0x40,0xa6,0x42,0x61,0x7d,0x34,0xd4 +.byte 0x75,0x0a,0x9f,0xf0,0x33,0x54,0xf3,0xc4,0xdc,0x4e,0x2f,0x81,0xc2,0x20,0xaa,0x4f,0xa0,0xae,0xa6,0xb8,0x50,0xf8,0x45,0xf1,0xf2,0xd1,0xd2,0xcf,0xc8,0xf0,0xf4,0x54,0x37,0xdc,0xfb,0x13,0xdf,0x38,0xc2,0x3f,0xe0,0x59,0xb5,0x9a,0x0f,0x27,0x87,0xd4,0xd3,0xdc,0xfd,0xda,0x1d,0xfa,0xdd,0x12,0xe0,0x7f,0x34,0x01,0xde,0x28,0xf5,0x0e +.byte 0xff,0x59,0xc7,0xbd,0x6a,0xe4,0x0c,0x85,0x7b,0x87,0xf9,0xd7,0xe2,0xed,0xb2,0xf7,0xb7,0x13,0xfb,0xfc,0x4d,0x25,0x52,0xfd,0x23,0x6b,0x10,0xd0,0x80,0xd8,0xbd,0xbd,0xf0,0x87,0xfc,0x38,0x85,0x83,0x20,0x5f,0x7c,0x26,0x14,0x93,0xd3,0xe1,0xdc,0xa4,0xda,0xa7,0xf9,0xfd,0x6c,0x9a,0x2b,0x75,0x82,0xf1,0x9f,0x1b,0x0c,0x43,0xd4,0x2d +.byte 0x5b,0x0c,0x54,0x7e,0x61,0x24,0x8e,0x50,0x25,0xd8,0x54,0xfd,0x30,0xec,0x4c,0xa8,0xb6,0xf0,0x35,0x67,0xf7,0xe4,0x3c,0xfd,0xc8,0x40,0xf4,0x2d,0xc5,0x4d,0xc3,0x29,0xc2,0x88,0x60,0xab,0xd9,0x2a,0xe8,0x31,0xcc,0x0c,0x9f,0x97,0xa8,0x2e,0xaa,0xa5,0xb6,0xee,0x3c,0x71,0xa9,0xff,0x90,0xb4,0x43,0x2e,0x16,0x80,0x8c,0xfe,0xb5,0x7a +.byte 
0x40,0x58,0xd5,0x98,0x7e,0xca,0xaf,0x95,0xee,0x00,0x26,0x8d,0x5b,0xba,0x33,0xee,0x35,0xb5,0x9b,0xf8,0x08,0x1e,0x15,0x2d,0x01,0xb1,0x83,0xa6,0x57,0x58,0xd1,0xf3,0xa4,0xf1,0x3a,0x00,0xf4,0x40,0xee,0x35,0x3a,0x20,0xc2,0x13,0x1e,0xda,0x32,0xc2,0x35,0x74,0x29,0xce,0x51,0x3f,0xec,0xb2,0xd7,0x23,0xa7,0xc6,0xef,0x70,0xb9,0x88 +.byte 0x6f,0xa8,0xf5,0x5b,0xff,0xc5,0xf5,0xb4,0x3b,0x12,0x75,0x20,0xbf,0x61,0x8a,0xb1,0xae,0x01,0x9b,0x17,0xf4,0xf3,0x2d,0xfb,0x44,0xe8,0xac,0x29,0x81,0xc2,0x6d,0x50,0x05,0x11,0xd9,0x43,0xf8,0xc7,0x58,0x5d,0xbc,0x2d,0xc0,0x83,0xd2,0x81,0x41,0x1c,0x46,0x62,0x60,0x6e,0x65,0x52,0x4b,0x1c,0x88,0x72,0x1b,0x0e,0x8e,0x7d,0xa2,0xb5 +.byte 0x4e,0x28,0x32,0xf2,0xb1,0xfa,0xf1,0x4b,0xc5,0x85,0x95,0x2c,0x08,0x78,0x85,0x68,0xe5,0x20,0x23,0x8b,0xc4,0xf5,0xb2,0xdb,0xc1,0xdd,0xe5,0x69,0xa4,0x97,0xa9,0x6c,0x2e,0x3a,0x25,0x1c,0x24,0x54,0x97,0x3e,0x8d,0x61,0x61,0xa3,0x60,0xf5,0xd2,0x4e,0x90,0x25,0x06,0x09,0x31,0x7b,0x96,0xce,0xcc,0xb7,0xbc,0x63,0x9f,0x04,0x7d,0xec +.byte 0xa1,0x4a,0x65,0xd3,0x26,0xe1,0xbf,0xf9,0x88,0xea,0x5c,0x5d,0xfe,0xe9,0x60,0x77,0xbd,0xf2,0xa0,0x11,0x91,0x24,0xca,0xa1,0x0d,0x05,0x7b,0xe2,0x7d,0x22,0x2e,0xd2,0xc9,0x4b,0x78,0xce,0x0c,0x7b,0x49,0xaf,0xd6,0x59,0x5f,0xb4,0xbd,0x2e,0x4a,0x22,0xcb,0x5d,0x1c,0xd5,0xde,0xea,0x86,0x74,0xd5,0x15,0x52,0x59,0xfc,0x3d,0x7b,0x1c +.byte 0x3f,0x14,0xec,0xf2,0xc8,0x3c,0x88,0xbf,0x89,0xd5,0x23,0xc3,0x94,0x3c,0x28,0x04,0x91,0x6c,0x36,0x35,0x4b,0x75,0xf8,0xdc,0xf3,0xff,0xba,0x8c,0xa4,0xc7,0x85,0xc5,0x1a,0x30,0x4b,0x7c,0xc5,0x2f,0xb9,0x2a,0x14,0xaa,0x65,0xe3,0x92,0xdc,0xe1,0xed,0x3f,0xb6,0xff,0x0e,0x74,0xe0,0xb3,0xc9,0x4b,0xd1,0x96,0xfc,0x49,0x72,0xbe,0xb0 +.byte 0xc8,0x4a,0xd5,0xf0,0xb3,0x58,0x29,0x35,0x97,0xd4,0x5c,0xc7,0x0b,0x27,0x1d,0x14,0xdb,0xb7,0x5c,0x7e,0x6d,0xc1,0x56,0xa9,0x80,0x72,0x7d,0x75,0xc2,0x2f,0x07,0x28,0xb4,0xff,0xef,0xa7,0x34,0xed,0x31,0x44,0x85,0xe6,0xc3,0xa4,0x5f,0xe2,0xe8,0xab,0xd1,0x59,0xe7,0x32,0x20,0xd1,0xcc,0xef,0x6f,0xe1,0x10,0x89,0x6c,0x0c,0xf3,0x5f +.byte 0xe8,0xc7,0x1c,0x3b,0xeb,0x3e,0xa5,0x53,0x2d,0x48,0x64,0x92,0xa0,0xec,0xf3,0x75,0x5b,0x5b,0xe2,0x83,0x87,0x04,0xa7,0xd8,0x1b,0x44,0xfb,0x42,0xee,0xd8,0xf2,0x98,0xff,0x30,0xc8,0x09,0xf8,0x1a,0x95,0x46,0x2d,0xe7,0x43,0x10,0x90,0xf4,0x2c,0x8f,0x0b,0x60,0x6d,0xeb,0xbf,0x19,0xc1,0x9d,0x5c,0xc0,0xff,0xb1,0x86,0xbc,0x01,0x73 +.byte 0x35,0x1f,0xd8,0xf4,0xa1,0xd4,0x7f,0x2d,0x1b,0xf9,0xa6,0x78,0x1a,0x2e,0x2c,0xe2,0xcc,0x8b,0x5f,0xbb,0xb9,0x80,0x31,0x32,0xa5,0x5d,0x70,0x59,0xae,0xe3,0xac,0xab,0xde,0x38,0x09,0x07,0x57,0x5f,0xbf,0xe8,0xa0,0xb8,0xd0,0x03,0xac,0x02,0x0d,0x7f,0x7e,0x0c,0xd2,0xcf,0x46,0x01,0x07,0x9f,0x16,0xf6,0x2b,0x94,0xaf,0xae,0x66,0x09 +.byte 0xca,0x4c,0x5f,0x37,0x53,0xa6,0x50,0x82,0x3a,0x0a,0x7b,0xb3,0x52,0x2e,0x0f,0xe4,0x64,0xab,0x40,0x21,0x2d,0xb7,0x20,0x9b,0xe3,0x2f,0xec,0x2b,0xb3,0x31,0x60,0x51,0x2e,0xb6,0x68,0xac,0xae,0xee,0x2d,0x28,0x5b,0xe0,0xa7,0x85,0xab,0x95,0xba,0x53,0x8c,0xc0,0xf8,0x16,0x8f,0x42,0x01,0xef,0x00,0x32,0x44,0x8e,0x41,0xc9,0x05,0x5b +.byte 0xe0,0x3f,0xe1,0xd8,0xd4,0x97,0x8e,0xa0,0x14,0x84,0xce,0x5c,0xef,0xbe,0xa4,0xae,0x18,0x91,0xd9,0x48,0x9b,0xc3,0x7a,0x8f,0xfb,0xb3,0x3e,0xa9,0x87,0x74,0x84,0xd2,0xc6,0x7c,0xc9,0xce,0x01,0xa5,0xcc,0xff,0x5a,0xe8,0x94,0x98,0x54,0x2a,0x6e,0xd9,0x58,0x75,0xd4,0xdd,0x6c,0x7d,0x83,0x32,0xc9,0x4e,0x35,0x2c,0x51,0x26,0x68,0x1f +.byte 
0x95,0x20,0x82,0x54,0x0a,0xad,0x5e,0xe2,0xba,0xf9,0xa3,0x54,0x24,0x93,0x4a,0x62,0xff,0x28,0x05,0xd2,0x22,0x62,0x82,0xd4,0x2d,0xe2,0xec,0x66,0xc5,0xee,0x63,0xd0,0xf6,0x93,0xa8,0x37,0xbf,0xdd,0xe0,0x95,0x0b,0x19,0xa1,0x9d,0x9a,0xf8,0x94,0x1a,0x3a,0x50,0x9e,0x66,0x75,0x8c,0x25,0xbd,0x18,0xb0,0x58,0x76,0x7f,0x2d,0x3d,0x06 +.byte 0x02,0xb3,0xcf,0xa3,0x14,0x6e,0xe7,0xc8,0xcd,0xe6,0xbe,0xae,0x92,0xd6,0xa2,0xfe,0x12,0xf0,0xdf,0x9f,0x9e,0xad,0x77,0x77,0xfb,0xfc,0x36,0xb7,0x82,0x9c,0xf1,0x51,0xc2,0x58,0xa0,0xf3,0xa0,0xd6,0x6e,0x64,0x28,0xac,0x09,0x8f,0x7b,0xef,0x19,0x87,0x76,0xb9,0x4e,0xca,0x1f,0x05,0xb6,0x00,0x4a,0x14,0x83,0xaf,0xff,0xd9,0xa1,0xc6 +.byte 0x0f,0x98,0x3a,0xcf,0x85,0x18,0xea,0xa6,0x9a,0x1e,0xae,0x7c,0xaa,0xae,0xef,0x89,0x5e,0x14,0x5d,0x2f,0x73,0x8f,0xd1,0xf0,0x77,0xcd,0x45,0x92,0x7f,0xee,0xb9,0x7c,0xc2,0x3c,0xff,0x56,0x56,0xa5,0xa5,0x49,0xe4,0x20,0xd6,0xa2,0xb6,0xe4,0xfc,0x86,0x53,0xce,0x9e,0x2b,0x7b,0xcb,0xcf,0x6a,0xd5,0x62,0xb7,0x34,0x0e,0x39,0xe2,0xaa +.byte 0x1c,0x24,0x30,0x71,0x94,0xb3,0x57,0xd8,0xe8,0xd4,0xc5,0x4f,0x33,0x2c,0x73,0x7e,0x48,0xba,0xb3,0x55,0x84,0x6d,0x10,0xcf,0x8f,0xf2,0xb6,0xdb,0x4e,0xcf,0x49,0x08,0xf6,0x5a,0x3c,0x7e,0xef,0x3f,0x5c,0x11,0x09,0xfe,0x26,0xfb,0xff,0x30,0xcb,0x81,0x12,0xea,0x1e,0xa9,0x6e,0xf8,0xea,0x4f,0x92,0x2c,0x23,0x99,0x35,0xa5,0x59,0xca +.byte 0x1d,0x66,0x72,0xad,0x5b,0x7c,0xb3,0x4a,0x7c,0x76,0x4c,0xf6,0xc1,0xec,0x68,0x5f,0x2c,0x17,0xbe,0x92,0xe1,0xa1,0xee,0x40,0x24,0x25,0x6b,0xc5,0x0b,0x6f,0x06,0xc0,0x05,0x8c,0x23,0x24,0x76,0xea,0xe9,0xb9,0xa1,0x3d,0x59,0x15,0xe7,0x65,0x47,0x5a,0x75,0x9b,0xc8,0x7b,0x86,0x97,0xf4,0x4a,0xa3,0xec,0x54,0x0e,0x66,0xef,0xda,0x41 +.byte 0xb8,0x3b,0xa6,0x86,0x63,0xe1,0x4e,0x89,0x92,0x40,0xf4,0x8b,0x32,0x47,0x3b,0x4b,0xb4,0xe6,0xd8,0x4b,0x1c,0xac,0x03,0xab,0xde,0x2e,0x63,0x96,0x3f,0x27,0xa1,0x32,0x11,0x35,0x24,0x6a,0xe9,0x0b,0x73,0x61,0x4e,0xd8,0xdc,0x91,0x98,0x01,0x8a,0x0d,0x61,0xec,0x39,0xbe,0x3b,0xb9,0x78,0x77,0xea,0xaa,0xa2,0x12,0x20,0x92,0x98,0x16 +.byte 0x27,0x3b,0xd1,0xfa,0x59,0xef,0x81,0x38,0x9f,0x42,0xe8,0xb4,0xab,0x4f,0x26,0x9a,0xe7,0x0b,0x05,0x03,0xfa,0xe1,0xe1,0x3d,0x45,0xac,0x7d,0x40,0xcc,0x2f,0xf2,0xb0,0x33,0x42,0x14,0xbd,0x91,0x3e,0xe1,0xb7,0x17,0x25,0xc3,0x92,0xcb,0x9e,0x44,0x1e,0x13,0x93,0x98,0x1f,0x96,0x64,0x3a,0xaa,0x53,0x9a,0x18,0xc0,0x34,0x3c,0x47,0x94 +.byte 0x14,0x70,0x67,0x76,0x2a,0x82,0xd3,0x6a,0x18,0x13,0xe7,0x01,0x8d,0x97,0x52,0x51,0x8e,0x08,0xde,0x44,0xb0,0x74,0x07,0x58,0x35,0xc2,0x29,0xb5,0xd7,0x00,0x46,0x31,0x34,0xd7,0x1f,0xdd,0xaa,0x5c,0x27,0xc7,0x37,0x71,0xe8,0xbe,0xad,0x89,0xf1,0xb2,0xd1,0x46,0x33,0x0c,0x2f,0x26,0x21,0x5e,0xc9,0xda,0x25,0xcd,0xd0,0x17,0x23,0x87 +.byte 0x15,0xc2,0xa0,0x1a,0x9f,0x6e,0xfb,0x63,0xe9,0x69,0xdf,0x79,0x18,0x33,0x2f,0x47,0xca,0x54,0x23,0x7e,0x4f,0x6e,0x38,0x06,0x99,0xfb,0xcd,0x22,0xdb,0x4b,0x3f,0x8a,0x05,0x2e,0x5c,0x56,0x65,0xb7,0xab,0x57,0x8b,0xdd,0x28,0xab,0x7e,0x77,0x32,0x0f,0xc6,0x3c,0xf3,0xde,0x43,0xb0,0x13,0x3b,0xbd,0x28,0x3a,0x8b,0xd5,0x6b,0x1d,0x5d +.byte 0x20,0x1a,0x5f,0xa6,0x01,0xed,0x88,0x7f,0x87,0x55,0x38,0xc2,0x0d,0x03,0x6c,0x41,0x6a,0x43,0xdf,0x09,0xf3,0x58,0x69,0x13,0xa1,0xd6,0x39,0x0c,0x8e,0x8f,0x40,0x67,0xe8,0x0e,0x9b,0x9b,0x42,0x30,0xd7,0xae,0x04,0x75,0x66,0xfb,0x4a,0xa7,0xe0,0xe9,0xea,0x6d,0x28,0x4f,0xc0,0x5c,0xd4,0xd4,0xb7,0x60,0x5a,0x35,0xc1,0xe8,0x5f,0xc3 +.byte 
0x4f,0x7a,0x5d,0x8d,0xc2,0x29,0x6e,0x36,0x50,0x5b,0x82,0x63,0xf2,0xda,0x8d,0x02,0x61,0x09,0x69,0x0a,0x47,0x9d,0x58,0xf3,0xf6,0xe0,0xc0,0x09,0xd9,0x3b,0x8d,0xf5,0xba,0xf6,0xc4,0xf0,0x65,0x89,0x7b,0xdd,0x93,0x6b,0x6e,0x21,0xa1,0x2a,0x66,0xe0,0x8f,0x62,0xb0,0x49,0x60,0xa3,0x48,0x42,0x62,0xcc,0x26,0x1f,0x59,0x3a,0x7b,0xa7 +.byte 0x82,0x10,0x5f,0xc6,0xf8,0xa2,0xc0,0x07,0x7b,0x26,0x26,0x11,0xe2,0x5b,0xb8,0x86,0xb7,0x66,0xcf,0x0a,0xcc,0x6f,0xe8,0x02,0x22,0x4c,0x13,0x75,0xdc,0x68,0xf0,0x7c,0x0c,0x46,0x9a,0xa2,0x4c,0xf5,0x50,0x3f,0xf9,0xbc,0x01,0xb1,0xa1,0x28,0x90,0x07,0x6b,0x17,0x69,0x89,0x7b,0xe5,0x0a,0xf7,0x7b,0xe1,0x94,0x30,0xfc,0xd3,0x8d,0xd3 +.byte 0x99,0x37,0x91,0xd5,0xdf,0x59,0x2a,0x4f,0xfe,0x6c,0x37,0x4b,0x78,0x2c,0xa9,0x28,0x6a,0x5c,0xd6,0xe1,0x0b,0xad,0xae,0x62,0x7c,0x09,0xb8,0x90,0x3f,0x29,0x37,0x7b,0x79,0xee,0x55,0x02,0x05,0xef,0x28,0xa2,0xc7,0x07,0x2b,0xe6,0xab,0x87,0x9d,0x8f,0x4c,0x0f,0xc1,0x75,0x5d,0x88,0x7f,0x26,0xe0,0x1e,0xf8,0x3f,0xb5,0x2a,0x6c,0xe6 +.byte 0x7f,0x85,0xae,0x55,0x7b,0x58,0x34,0x4c,0x81,0x05,0x21,0xa1,0x5e,0xd7,0xb6,0x20,0x6e,0xf9,0x60,0x15,0xa4,0xb2,0x8f,0x68,0xd2,0x23,0x9f,0xbf,0xfa,0x6a,0xcb,0x87,0x7d,0x41,0x4a,0xae,0x28,0x4f,0x9e,0xbb,0x69,0x1c,0x37,0xb2,0xc9,0xd2,0x21,0xa1,0x2b,0x6b,0x5d,0xff,0xd6,0xdb,0x8f,0x21,0xd9,0x17,0xd6,0xe6,0x74,0xf2,0x20,0x0e +.byte 0x06,0xb5,0x0c,0xdc,0x74,0x4e,0x93,0xcb,0x27,0xc7,0x4b,0xf3,0xef,0x46,0xa8,0xf0,0x58,0x1c,0xa0,0x65,0x09,0x84,0xc7,0x2e,0xba,0x51,0xd9,0xd4,0x53,0x20,0xc7,0x20,0x85,0x93,0x2b,0xf3,0x42,0x93,0x7b,0x22,0x1c,0x8d,0x22,0x76,0xcf,0xde,0x6a,0xa1,0x76,0xea,0x65,0x20,0x2f,0x2e,0xdb,0x85,0xdd,0x73,0x43,0xf8,0xe0,0xe3,0x3a,0xe5 +.byte 0x02,0x57,0x96,0x54,0xbc,0xaf,0xa4,0xd5,0xda,0x9d,0x9d,0x8b,0x85,0x01,0x7c,0x72,0x03,0xfe,0x39,0x46,0xab,0x04,0xcc,0x62,0x71,0xf5,0xa5,0x67,0xd7,0xfc,0xc0,0xb6,0x95,0x74,0xdf,0x1c,0xfe,0x1c,0x5b,0x25,0xae,0x42,0x75,0x00,0x71,0x3c,0xec,0xfc,0x3c,0x7b,0x0f,0xec,0x44,0xc7,0xec,0x9b,0x86,0xf5,0x3d,0x47,0x15,0xf0,0x25,0xba +.byte 0x43,0xc8,0x68,0x15,0x4f,0xeb,0x35,0x76,0x2d,0x04,0xb7,0x9b,0xb8,0xa7,0x0d,0xb3,0xb4,0xf2,0x93,0x85,0xb1,0xb8,0x81,0x7c,0xd6,0x5f,0xbd,0xc2,0xcc,0xf4,0x0e,0x98,0x2c,0x06,0x54,0x2f,0x5e,0x49,0x94,0x93,0x78,0xa0,0x0a,0x33,0x2e,0x3f,0xb2,0xa7,0x81,0xed,0xe9,0xb6,0xb5,0x86,0x4b,0xa5,0xc0,0x51,0x30,0x9d,0xe2,0x9f,0xc2,0x56 +.byte 0x92,0x6b,0x96,0xca,0xcb,0x65,0x5c,0x0e,0xf4,0x91,0x2b,0x89,0xf4,0x27,0x55,0x26,0xd7,0x7b,0x00,0x19,0x1f,0x67,0x4e,0x43,0x24,0x81,0x05,0xb7,0xc6,0x41,0x1a,0x39,0x3d,0x40,0x3e,0x8a,0x03,0x94,0x63,0x1b,0xb1,0x87,0xb6,0xe1,0x52,0xd0,0xe8,0xbb,0x0e,0x37,0x72,0xe5,0xde,0x86,0xc0,0xdf,0x5b,0xc2,0xc6,0x0a,0x67,0xa7,0x4c,0x03 +.byte 0xb6,0xd8,0x7f,0x1d,0xb3,0xe3,0x84,0xb7,0x5c,0x04,0x15,0xe0,0xd0,0xae,0x44,0xac,0x39,0xa5,0xa2,0x86,0xc8,0xad,0x27,0xa0,0x36,0xa1,0x6e,0xaa,0x87,0x7a,0x43,0xae,0xa0,0x45,0x1a,0xac,0x04,0xe2,0x55,0xf2,0x9a,0x97,0x67,0xfb,0x01,0x8f,0xb8,0x80,0x9c,0x27,0x1d,0xbe,0xa3,0xf1,0x6d,0x66,0xf2,0x1a,0x99,0x99,0xf6,0xa5,0xba,0x58 +.byte 0x28,0x58,0xb5,0x44,0x5b,0x38,0x4a,0x3f,0x37,0x85,0x7e,0x36,0x8e,0x16,0xb9,0x1e,0x0b,0xbf,0x7d,0x0a,0x0c,0x83,0x53,0x0d,0xcc,0x37,0xe1,0x42,0xbb,0x0d,0xfc,0x01,0x25,0x10,0xbe,0xb5,0x83,0x2f,0xa5,0x42,0x98,0xbc,0xd6,0x50,0x75,0xda,0x32,0x2b,0x3f,0xd6,0xc1,0x1a,0xe7,0x0b,0x80,0x07,0x6f,0xfe,0x77,0x9e,0xe9,0x1e,0x45,0x65 +.byte 
0x68,0x92,0x34,0x8b,0xce,0xf3,0xcd,0x94,0x17,0xe0,0x41,0x92,0x96,0xb5,0xd1,0x98,0xd1,0x25,0xd1,0x3d,0x76,0x88,0x86,0xb1,0x01,0x80,0xc7,0xde,0x60,0x20,0xb8,0x03,0xe7,0x3f,0x44,0x39,0xb1,0xb8,0x19,0x53,0x5a,0xc6,0xa0,0x18,0x8e,0x0e,0xb6,0xfd,0x7e,0xe7,0x7e,0x8a,0xeb,0x4c,0x35,0x4a,0x0f,0x52,0x81,0x68,0x12,0xe4,0x46,0x2e +.byte 0x20,0xb4,0x41,0x59,0xb3,0x16,0x02,0x9f,0xdb,0xe8,0xea,0xfd,0xe3,0x5d,0x14,0xd0,0x97,0x52,0x66,0xcb,0xb4,0x48,0xa3,0x05,0xab,0x73,0x8e,0x2c,0x46,0xc2,0x94,0xd5,0xc8,0x57,0xc4,0x13,0xa4,0x0b,0x7c,0x34,0xbf,0xb4,0x07,0x28,0x92,0xe2,0x1d,0x00,0xa6,0xf0,0xb0,0xbf,0xdd,0x5d,0x20,0x05,0x9f,0x53,0xcf,0x07,0xf7,0xe8,0x79,0x04 +.byte 0x57,0xd1,0xac,0x9c,0xdd,0xae,0xcd,0x8b,0x04,0x0a,0x2d,0x0a,0x0f,0x21,0x09,0xc8,0x0d,0xfa,0x23,0x26,0xe3,0xdb,0x84,0xc8,0x8e,0x9c,0x96,0x93,0x4f,0xcc,0x2f,0x96,0xed,0x04,0x91,0x0d,0xc7,0xbb,0x27,0xa3,0x6b,0x9d,0xe2,0x15,0x83,0x31,0x78,0xb5,0xb9,0x6d,0xb1,0x6c,0xa2,0x3e,0xf5,0x45,0x77,0xf4,0x96,0x3a,0xe6,0x10,0x08,0xfd +.byte 0x23,0xcc,0xda,0x27,0x73,0x67,0xbb,0x8b,0x59,0xe2,0xcf,0xda,0x57,0xf9,0x17,0xeb,0xeb,0x98,0x39,0x48,0xbf,0x3d,0x5b,0x7b,0xc2,0x11,0x4b,0xd6,0xb6,0x8a,0x14,0xb3,0xf5,0xc3,0x18,0xff,0xde,0x62,0x98,0x4a,0x1d,0x6b,0x4e,0x00,0x4f,0x7d,0x2f,0x67,0xf4,0x22,0x1e,0xdb,0x69,0xd5,0x87,0xfd,0xee,0x97,0x56,0xd4,0x00,0x0c,0x9e,0x22 +.byte 0x11,0xda,0x8e,0x3b,0x91,0xad,0xf1,0xb6,0x0a,0xba,0xe7,0xc6,0x14,0x0e,0xc4,0x85,0x5f,0x7d,0x69,0x7d,0x73,0x9c,0x83,0x6a,0x69,0xef,0x10,0xb0,0xe6,0x33,0x32,0x0f,0xd8,0x54,0xa4,0x9d,0x39,0xaf,0xfc,0x6d,0x4f,0xeb,0x34,0x89,0x2e,0xb0,0xa1,0xcd,0xe1,0x5b,0xab,0xe1,0xff,0x82,0x85,0x6b,0x5e,0xa9,0x9e,0x43,0x02,0x0d,0x38,0x33 +.byte 0xe1,0xbc,0xa4,0x77,0x8a,0x5e,0x54,0xa8,0xcf,0xc9,0x76,0xcb,0x73,0x21,0x1f,0xa7,0x1e,0x5c,0x0a,0xd6,0xa2,0x36,0x6f,0x07,0xa1,0x6b,0x0d,0x5a,0x21,0x3a,0xc3,0xc0,0xcd,0x9d,0xed,0x83,0x96,0x89,0xaa,0x55,0x56,0xfd,0x0a,0x97,0x3a,0x50,0xfd,0x95,0x3f,0xb7,0xfa,0x87,0x7d,0xa6,0x5d,0x12,0x65,0x3f,0x61,0x4f,0x86,0xdd,0x58,0x64 +.byte 0xd7,0xde,0xd6,0xb9,0x68,0x87,0xde,0xba,0x96,0xf5,0x1c,0xec,0x8e,0x81,0xfc,0xca,0x77,0xe2,0x85,0x11,0x93,0xc7,0xf2,0x0f,0x77,0xbb,0x7c,0xed,0x20,0x7a,0xe3,0xc5,0x76,0xff,0x04,0xc7,0xe6,0x7a,0xa1,0xfe,0x58,0x52,0x1b,0xec,0x27,0xbb,0xd4,0x27,0x7c,0xc7,0x4a,0xfb,0x07,0x62,0x99,0x36,0xff,0x6e,0x71,0x2f,0xbd,0x25,0xff,0x8d +.byte 0x97,0x14,0x56,0x23,0x7f,0x13,0x89,0x10,0xd8,0x29,0x1f,0x91,0x56,0x52,0x85,0xa7,0xd3,0x04,0xc9,0xe2,0x09,0xa2,0x0f,0xaa,0x28,0xb1,0x79,0xf9,0x08,0xf4,0x14,0x57,0xc4,0x54,0xd7,0x69,0xb0,0x37,0xf0,0x80,0x90,0xce,0x75,0x81,0xe7,0x75,0x0f,0x7f,0x71,0x58,0x3b,0x78,0x53,0x9b,0x4a,0x5e,0xcc,0x23,0x04,0x9e,0x0c,0xd7,0xd8,0x69 +.byte 0x90,0xdf,0x36,0x99,0x90,0xd3,0xfa,0x35,0xf7,0x13,0x64,0xb0,0xc0,0x70,0x0c,0xd4,0x87,0xc0,0xca,0xd8,0xca,0x8a,0xc3,0x9a,0xfa,0x73,0x34,0x18,0xe9,0x3a,0x85,0x42,0xc5,0xe1,0xaa,0xb5,0x87,0xac,0x43,0x9c,0xfa,0x7e,0x05,0x35,0xed,0x7e,0x0d,0x38,0x82,0x17,0x7f,0x22,0xa2,0x3d,0xd3,0x0d,0xd1,0xff,0x0a,0x68,0x52,0xd2,0x17,0x59 +.byte 0xaa,0x57,0xbd,0xd3,0xea,0x0c,0xe8,0xb0,0x22,0x13,0x59,0x42,0x46,0x34,0x58,0xa9,0x16,0xc5,0x9f,0x88,0x8f,0x75,0x02,0xbf,0x63,0xda,0x28,0xba,0x9a,0xcf,0xbb,0x73,0x58,0xb1,0x13,0xf2,0x68,0xd8,0x6b,0xfd,0x49,0x50,0xcf,0x09,0xea,0x6a,0xff,0x20,0x39,0xc5,0xae,0x70,0x79,0xea,0xec,0x9d,0x09,0xf8,0x51,0x1f,0xfd,0x01,0xd5,0x9f +.byte 
0xec,0x29,0x36,0xfc,0x39,0xb4,0x4c,0x1f,0xe6,0xb4,0xcc,0x97,0x21,0xe5,0x19,0xe9,0x7a,0x60,0x6d,0x39,0x3c,0x31,0xd4,0x43,0x76,0xba,0x10,0xd9,0x3f,0x75,0x7a,0xa6,0x1d,0x02,0x88,0x3d,0xa5,0x9f,0x91,0x61,0x4e,0x32,0xec,0xf5,0xd3,0xe4,0x65,0xf7,0x0e,0x3b,0x8a,0x8f,0x22,0x31,0x71,0x8f,0xf1,0x5f,0x7b,0x04,0x88,0xf9,0x88,0x67 +.byte 0x14,0x85,0x74,0x9e,0x54,0x0b,0xed,0x7a,0x48,0xcd,0xcf,0xd2,0x05,0x38,0xd5,0x58,0xa2,0xaf,0x6a,0x28,0x21,0xfd,0x38,0x4e,0x83,0x06,0x15,0x60,0xfb,0x89,0x2a,0x72,0xfe,0x75,0xc7,0xa4,0xae,0xe4,0x5b,0xbb,0xde,0x54,0xde,0x77,0xbb,0x9d,0xd2,0x07,0x05,0x61,0x53,0x65,0x31,0xd4,0x3a,0x8a,0x7d,0x9d,0x30,0x09,0x25,0x28,0x72,0x19 +.byte 0xe4,0xae,0x1d,0xbf,0xa7,0xef,0x75,0xd0,0xe3,0xdc,0x0b,0xd1,0x17,0x9c,0xc6,0xdf,0x65,0x9a,0x7c,0x9d,0x0b,0x9a,0x3d,0x8f,0xb0,0xf5,0x51,0x46,0x6b,0x12,0x0d,0xe6,0xa9,0x3a,0xb5,0xe9,0x52,0x85,0xa5,0x25,0x1f,0xc9,0x8b,0xff,0xe3,0x37,0x25,0x97,0xd8,0x91,0x17,0xed,0xcf,0x2a,0x6d,0x4f,0xef,0x74,0x5e,0x92,0xa2,0x2d,0x84,0xa6 +.byte 0x09,0xc4,0xfc,0x36,0x95,0x54,0x25,0x9e,0xeb,0xd9,0xea,0x5a,0x01,0x0c,0x54,0xdb,0x82,0x01,0xed,0x0b,0xf7,0x9f,0x0d,0x8f,0x2e,0xee,0x7c,0x6e,0xb3,0xe7,0xe8,0x04,0xef,0x8d,0x5e,0xfe,0x3d,0x96,0x3a,0x65,0xd3,0xb2,0x11,0x75,0x1c,0x6f,0x2a,0xd3,0x26,0x1f,0x5f,0x35,0x02,0x0b,0x9f,0x38,0x5b,0xa5,0x3a,0x90,0x3e,0x03,0x9f,0x50 +.byte 0xf2,0xd7,0xe4,0x3c,0xd3,0x28,0x67,0x0a,0x5a,0xe8,0x59,0x6f,0x38,0x8f,0x8b,0x0d,0xe4,0x1c,0xfc,0x6e,0x07,0x69,0x7b,0xfb,0x04,0x30,0xe7,0xa6,0x13,0xfb,0x33,0xa0,0x52,0x6a,0xec,0x64,0xad,0x90,0xbd,0xba,0x15,0x12,0x48,0xed,0xd1,0x94,0x2d,0xe7,0x19,0x28,0x5e,0x7a,0x94,0xf4,0x79,0xd7,0x79,0xc9,0xf6,0x16,0xb4,0x88,0xee,0x15 +.byte 0xa2,0x68,0xe3,0x1d,0xd0,0xd2,0x63,0x78,0x7c,0xb3,0x30,0xac,0x63,0x7a,0x36,0xc5,0x50,0xbf,0x57,0xf6,0xfe,0x4e,0x43,0x4e,0xf9,0xc4,0xa2,0x2a,0xa7,0xa4,0x2c,0x18,0xb9,0x43,0x7b,0xe8,0xf6,0x14,0x4f,0x07,0x6e,0x65,0x9a,0xdd,0x10,0x2a,0x4c,0xa4,0x58,0x86,0x19,0xad,0x6d,0x5e,0x30,0xfb,0x5f,0xb6,0x9f,0x2a,0xac,0x90,0x0d,0xae +.byte 0xf9,0xab,0xc1,0x33,0xd3,0x73,0x1d,0x46,0xe5,0xc8,0x1e,0x1d,0x61,0xf1,0xda,0x53,0x3e,0x61,0xf0,0x9a,0xe4,0xb7,0x04,0xe9,0x5e,0xf6,0x11,0xa6,0x56,0x39,0xed,0xfb,0x06,0xd0,0x92,0xb9,0xb8,0xb5,0x3b,0x39,0xec,0xa5,0xc0,0xb1,0x7e,0x7e,0xfb,0x89,0x86,0xa8,0x70,0x47,0xa5,0x60,0x8c,0xf8,0x47,0x31,0x04,0x54,0x29,0xf3,0xa2,0x79 +.byte 0xac,0x24,0xda,0x33,0x6c,0x1c,0x34,0xc2,0xa0,0x96,0x27,0xbb,0x31,0xbf,0xc1,0xd9,0xc8,0x35,0xbc,0xb3,0x13,0x8a,0xb6,0x25,0x92,0xdc,0xcc,0x3b,0x8a,0x65,0xf3,0xf9,0xd1,0x2a,0xcd,0xb0,0xf4,0xd7,0x44,0xa0,0x27,0xfc,0x0e,0x69,0x46,0x0b,0x56,0x5b,0x58,0x40,0xd9,0xc4,0x37,0x9b,0x4d,0xa1,0x45,0xd8,0xab,0x4d,0x02,0x31,0x4f,0x93 +.byte 0x56,0xd0,0x26,0x99,0x1c,0xc7,0x2b,0xc2,0x80,0xb4,0xbd,0x6e,0xfe,0xa1,0xf7,0x8f,0x13,0x74,0x2c,0xa8,0x63,0xb1,0x3d,0x6d,0x32,0x4a,0x80,0x6a,0x7f,0xcf,0x6c,0x51,0xa9,0x21,0x34,0x4e,0x13,0x19,0x8f,0x33,0xfc,0x06,0x46,0x05,0xf0,0xcf,0xf1,0xce,0x20,0xe0,0x40,0xf2,0x0a,0xd0,0xf6,0xcc,0xcc,0xc2,0xc7,0x07,0x2e,0x9e,0x0a,0x1e +.byte 0x53,0x59,0xbb,0xe3,0x02,0xc8,0x20,0x9f,0x3c,0xe6,0xec,0xf7,0x8a,0x6d,0x3c,0x0f,0xb3,0x14,0x66,0x5c,0x51,0xbe,0x82,0xc2,0x0b,0x10,0x63,0xa9,0xd4,0x7f,0x12,0x88,0x13,0x81,0x8a,0x06,0x8a,0x7f,0xc8,0x89,0xe7,0xbd,0xce,0x51,0xdc,0x93,0x03,0x07,0x6f,0x8c,0xe6,0xcc,0x0d,0x45,0xa8,0xfc,0x02,0xe2,0x3e,0xa7,0xc8,0x83,0x77,0x98 +.byte 
0x91,0x4e,0x1f,0x8d,0xed,0xa5,0x38,0x54,0x0e,0x4e,0x53,0x1c,0x0c,0x47,0x11,0x59,0x54,0x15,0xb5,0x47,0xb0,0x21,0xa1,0x3d,0xaa,0xef,0xee,0x9e,0x26,0x3c,0x39,0x75,0xff,0x1a,0x8c,0xbb,0x1a,0x49,0x62,0x21,0x76,0xe8,0x3d,0x10,0x55,0xf5,0x5a,0x44,0xf0,0xb3,0x81,0xd0,0x35,0x96,0x95,0x63,0xf7,0x50,0xb1,0xa0,0xf0,0x29,0x97,0xc9 +.byte 0x27,0x73,0xd8,0x29,0xef,0x74,0xd2,0x6d,0xf4,0xfb,0x72,0xa9,0x4f,0x12,0xd5,0xfd,0xc9,0xba,0xf0,0xbd,0xfd,0x5e,0x5c,0xfa,0x53,0xe3,0x96,0xab,0x57,0xc3,0xb6,0xe8,0x0e,0x43,0xe4,0x77,0x97,0x04,0x69,0xff,0x72,0xd0,0xd8,0xab,0xb9,0x19,0x25,0x89,0xf7,0xbb,0x01,0x03,0xf2,0xc6,0x8d,0xd5,0x86,0xe3,0xfe,0x9c,0xff,0x78,0xd7,0xfc +.byte 0xda,0xd4,0x69,0x8e,0xd6,0x31,0xfb,0x15,0xd3,0x38,0xfd,0x53,0xe2,0x4e,0xce,0xcc,0xfe,0x17,0xc5,0x88,0x92,0x28,0x98,0xb7,0xcf,0x7b,0x53,0x7b,0x96,0x14,0xaf,0xeb,0x5b,0x2d,0x16,0x41,0xcc,0x7b,0x65,0xe1,0x73,0x81,0x4e,0x8f,0xc3,0xad,0xe1,0x3f,0x0c,0xa7,0xbe,0x38,0xed,0x02,0x67,0xf5,0xfa,0x1d,0xb0,0xd5,0x4c,0xe1,0xd8,0x62 +.byte 0xc9,0xb5,0xf8,0x84,0xc4,0x51,0x57,0x14,0x11,0xf8,0x7d,0x1d,0xe7,0x81,0x85,0x61,0xa9,0x9f,0xc8,0x45,0xb9,0x2d,0x8a,0xc9,0xa3,0xfe,0x5a,0xf9,0xe0,0x1c,0x80,0xd8,0x77,0xaa,0x85,0xca,0x93,0x9a,0x2e,0x10,0x03,0x71,0x3d,0xb1,0x2a,0x64,0x2e,0xad,0x64,0xba,0x5c,0xaa,0x8a,0xc2,0x2a,0x80,0x28,0x2e,0xf9,0x93,0xe1,0x71,0x72,0xae +.byte 0xda,0xd8,0x4f,0x4c,0xec,0xb5,0xe3,0x05,0x10,0x5f,0x4c,0xe6,0xe1,0xf4,0x07,0x63,0x75,0x6f,0xc5,0xf9,0xcd,0xfc,0xfc,0x35,0x2f,0xe4,0xca,0x4b,0xfc,0xc3,0x20,0x8b,0x5c,0x4a,0x3c,0xf8,0x92,0xca,0x2b,0xb0,0xce,0xd9,0x4b,0xf0,0x44,0xcb,0x4e,0x83,0xf3,0x9d,0xb0,0xd4,0xab,0xba,0x2a,0x76,0xaa,0x87,0xcd,0xa2,0xd1,0x3f,0xa0,0xb9 +.byte 0xdb,0x7e,0x67,0x2d,0x92,0x4c,0xeb,0x3c,0xa6,0x8c,0x62,0x80,0x18,0x78,0x2b,0x9d,0x8f,0x5e,0xc3,0xa5,0x3b,0x10,0xb3,0x8a,0x3b,0x00,0x96,0xb2,0xab,0xce,0x8d,0xff,0x3c,0xee,0xeb,0x4f,0xfb,0xab,0x96,0x38,0x4c,0x15,0x6e,0x7c,0xf3,0x31,0x5f,0x8f,0x99,0x88,0x52,0x48,0x8b,0x71,0x1b,0x31,0x3f,0x7c,0xe4,0xae,0x9c,0x7b,0xeb,0x64 +.byte 0xe3,0x80,0xd4,0x56,0x9a,0x6a,0xd9,0xca,0xc5,0xf0,0x86,0xe7,0xda,0x80,0x8f,0x17,0x61,0xca,0x24,0x0b,0xb6,0xf9,0x24,0xc5,0x7a,0x28,0x42,0x32,0x7f,0x2b,0xde,0x44,0x30,0xed,0x69,0x63,0x07,0x3f,0xca,0x7b,0x02,0xea,0x6e,0xef,0x27,0x1d,0x76,0x32,0xc2,0x81,0x3d,0x03,0x9a,0xe7,0x0d,0x28,0x07,0x03,0x0c,0x65,0x73,0x58,0x26,0xc6 +.byte 0xfe,0xcc,0x33,0x7f,0x33,0xad,0xea,0x81,0x05,0xcc,0x61,0x1e,0x78,0x69,0x70,0xc9,0x1f,0x6e,0x4f,0xb8,0x19,0x42,0x03,0x03,0x9d,0x56,0x87,0x0e,0x9a,0x32,0x3a,0xba,0xb9,0x11,0x66,0x9f,0x4d,0xd1,0xb0,0x11,0xbf,0x46,0xfc,0xcf,0xe5,0xef,0xf1,0x61,0xeb,0xad,0x31,0x7c,0x0d,0x66,0x0d,0xa9,0x1f,0xe4,0xf9,0x80,0x9e,0xae,0x9e,0x34 +.byte 0x1e,0x95,0x6c,0xa2,0x77,0x69,0x84,0x77,0xb7,0xe8,0xca,0x1f,0xea,0xc1,0x34,0xe6,0x0d,0x4f,0xba,0x77,0x2b,0x8c,0xbe,0xff,0xc4,0x06,0xa3,0xb6,0x1a,0xbe,0x55,0x99,0x57,0x6f,0x54,0x24,0x93,0x7a,0x0d,0x52,0xd6,0xbb,0xd2,0x9c,0xd5,0x76,0x6a,0x22,0x66,0xdc,0x43,0x9a,0x7b,0x1b,0x11,0x80,0x02,0x0c,0x8f,0xc6,0xc6,0x02,0x42,0x29 +.byte 0x00,0xc4,0xb2,0xa1,0x6a,0x7f,0xa9,0x60,0x8d,0x41,0x4f,0xd3,0xde,0x33,0x5a,0x44,0x31,0xb0,0xdc,0xc0,0x0c,0x31,0x03,0x96,0x71,0x0a,0xce,0xe3,0x0b,0xc7,0xe3,0x5d,0xe0,0x88,0x4b,0xfd,0x4c,0x1a,0xce,0xaa,0x89,0xc6,0x99,0xa8,0xd3,0x1e,0xe9,0x6c,0x2a,0xbd,0x26,0x81,0x03,0x6a,0xf2,0xf2,0x0f,0x1e,0x9d,0x8a,0x59,0x45,0xbf,0x6d +.byte 
0xb7,0xc8,0xec,0x77,0xb0,0x70,0x1a,0x31,0x21,0xeb,0x25,0x12,0xff,0x13,0x33,0x6b,0x47,0x34,0xd8,0x66,0x11,0x8a,0xc9,0x93,0x5b,0x2c,0x55,0x42,0xb2,0x9b,0x60,0xc6,0xba,0xab,0x12,0x12,0x5d,0x0a,0xd4,0x54,0x79,0x17,0x6d,0x31,0x7d,0x4f,0xf2,0x94,0x16,0x65,0x62,0x38,0x76,0x3a,0x7d,0x55,0x05,0xd9,0x17,0x45,0x62,0xb4,0x1d,0x31 +.byte 0x34,0x40,0xd3,0x8e,0xf9,0x29,0x4d,0x3f,0x93,0x9a,0x2e,0xa4,0x75,0x66,0xf6,0x62,0x8f,0xf9,0x8d,0x79,0x4b,0x51,0x7e,0xfb,0xeb,0x9a,0x86,0x96,0x01,0x79,0xbe,0xe4,0x42,0xb3,0xc8,0x28,0x9e,0xed,0xa8,0xb6,0x6d,0xd3,0x31,0xed,0x30,0x9e,0x6a,0x5b,0x02,0x4b,0xbd,0xb3,0xf2,0xf0,0x9d,0x50,0x09,0x40,0x71,0xfe,0x4b,0x91,0xc9,0xd6 +.byte 0x07,0x87,0x9e,0xdb,0xa9,0xcd,0x0b,0x95,0x18,0x5a,0x55,0x10,0xaa,0xe1,0x70,0xe9,0x2e,0xc2,0x31,0x6b,0x48,0x84,0x2f,0xe5,0x7b,0xdd,0x4c,0x03,0xed,0xb6,0xb6,0x64,0x24,0x38,0x7a,0x5a,0x15,0x35,0x9d,0x66,0x08,0x4d,0xa6,0x3c,0x96,0x1a,0xcd,0x02,0x61,0x40,0xde,0xac,0xc3,0x15,0x8c,0xca,0xe6,0x62,0xe9,0x61,0x68,0xf6,0x60,0xd3 +.byte 0x7e,0x5f,0x44,0xcf,0x09,0x01,0x60,0xc2,0xb1,0xfc,0x2f,0x41,0x4c,0xc1,0x06,0x72,0xcc,0xde,0x25,0xe0,0x8c,0x34,0xb8,0xe0,0xb2,0xeb,0x05,0x5d,0x9e,0x7e,0xf7,0x1e,0x24,0xcd,0x1b,0x14,0x3f,0x1b,0x13,0xc0,0x64,0x38,0x43,0x95,0xba,0x7b,0x61,0xa0,0xdc,0xe0,0xf5,0x80,0x13,0xa1,0xc5,0x48,0x92,0xc5,0xd5,0xd0,0x87,0x0c,0x73,0xae +.byte 0xe2,0xb3,0xe8,0x70,0x4a,0x7e,0xa0,0x13,0xc3,0xc6,0x9c,0x77,0x51,0xca,0x88,0xcf,0xe0,0x1e,0xff,0x6c,0xe2,0xc3,0x33,0xce,0x7f,0x3e,0x7d,0xd5,0x37,0x23,0x09,0xb7,0xbd,0xb7,0xec,0x9a,0x29,0xd6,0x4f,0xea,0x79,0x24,0x4c,0x09,0x74,0x9c,0x97,0x3b,0x08,0x1f,0x82,0xcc,0xae,0xc4,0x3f,0xcf,0xc6,0xcb,0xaf,0x8c,0x89,0x15,0x79,0xeb +.byte 0x88,0xb9,0x03,0xab,0xc6,0xf8,0x6e,0x54,0xde,0x50,0x6e,0xcf,0x8a,0x4b,0x3f,0x64,0xd0,0xcb,0x69,0xc2,0xe3,0x40,0x4a,0x94,0xe2,0x04,0xfa,0x9b,0x4a,0xf6,0x2b,0x93,0x0c,0x0e,0xf8,0x68,0xbc,0x6e,0x6c,0xe6,0xd9,0xb6,0x04,0x40,0xf4,0x60,0xbc,0xc1,0x1e,0x67,0x1f,0xce,0x5c,0x4d,0xba,0x78,0xa8,0xf5,0x96,0x00,0xb9,0x61,0x82,0x65 +.byte 0xb2,0x1d,0x42,0xb8,0x88,0x66,0x43,0xd9,0xfe,0xe0,0x86,0xef,0x5d,0x4d,0xcc,0xeb,0x57,0x9a,0x2b,0x27,0xf2,0xcf,0x68,0xc3,0x05,0x92,0x4d,0x4d,0xb7,0x46,0x7e,0xfd,0xb7,0x4a,0x4d,0x6f,0xac,0xc8,0x8d,0xf2,0xcd,0x52,0xcf,0x91,0x77,0x2d,0x68,0x06,0x7a,0xc9,0xf3,0x17,0xc6,0x8f,0x8f,0xb5,0x8f,0x74,0xfa,0x90,0xcc,0xfc,0xaf,0x4e +.byte 0xd2,0x29,0xd9,0x57,0x71,0xe9,0x52,0xd8,0x50,0xfa,0x4d,0x13,0x7c,0x42,0x15,0x22,0x65,0x26,0x08,0xda,0xaa,0x53,0xcf,0xeb,0xd1,0x87,0xd5,0x7c,0x4e,0x66,0x1c,0x7d,0xc9,0x03,0x59,0xf8,0x09,0x3e,0x1b,0x94,0x4c,0x39,0x56,0xeb,0xfd,0xb6,0xd0,0xf9,0x76,0x8b,0x5d,0x6e,0x44,0x15,0xcf,0x27,0x7f,0x69,0x9a,0x00,0x96,0xbe,0x80,0x5e +.byte 0xbb,0x5a,0x05,0xea,0x15,0xdd,0x44,0x69,0x9e,0x64,0xcd,0xba,0xf2,0x6f,0x67,0x10,0xc5,0xa1,0x75,0x85,0x5f,0xdc,0x61,0x43,0x34,0xc3,0x52,0x06,0xd4,0xe9,0x9f,0xdf,0xd4,0xa6,0x96,0xac,0xb1,0x21,0xdd,0x20,0x46,0x20,0x89,0x5f,0x0e,0x9d,0xa8,0xc7,0x75,0x3a,0x54,0x9e,0x7c,0x3a,0xd5,0xb2,0x68,0x77,0x06,0x1b,0x1c,0xbd,0xb3,0x02 +.byte 0xb5,0xdd,0x87,0x55,0x6b,0x00,0x9f,0x2c,0x30,0xb7,0x4e,0xc3,0x67,0x38,0x37,0x61,0x81,0x68,0xcb,0x14,0x81,0x27,0xd7,0x38,0x18,0x81,0x68,0x45,0xca,0xf4,0xaa,0xae,0x58,0x9e,0xf8,0xbe,0xe9,0x1e,0x05,0x19,0xf0,0xea,0x89,0xf8,0xa1,0x9c,0x7b,0x63,0xc1,0xcd,0x81,0xc8,0x95,0x56,0x81,0x81,0x29,0xb0,0x4d,0xbf,0xe6,0x8d,0xa3,0xb3 +.byte 
0xfa,0xae,0x13,0xc8,0xca,0x4d,0x5c,0x5e,0xd9,0x17,0xf8,0x87,0xdb,0x5b,0xe2,0xd9,0xba,0xe3,0xe8,0xdb,0xcb,0x74,0x36,0x7e,0x0e,0x3a,0x94,0x6a,0xe9,0x9e,0x50,0x8e,0xf4,0xd4,0x15,0xb7,0x50,0x60,0x3f,0x14,0x72,0x41,0x9d,0x51,0x63,0x8c,0x31,0x95,0xf2,0xbc,0x14,0xc7,0x64,0x2c,0xee,0x0b,0xe6,0xde,0xf6,0x33,0x85,0x65,0x00,0x54 +.byte 0x54,0x84,0x85,0x94,0x87,0xa0,0xc3,0x95,0x4e,0x74,0xcb,0x2d,0x82,0x9e,0x46,0x7f,0xf5,0x64,0x60,0xfe,0x1a,0x37,0xee,0xa7,0xb6,0x85,0xb5,0x4e,0x30,0x11,0x39,0x4b,0xe9,0x57,0x18,0x3a,0x2c,0x6b,0xb9,0x8e,0x5a,0x54,0xa9,0x31,0xf7,0xe1,0xe0,0xc7,0x52,0xfe,0x76,0x9b,0xc6,0xfe,0xde,0xe0,0xe9,0xf9,0xf6,0x10,0xda,0xef,0x72,0x24 +.byte 0x9c,0xbe,0x4a,0xba,0x58,0x21,0x1b,0xe3,0x1d,0x80,0x10,0x76,0x70,0xde,0x8f,0xf3,0x07,0x93,0x01,0xe0,0xb4,0xd9,0x7d,0x60,0x0d,0x08,0x07,0xa4,0x6d,0x9b,0x2b,0x8c,0x9a,0x58,0x65,0x5e,0x29,0xf1,0x24,0xb2,0x31,0xfb,0xb7,0xad,0xf0,0x50,0x8e,0x25,0x1b,0x75,0xc5,0x82,0x88,0x8c,0x68,0x14,0x2c,0x28,0xa2,0xb6,0x93,0x14,0xe3,0x28 +.byte 0xd0,0x95,0x6f,0x79,0x91,0x03,0x75,0x82,0x5c,0x20,0x46,0x0d,0x53,0x40,0x2c,0x88,0x62,0xa4,0x8c,0xd5,0xf1,0xc1,0xbf,0xde,0x57,0x91,0xb2,0xa6,0x66,0x29,0xf0,0x6b,0xb8,0x5e,0x78,0x5f,0xd1,0x76,0x98,0xf2,0x56,0xc2,0x5f,0x48,0x1f,0xa6,0x98,0xb0,0x87,0x53,0x13,0x1d,0x1a,0xa7,0xdf,0xa5,0xea,0x37,0x12,0x6d,0x64,0x53,0xdc,0x04 +.byte 0x2d,0xb9,0xeb,0x78,0x89,0x7b,0x70,0xd2,0x6d,0x45,0x8d,0x45,0x50,0x57,0xc7,0xb2,0xaf,0xdd,0x72,0x0f,0x9f,0x1b,0x29,0x61,0x68,0xb5,0x4a,0xd4,0xe9,0xd7,0x10,0xe7,0xcd,0xe8,0x22,0xd3,0x54,0x0c,0x0b,0x32,0x77,0x7d,0x3e,0xed,0x6e,0x79,0x4b,0x7b,0x99,0x1f,0x9e,0xbe,0xe7,0x12,0x7c,0x94,0x36,0x1c,0x20,0x8a,0xd0,0xab,0xda,0x95 +.byte 0xf6,0x4f,0xbe,0x6f,0x44,0x0b,0xa3,0x7b,0x4d,0x00,0xf6,0xdf,0x6f,0xc8,0x50,0x9e,0x3e,0x0c,0x1e,0xfe,0xb8,0x39,0x9f,0x83,0x4f,0xb3,0x1f,0x7e,0x53,0x54,0x64,0x04,0xa3,0xf7,0x79,0x01,0x71,0xce,0x18,0x0d,0x47,0x4e,0xae,0x88,0x6a,0xe7,0x26,0x4e,0x59,0xee,0x3a,0x03,0xc2,0x4d,0x0c,0x29,0xf0,0x96,0x9d,0xc0,0xa3,0xb3,0x82,0xf9 +.byte 0xc4,0xf8,0x8b,0xae,0x68,0x47,0x39,0xdc,0x10,0xd7,0x09,0xb4,0x86,0x87,0xfa,0x7e,0x0c,0xe4,0xee,0x3a,0x35,0x1a,0x0e,0x95,0x88,0xce,0xe7,0x9e,0xcc,0xa5,0x58,0x98,0x48,0xbd,0x9c,0x27,0xe6,0xb9,0xf7,0xca,0x66,0xee,0x54,0x87,0xd0,0x6d,0xab,0x31,0x1a,0x57,0x33,0x8b,0x89,0xa0,0xc0,0x18,0x9a,0x87,0x5e,0x58,0x02,0xe5,0x50,0x47 +.byte 0x0f,0x60,0x53,0x9d,0x99,0xe4,0x0a,0xfa,0x4a,0xc3,0x77,0x4b,0x4d,0x4e,0x0c,0xbb,0x68,0xd9,0xb3,0xd3,0x59,0x78,0xdf,0x65,0x97,0x6e,0x22,0x5b,0x24,0x26,0xf9,0x2a,0x14,0x73,0xa7,0xec,0x65,0xfc,0xdf,0x7d,0x35,0x0d,0x44,0x1b,0x4b,0xad,0x6b,0x8f,0x0e,0xa3,0x3b,0x6b,0x40,0xb3,0xe3,0xd9,0x41,0xba,0xbf,0x95,0xbb,0x6e,0x91,0xf6 +.byte 0x63,0xb3,0xde,0xdb,0xc2,0x6f,0xfe,0x00,0xf1,0x53,0x96,0x37,0xa4,0x27,0x48,0x3e,0xf9,0x32,0x23,0x90,0x90,0xe0,0x01,0xde,0x08,0xad,0xc4,0x6c,0x25,0x7a,0x7f,0x2f,0xb7,0xb7,0xc6,0xaf,0xeb,0x91,0x9c,0xa2,0x9c,0xf7,0x7f,0x9f,0x74,0x9b,0x7d,0x54,0x66,0xf9,0xe0,0x73,0xb4,0x15,0x2b,0xaa,0x71,0x50,0xd0,0x74,0x5d,0xcd,0x1c,0x09 +.byte 0x4c,0x80,0xcc,0xdc,0x10,0xd9,0x96,0xb3,0xdc,0x09,0x73,0x1f,0x36,0x4c,0x1b,0x86,0x25,0x13,0x7c,0xd2,0xc6,0x9d,0x5a,0xce,0xd6,0x22,0x97,0x66,0x7b,0x7b,0x84,0xba,0x69,0xd2,0x87,0x9b,0x08,0xda,0x77,0x66,0x90,0xbc,0x7c,0x3c,0x5d,0x43,0x92,0x5f,0x05,0xfb,0x23,0x46,0x88,0xf7,0xa4,0x10,0xbd,0x7d,0x00,0x29,0x2d,0xa5,0x6a,0xab +.byte 
0xcc,0xdd,0xcf,0x1e,0x2b,0x9b,0x5f,0xa9,0x94,0x14,0x99,0x6e,0x3b,0x41,0x52,0x61,0x16,0x17,0x44,0xcf,0x5b,0x34,0x5c,0x27,0x29,0x4a,0xc3,0xba,0x9a,0x0c,0x20,0x17,0x2b,0x92,0xd9,0xf1,0x76,0x51,0xd8,0xa5,0x4a,0x4b,0x4a,0x0b,0xe4,0x6b,0x93,0x61,0xc7,0xb3,0x23,0x7a,0x24,0xfa,0x5e,0xee,0x80,0x10,0x65,0x44,0xa5,0xed,0x72,0xd9 +.byte 0x8a,0x06,0x2a,0x86,0xa9,0x26,0x50,0xa1,0xb2,0xb2,0x8b,0x7b,0x4a,0x29,0xf1,0x18,0xef,0xff,0x61,0xf1,0xa1,0x48,0x0f,0x84,0x8c,0xef,0xd8,0x02,0x65,0x44,0x11,0xf2,0xe1,0xba,0x98,0x03,0xbe,0x5a,0x5d,0xb8,0x0a,0x88,0xd8,0x4a,0x49,0x4c,0x70,0xa6,0x98,0x81,0x36,0x56,0x92,0xde,0xcb,0xaf,0x33,0xf5,0x1c,0x0a,0xce,0x7a,0xc0,0xff +.byte 0x24,0x54,0xd3,0x9a,0x0f,0x82,0x76,0xe5,0x0e,0x82,0xb4,0xfe,0xc2,0xac,0xe4,0xba,0xa3,0x4c,0x8a,0x0d,0xa7,0x3e,0x2b,0x71,0x73,0x5f,0xd2,0x35,0xd3,0xae,0xc0,0x3e,0x6f,0x67,0x98,0x51,0xa6,0xdf,0xb2,0xf4,0xd2,0xc1,0x43,0xe2,0x0a,0x7c,0xa0,0xb6,0xff,0xfc,0xc0,0x88,0xe5,0x34,0x20,0x79,0x50,0xc3,0x06,0x5b,0x20,0x9f,0x05,0x33 +.byte 0x22,0x30,0xaf,0xc4,0xc3,0x17,0x09,0xbb,0x30,0x0f,0x42,0xb7,0xc1,0xe0,0x4c,0x71,0xc5,0xf7,0x96,0xb4,0xd4,0x0f,0x44,0x47,0xa3,0x06,0x17,0xbd,0x0f,0x7c,0xc6,0x53,0x07,0x34,0x9a,0x9a,0x2f,0x3f,0x01,0xea,0xdf,0x1c,0x06,0x33,0x15,0x9c,0x5a,0xe3,0x33,0x29,0xce,0x40,0x4b,0xb1,0x99,0xe0,0x80,0x6e,0x0c,0xa1,0x4c,0x34,0x01,0x21 +.byte 0x12,0xbe,0x67,0x26,0xe6,0xdb,0xab,0x8d,0x45,0xdd,0x12,0x60,0x02,0x1a,0xdd,0x85,0xd6,0x33,0x78,0x23,0xe1,0x58,0x2a,0x46,0xf0,0xc2,0x4d,0x71,0x59,0x5b,0x8d,0x65,0xa7,0x97,0xf4,0x71,0x88,0x7d,0x60,0xe0,0x2d,0x2d,0x09,0x2f,0x26,0x15,0xa7,0xbf,0x30,0x0b,0x99,0x08,0xd7,0x85,0xfc,0x0c,0x19,0x31,0xde,0x5e,0x55,0x91,0x13,0x45 +.byte 0x3a,0x6d,0xd0,0x61,0x02,0x81,0xa0,0x42,0x7d,0xd8,0x7d,0x41,0x11,0xd2,0x25,0xb7,0x15,0xa1,0x16,0x3e,0x70,0x77,0x1b,0x80,0xb7,0xf1,0x24,0x8e,0x70,0x8d,0x73,0x6d,0xba,0xf1,0x46,0x32,0x60,0xe4,0xc8,0x4d,0x69,0xc8,0x10,0xf8,0x2d,0x53,0xe1,0x81,0x96,0x20,0x9d,0x59,0x74,0xae,0x93,0x92,0x44,0x5a,0x09,0x79,0x20,0xcb,0xff,0xb2 +.byte 0x08,0x7a,0x81,0xee,0x98,0x83,0x0b,0xa4,0x15,0xb0,0xaa,0x55,0xb0,0xb5,0x60,0x09,0x21,0xeb,0xe2,0x9b,0x57,0x41,0xb9,0xb4,0xd9,0xbe,0x7d,0x60,0x5d,0x25,0xde,0x9f,0x9e,0x5b,0x7c,0xee,0xeb,0x87,0x54,0x6a,0xc3,0xcf,0xec,0x57,0xce,0x97,0x2e,0x47,0x84,0x4c,0x15,0xf4,0xf5,0xe9,0xd4,0x45,0x23,0x20,0xf0,0x0f,0xda,0x97,0xc2,0xb9 +.byte 0xb2,0xe2,0x44,0xea,0xbd,0x95,0x73,0xcc,0x94,0x03,0x0b,0x97,0xeb,0x03,0xc1,0x51,0xc8,0x14,0xa6,0x7d,0x18,0x30,0xa1,0xda,0xa3,0xcd,0x78,0x67,0xb0,0xc1,0x6c,0x88,0xdd,0xd6,0x52,0x4b,0x85,0x1d,0x4a,0xaa,0x44,0xec,0x3b,0xff,0x00,0xd8,0x9e,0x18,0xf8,0xac,0x4f,0x73,0x6d,0xc7,0x4b,0x59,0x15,0x85,0x87,0x02,0xd8,0xf1,0xe6,0xfb +.byte 0x66,0x57,0xcf,0x06,0x84,0x50,0xc5,0x67,0x94,0xc6,0x96,0xb2,0x1a,0x37,0x06,0x3d,0x21,0xf2,0x1e,0xb4,0xe7,0xcb,0x36,0x8b,0xa3,0xe3,0x84,0xa0,0x9a,0x31,0xdb,0x87,0xf9,0xb0,0xef,0x06,0xfe,0xb0,0x8a,0x32,0x53,0xb4,0x41,0x79,0x6b,0xf7,0x7c,0xf7,0x9c,0xc1,0xea,0x61,0xf3,0x75,0xac,0x1f,0x92,0x75,0x44,0x58,0x9a,0x20,0xa4,0x20 +.byte 0xe3,0x19,0x1c,0x0d,0x27,0xe5,0x2e,0xbd,0x14,0xcb,0x40,0x3f,0x1c,0x19,0x7c,0xf9,0x92,0x13,0x1a,0x71,0x87,0xaf,0x77,0x0f,0x50,0x92,0x06,0x75,0x2d,0x75,0xe0,0x2e,0x37,0x54,0xcd,0xac,0xcb,0xca,0x7c,0x0e,0x66,0x53,0x10,0x50,0x70,0x9a,0xa4,0x79,0x76,0x87,0x71,0x4a,0x55,0xd4,0xa3,0x83,0xb3,0x04,0xed,0xa9,0xd6,0x84,0x7d,0x1a +.byte 
0x64,0x5d,0xf7,0x4f,0x55,0x97,0x5e,0x26,0x9c,0x03,0x42,0x0a,0x16,0xd3,0xdf,0xc8,0x07,0xb8,0xb3,0xe9,0xac,0xa9,0x99,0x83,0x32,0x5b,0x83,0xde,0x7f,0x2b,0x70,0xca,0x15,0x09,0x33,0x0e,0x28,0xc9,0x89,0xc6,0xa6,0x47,0xd1,0x56,0x04,0x40,0x5d,0xd2,0x17,0x1d,0x32,0x21,0x6d,0xb2,0xc7,0x89,0x14,0x98,0xc6,0x58,0xc4,0xca,0xda,0x0f +.byte 0x32,0xdd,0xe1,0xe1,0x9a,0x25,0x09,0x31,0x16,0xf1,0x48,0x40,0x1c,0xc2,0xf9,0xd0,0xba,0xec,0x07,0x94,0xea,0x17,0xcf,0x6e,0xbc,0xfd,0x70,0xb4,0xbb,0x40,0xae,0xc3,0xae,0xf7,0x56,0xf5,0x13,0x55,0xfb,0x4b,0x81,0x5d,0xab,0xf2,0x3f,0xd7,0xa7,0xe6,0xcf,0x17,0xef,0x1f,0x71,0x1b,0x92,0x67,0xd3,0xd2,0xed,0x89,0x14,0x8f,0x8d,0x83 +.byte 0xef,0x7f,0xca,0x65,0x6d,0x79,0x13,0x5f,0x6e,0xf9,0x5d,0x9a,0x68,0x54,0x71,0x5c,0x9d,0x03,0x7c,0x73,0x7a,0xc2,0x17,0x9b,0x5a,0x7d,0x45,0x24,0x0c,0x41,0x13,0xe4,0xcb,0xdb,0x7b,0xc6,0xfb,0x93,0x48,0xca,0xd3,0x01,0x68,0x3f,0x36,0xc0,0x4b,0x1d,0xfa,0x9f,0x25,0x0e,0xcc,0xd0,0xf7,0xa0,0x7a,0x14,0xac,0xd7,0x6e,0x00,0x9f,0xf1 +.byte 0xc0,0xdc,0xfc,0x3b,0xd9,0xbf,0x68,0xfd,0x65,0x34,0x66,0x18,0xe5,0x02,0x9a,0x2d,0xff,0xaa,0xf7,0x73,0x58,0x21,0xe3,0xff,0x23,0x0f,0x63,0x1f,0xf3,0x8b,0x08,0xc7,0x00,0x46,0xe7,0xef,0x85,0x5f,0x7f,0xd9,0x5f,0xc2,0x36,0xe2,0xb6,0xa3,0x00,0xcb,0xff,0xe0,0x22,0x28,0x8c,0xb1,0xb1,0x17,0x91,0x4a,0x4a,0xc8,0x77,0x5a,0xa9,0xb2 +.byte 0x6e,0xb7,0xf0,0x4f,0x70,0x34,0x7f,0x87,0x2a,0x0c,0xcb,0x16,0x24,0x9b,0x41,0xb2,0x3e,0x0a,0xc1,0x33,0xf3,0xbb,0x48,0x17,0x2f,0xe6,0xfc,0xf4,0x27,0xc0,0xdb,0x58,0x24,0x9b,0x99,0x43,0x25,0xfb,0xd3,0xcf,0x1c,0x5a,0x5f,0xbe,0x28,0x3a,0x84,0x51,0x19,0xc3,0x53,0x6b,0xc8,0x73,0x44,0x6e,0x3d,0x7e,0x01,0x37,0xc2,0x2b,0xf7,0xa8 +.byte 0x1f,0x8e,0xd8,0x02,0x5a,0xae,0x56,0x81,0x2b,0x46,0x1b,0x7d,0xca,0x27,0x1f,0x48,0x99,0x24,0x54,0x59,0x08,0xfd,0xb7,0xdf,0x0a,0x77,0xef,0x4e,0x89,0x21,0x71,0x71,0x3f,0x8c,0xd7,0x52,0x89,0x7a,0x0d,0x68,0x09,0xc8,0x88,0x9c,0x0c,0x60,0xca,0x77,0x96,0xeb,0x05,0xeb,0xeb,0x60,0x5b,0x68,0x51,0x2c,0xcb,0x8f,0xca,0x3b,0x18,0x39 +.byte 0x28,0x8f,0xda,0x17,0x9b,0x53,0x71,0x26,0xa9,0x19,0xfb,0x1e,0x4a,0xd0,0x14,0x93,0x1c,0xee,0xe1,0x21,0xea,0xb3,0x16,0x47,0xaf,0x50,0xe5,0xe5,0xd3,0x21,0x8c,0x67,0x46,0x5d,0x97,0x19,0xda,0x6e,0xd9,0x70,0x7d,0x9f,0xd6,0x25,0xd0,0xfb,0x01,0x62,0x0a,0x9e,0x49,0x3d,0x33,0x0d,0x35,0xe5,0xae,0xfd,0xeb,0xb5,0x9b,0xd8,0xc1,0x2a +.byte 0xee,0x4d,0xf2,0xfc,0x16,0x51,0xab,0x58,0x7a,0x9e,0x5c,0xca,0x0a,0x92,0xbb,0xbb,0xa8,0x5b,0xfb,0xf9,0x33,0x67,0x0e,0x13,0x4c,0x83,0x3a,0x25,0x84,0x23,0xe1,0x41,0xfb,0xf1,0x42,0xc1,0x8d,0x58,0x0c,0x5e,0x75,0x09,0x34,0x58,0x96,0x32,0x54,0xb6,0xd8,0xaa,0x48,0xc1,0xed,0xc0,0x92,0x5a,0xec,0xeb,0xb1,0x75,0x59,0xf6,0x35,0xf5 +.byte 0xfd,0x7d,0x96,0x9b,0x83,0x38,0x31,0x10,0xa4,0xd7,0xfb,0x28,0xf0,0xc9,0xe4,0x33,0x5d,0x66,0x81,0x9c,0x31,0x9a,0xe9,0x9a,0x5e,0x70,0xf7,0x61,0xf9,0x93,0xaf,0x2b,0xbd,0x78,0x9e,0xdc,0x61,0xe0,0xa9,0xd1,0xa0,0x8e,0x3a,0x5f,0xb1,0x71,0xe7,0x9e,0xfd,0x81,0xee,0xf0,0xd6,0x63,0xec,0x4a,0xca,0x30,0xaf,0xb6,0x2d,0xaa,0x2d,0xa1 +.byte 0x5a,0x38,0xb5,0xc6,0x3f,0x5f,0x63,0x48,0xd3,0x18,0xeb,0xe3,0x36,0xca,0x91,0x86,0x4b,0x6f,0x57,0x66,0x47,0x2f,0xce,0xe4,0x44,0x26,0xe4,0xfd,0x8c,0xde,0x74,0xdc,0x17,0x0e,0x7d,0x6a,0xcf,0x89,0x0e,0x7f,0x09,0x65,0xf8,0xeb,0x58,0x00,0x3d,0xc5,0x1b,0x14,0xc5,0xca,0xca,0x28,0xbc,0xb7,0x63,0x6f,0x3b,0xa4,0x62,0x23,0x0e,0xd5 +.byte 
0x04,0x76,0x0c,0xe8,0xea,0x64,0x10,0x3a,0x76,0x03,0xd6,0xea,0x69,0x52,0x14,0xa7,0x5e,0x40,0x7e,0x14,0xdb,0x7f,0xbf,0xe8,0xf6,0xf0,0xdd,0x5e,0xac,0x55,0x44,0xfb,0x28,0xf3,0x16,0xcb,0xed,0x8f,0x10,0x01,0x91,0xac,0x2c,0x27,0x46,0x0c,0x51,0xd6,0xf6,0x30,0xa3,0x34,0xd0,0x5e,0x93,0xe8,0x4e,0xc0,0xb4,0x9b,0xc1,0xe8,0x20,0x7d +.byte 0xb7,0x68,0xdd,0xf1,0xc4,0x60,0x20,0x97,0xdd,0x5c,0x7c,0x9b,0xea,0xc0,0x22,0x84,0x2c,0x65,0x78,0xbd,0x18,0xa1,0x62,0x7e,0x06,0x49,0x96,0xde,0xd1,0x89,0x06,0x0d,0x35,0xa0,0xcc,0x22,0xd3,0xf5,0xa6,0x4b,0xb6,0xca,0x43,0x34,0x5a,0x3d,0x39,0x95,0x0b,0x95,0xbe,0xdc,0xe6,0x61,0x72,0xbe,0x2f,0x19,0x1c,0xe8,0x22,0x5e,0x18,0xc9 +.byte 0x59,0x4a,0x08,0xa3,0x85,0x5c,0x06,0x36,0x00,0x2e,0x84,0x3e,0x3e,0x07,0x5b,0xfa,0xda,0xbb,0xbb,0x57,0x20,0x6f,0x1b,0x8d,0xe5,0xc5,0xdb,0x8d,0x23,0x1a,0xfc,0x67,0xa9,0xc8,0xea,0xe1,0x54,0xbb,0x8a,0x8a,0x0b,0xa6,0x02,0x35,0xd6,0xd5,0x4d,0xff,0x09,0x79,0x31,0x9a,0xc2,0xad,0xa7,0x66,0xb5,0x3c,0xbd,0xb7,0xcb,0x17,0x30,0x4b +.byte 0x56,0xf5,0xd2,0x51,0x90,0xbb,0x47,0x00,0xc0,0xf3,0x8b,0xd7,0x10,0x33,0x6d,0xe8,0xe4,0xcf,0xd6,0xbf,0x35,0x75,0x8d,0x40,0x55,0xd7,0x5d,0xb0,0x40,0xf6,0x95,0xfb,0x1a,0x97,0x24,0xb8,0xc1,0x91,0x5f,0x66,0x6c,0xc7,0xdb,0x16,0xba,0xb8,0x07,0xf8,0xf8,0x91,0xb2,0x8c,0x26,0xb9,0xa2,0x59,0xb0,0xde,0x49,0x63,0xcc,0x7c,0x4c,0x48 +.byte 0xb5,0xe4,0xf9,0x81,0x28,0x48,0x9f,0xa0,0xa4,0xf8,0x0d,0xcc,0x7b,0xf3,0xce,0x08,0x85,0x73,0x4a,0x64,0xfc,0xa8,0xc0,0xae,0x7a,0xbf,0xa5,0x3f,0x45,0xaf,0xe7,0x7f,0x41,0x61,0x34,0x08,0x6e,0x09,0x0d,0x9d,0xea,0x90,0xbe,0x62,0x7c,0x38,0x92,0xa7,0x63,0xfa,0x03,0x80,0x10,0xc4,0x53,0x46,0x0b,0x44,0x88,0xea,0x50,0xb6,0x82,0xf8 +.byte 0x0b,0x2d,0x93,0x63,0x82,0x80,0x2b,0x61,0x3e,0x17,0xd1,0xd8,0x6c,0xb1,0xb4,0xbd,0xfd,0xad,0x1c,0x10,0x30,0xc1,0x78,0xd4,0x5f,0x21,0x49,0x54,0x7a,0x08,0x2b,0x25,0x3b,0xc9,0xb7,0x0a,0xf2,0x37,0x83,0xc0,0x43,0x73,0xee,0xd6,0x8b,0x92,0x15,0xde,0xfe,0x14,0xf1,0xfb,0x8b,0x4a,0x85,0x8d,0x78,0xe6,0x36,0x1a,0xbb,0x32,0x6c,0xdd +.byte 0x43,0x76,0xad,0x68,0x90,0x08,0xd2,0xbd,0x24,0x41,0xd4,0x93,0x17,0xa8,0x9f,0xeb,0x33,0x25,0x1f,0x1a,0xfd,0x45,0x20,0xc1,0x47,0xf1,0x25,0x09,0x89,0x14,0x9e,0x4c,0x88,0xa4,0x1c,0xb8,0xba,0x84,0xd5,0x7d,0x73,0xb2,0x9c,0x48,0x9f,0x84,0x31,0xd3,0x2c,0xe1,0x94,0x61,0x3e,0x5f,0x37,0x25,0xc7,0xb7,0x2d,0xc3,0xa9,0xaf,0xcc,0x0e +.byte 0xe6,0xc7,0x9a,0xa7,0x06,0xe3,0x41,0xb8,0xa6,0xa8,0x9a,0xe7,0x76,0xef,0x83,0x5a,0x80,0xa4,0xe3,0x0c,0x04,0xa2,0x0b,0x91,0x33,0x34,0x17,0xa4,0x02,0x2d,0x12,0x84,0x67,0x85,0x6b,0xc0,0x3a,0x0d,0x16,0xf2,0x66,0x04,0x71,0xe9,0xec,0xa6,0xbb,0x58,0x42,0x92,0x70,0xf5,0x0d,0x52,0xcd,0x1e,0x2d,0xd4,0x28,0x0f,0x68,0x35,0xd9,0xa4 +.byte 0x40,0x09,0x30,0xe9,0xbb,0xaf,0x77,0x63,0x4f,0xba,0x56,0x97,0xe8,0x92,0xcc,0xba,0xdb,0xe4,0xe0,0xdf,0x19,0x21,0x71,0x23,0x3d,0xd0,0xb1,0x25,0xd3,0xf8,0x53,0x01,0x30,0x9a,0xea,0x84,0x1b,0x18,0x68,0x4a,0xb9,0x9e,0x60,0xc4,0xfc,0xf7,0x56,0xb7,0x49,0xe1,0x50,0x38,0x7d,0x3d,0x87,0xa2,0xad,0x38,0x5c,0x0c,0x53,0x21,0xa0,0x56 +.byte 0x3a,0x94,0xd7,0xa8,0x23,0x96,0xa9,0x66,0x4e,0x88,0xae,0x4b,0x6e,0xcb,0xc6,0xa6,0xdb,0x1f,0x2e,0xae,0xe7,0x24,0xe2,0x1e,0xf7,0x3a,0x14,0x48,0x5e,0xfa,0x90,0x0a,0x84,0xa6,0x1c,0xaa,0x60,0xc0,0x2c,0x69,0xe8,0x36,0xb3,0xee,0x55,0x2a,0xf7,0x90,0xa1,0x92,0x4f,0x29,0x1e,0x49,0x6e,0x73,0x22,0x1f,0x8b,0x0c,0xb6,0xf4,0x3c,0xbf +.byte 
0x82,0x47,0x49,0xc3,0x94,0x0e,0xcf,0x9b,0x86,0x88,0xc2,0xd0,0xd7,0xa7,0x43,0xfb,0x89,0x4b,0xbd,0x5d,0x4c,0x6b,0x7a,0xc7,0x74,0x1b,0xfb,0x48,0x12,0x68,0x61,0x91,0xf9,0xf3,0xb6,0x7f,0x4f,0x72,0x89,0xf0,0x72,0x46,0xf7,0x6f,0x84,0xd1,0x38,0x6d,0xd9,0x1b,0xa5,0xd1,0xe2,0x29,0xe0,0xa6,0xbf,0x1c,0xbd,0xfb,0xdd,0xdc,0xa5,0xae +.byte 0x7a,0x9c,0xd0,0xc3,0xfa,0x6f,0x72,0xa3,0xa2,0x8b,0x87,0x0d,0x9a,0x6a,0xfc,0x53,0x9a,0x08,0x61,0x86,0x67,0x2a,0x90,0x6a,0x09,0x20,0x8e,0xde,0x32,0x35,0x34,0x75,0xc0,0xa8,0xab,0x1b,0xc4,0x7c,0xc8,0xd9,0x90,0xcf,0x32,0x27,0x6c,0x68,0xf9,0x18,0x14,0x05,0x57,0x39,0xc6,0x9e,0x5e,0x38,0x07,0xdb,0x81,0xb4,0xa4,0x54,0x06,0xd6 +.byte 0x79,0x78,0x0e,0xc8,0xb9,0x56,0xda,0x08,0x2e,0x77,0x26,0xcc,0xf7,0xa5,0x2d,0xd8,0x91,0xa6,0xfc,0x25,0x0e,0x91,0xdd,0x3c,0xa8,0x14,0x7a,0x95,0x05,0x5b,0x15,0x7d,0x1d,0x9b,0x3c,0x8c,0xfd,0xdc,0xa5,0xcd,0xec,0xea,0x7a,0x2b,0x7e,0x79,0x21,0x54,0xea,0x7f,0x52,0xb4,0xbb,0x4f,0x07,0x95,0x39,0x4a,0xaf,0x2e,0xb4,0x1e,0x9e,0xc6 +.byte 0x0a,0x07,0x58,0xd4,0xa5,0x44,0x73,0xa8,0x84,0x26,0x67,0xb8,0x0f,0xc7,0x6b,0xa7,0x28,0xf6,0x05,0x91,0x3e,0x22,0xcd,0xd7,0xf5,0xfc,0xae,0x22,0x42,0x96,0x3b,0x57,0x91,0xce,0x44,0xd0,0xfd,0xc3,0x4c,0x8b,0x8b,0x67,0xfe,0x03,0x86,0x92,0x34,0xf7,0xf9,0x53,0xb3,0xdf,0x36,0xcf,0x16,0x1c,0x68,0x36,0x17,0x1f,0x41,0x56,0x1d,0xda +.byte 0x90,0xb3,0xab,0x03,0x97,0x88,0x23,0x65,0x89,0x72,0xe3,0x6d,0x8e,0x37,0x5d,0xee,0x89,0x81,0x11,0x27,0x8b,0xf0,0x9b,0xef,0xa2,0x34,0x45,0xcc,0x41,0xcf,0x2a,0x88,0x70,0xe4,0x78,0xfc,0xe1,0xb5,0x51,0x70,0x84,0x64,0xd1,0x10,0x71,0x5d,0xa4,0xb4,0x6d,0xb5,0x98,0x6e,0xcc,0x9a,0x62,0x14,0x30,0xce,0x1a,0xff,0x49,0xd6,0xaa,0xcc +.byte 0xe1,0x99,0x42,0xb1,0xfe,0x77,0x8a,0x2d,0xdb,0xc0,0x0d,0x50,0x53,0x0d,0x92,0xe5,0x2b,0xd0,0x78,0x83,0x08,0x4a,0x0c,0x1d,0x5b,0x03,0x22,0x65,0x3d,0x9e,0xdb,0xcf,0x01,0x61,0xf7,0x6d,0x2b,0x99,0xef,0xba,0x80,0x50,0xda,0xda,0x2d,0xbf,0x00,0xdf,0x6f,0xec,0x95,0xbc,0x5b,0x4e,0xda,0x83,0xe4,0x5d,0xf0,0xa7,0x1b,0x27,0xf1,0x76 +.byte 0x04,0x5d,0x3d,0x2c,0x12,0x15,0xad,0xef,0x47,0xdc,0x22,0x9b,0xc2,0x80,0x91,0xf3,0xbf,0x16,0xe9,0xd3,0x35,0x94,0x4b,0xfd,0xa3,0xa1,0xee,0x98,0xad,0x99,0xea,0x07,0xe1,0x0f,0xa7,0xbd,0x0b,0xfb,0xc0,0xd5,0xb0,0x49,0x37,0xc6,0x5f,0xe7,0x18,0xc1,0x60,0xe9,0x1d,0x5e,0x0e,0xea,0x73,0xf2,0xa1,0x75,0x7e,0x39,0x51,0x07,0x1e,0xcb +.byte 0x2a,0x5b,0x26,0x75,0xbe,0x02,0x5e,0xde,0x6c,0x37,0xb1,0x3c,0x1f,0x25,0x65,0x7d,0x9e,0x5d,0xa1,0x0b,0x98,0x27,0x53,0xb9,0xbb,0xc2,0x3e,0x8d,0x2d,0x5e,0x5c,0xbf,0xed,0x66,0xe8,0xd1,0x7d,0xaa,0xef,0xca,0x0e,0xd0,0x78,0x2b,0x89,0x07,0x76,0xb6,0xc3,0x92,0x42,0x3a,0x84,0x1d,0x81,0xc1,0xe8,0x1a,0xb8,0xe6,0xf1,0x43,0xcc,0x7a +.byte 0x59,0x4d,0x9f,0x00,0xfe,0x6a,0xe5,0x42,0x71,0x3c,0xcb,0xc8,0x45,0x18,0xf0,0xf2,0x81,0x9d,0x5a,0xb7,0x8d,0xbe,0x31,0xcb,0x7d,0xca,0xb7,0x19,0x57,0xb1,0x61,0x36,0x90,0x42,0xe2,0xc3,0xf5,0xa5,0x4b,0xc3,0xd4,0xe7,0x6c,0xb6,0x0c,0x06,0x19,0x4b,0x54,0x8f,0x2d,0xdc,0xc5,0x2b,0xff,0x1c,0x61,0x29,0xda,0x95,0x4f,0xa1,0x21,0x25 +.byte 0x24,0xbe,0xc7,0x34,0x2f,0xbf,0x33,0x6d,0x82,0x8f,0xf1,0xa9,0x97,0x5a,0x49,0x7f,0x60,0x00,0xf2,0x3e,0x7b,0x64,0xdf,0xc8,0xd3,0x5f,0x6e,0x1f,0xfb,0x71,0x80,0xf3,0x55,0x42,0xbe,0x32,0x7b,0xa9,0xeb,0xf6,0x31,0xe2,0xf0,0xd1,0xe9,0xbe,0x96,0x0e,0xb3,0xdf,0x3e,0xb2,0x2c,0xc3,0xce,0xbd,0xe7,0xfe,0x1c,0xed,0x2c,0x0b,0xaa,0x32 +.byte 
0x76,0x82,0xb4,0x6b,0x18,0xa7,0x68,0x19,0xb7,0x27,0x21,0x4c,0xb0,0x22,0x98,0x58,0xd5,0x90,0x80,0xab,0xa1,0xfe,0x83,0xc5,0x66,0xf6,0x3e,0xa2,0xa9,0x6f,0x73,0xce,0x7f,0x0c,0xe6,0xde,0xee,0xb0,0xe6,0x2a,0xcc,0xcc,0xb0,0x53,0x8c,0xce,0xc8,0xdc,0xea,0x83,0xb4,0x0e,0x69,0x8d,0x90,0x86,0xaa,0xe3,0x3b,0xfb,0x88,0xe2,0xe8,0x27 +.byte 0x65,0x36,0x07,0xb3,0x91,0x0e,0x5a,0x6b,0x9f,0x0f,0xbd,0x81,0xb3,0x54,0x65,0x71,0xa4,0x2c,0x8e,0xda,0x47,0x04,0xce,0xfe,0x00,0x52,0xf1,0xdf,0x82,0x27,0x70,0x2a,0xb1,0x79,0x2f,0x27,0x7f,0xae,0x9e,0x5c,0x36,0xec,0xa0,0x2a,0xf3,0x74,0x78,0x01,0x17,0x74,0x2a,0x21,0x4f,0xb8,0xd2,0xe4,0xfe,0x5b,0x06,0x14,0xa5,0xb1,0xb1,0xff +.byte 0xee,0x79,0xf7,0x18,0xb9,0x31,0xa4,0x63,0x47,0x1c,0xdf,0x38,0x04,0x2d,0x18,0xca,0x14,0xf8,0x2f,0xec,0x0d,0x58,0xad,0xbb,0xf4,0x45,0x11,0x0e,0xfa,0x17,0x4c,0x5e,0xd4,0xa6,0xde,0xe4,0x13,0x44,0x2c,0xb9,0xfd,0xcd,0x41,0xe7,0xf9,0xda,0xbc,0x28,0x8f,0x0c,0x41,0x4d,0xa7,0x0d,0xf5,0x96,0xd7,0x8f,0x10,0x96,0xfb,0x75,0x75,0x86 +.byte 0xc9,0x6e,0x23,0x92,0x71,0x69,0x7b,0x94,0x61,0x1c,0x3f,0xcf,0x66,0x34,0x62,0x68,0x5d,0xee,0x7b,0x34,0x5d,0x2a,0x39,0xbb,0x6a,0x34,0xea,0x6e,0xe3,0xe9,0xdb,0xe4,0x34,0x6e,0x29,0x0b,0x21,0x38,0xe7,0x5b,0x79,0x37,0x54,0xf0,0xed,0xaa,0x07,0x2b,0x21,0x29,0x67,0xfe,0x7d,0xa5,0x99,0x0e,0x5d,0x05,0xe7,0x61,0x6e,0xd1,0x4a,0x15 +.byte 0x4a,0x56,0xb1,0x13,0x49,0x8c,0xf4,0x4f,0xd7,0xe9,0x68,0xae,0x09,0x37,0xd3,0x96,0x21,0xe8,0x1f,0x9f,0xa9,0xc6,0x54,0x57,0x63,0x09,0x1e,0x71,0xf2,0x48,0x9e,0x50,0xbb,0xb3,0xf1,0x4e,0x2d,0x1d,0x79,0x69,0x0a,0xa2,0xa9,0xdd,0x1b,0x55,0x62,0x6b,0x0d,0xcc,0x9c,0xb1,0x5e,0xc8,0x4c,0x4f,0x62,0x3c,0xc4,0xa3,0xb4,0xe4,0x34,0xec +.byte 0x9d,0x0c,0x1b,0x46,0x60,0x68,0xd5,0x04,0xd7,0x1b,0x3c,0x7a,0x98,0x0c,0xd9,0x87,0x2b,0x4f,0x97,0x5b,0x56,0x65,0xb0,0x06,0x6e,0x9e,0x06,0x37,0x0e,0xd2,0xa1,0x52,0xf5,0xaa,0x2b,0xec,0xbd,0x0f,0xb6,0xba,0x48,0x63,0x57,0x51,0xe3,0x00,0x53,0xf5,0x77,0xb2,0xa4,0xb1,0x44,0x01,0x3e,0xcf,0xe9,0x2a,0x7a,0xf5,0x19,0x5e,0x43,0x36 +.byte 0xe0,0x38,0x41,0xbc,0xda,0xb5,0xd0,0x69,0xdf,0xd2,0x04,0xd4,0xf8,0x38,0x37,0x1c,0x90,0x30,0xf2,0x3d,0x03,0xe4,0x3f,0x84,0x2c,0x9a,0xa4,0x8a,0x00,0x4e,0x49,0x24,0x62,0x06,0xb4,0x9d,0x33,0x8a,0x8e,0xd2,0xbd,0x1b,0xa1,0x83,0x0b,0xa5,0xa2,0x5c,0xcf,0xb1,0x65,0x85,0x92,0x1f,0xb0,0x2e,0x3b,0xb2,0xf3,0x80,0xff,0x9d,0x41,0x4d +.byte 0xcd,0x25,0x09,0x02,0x85,0xb3,0xa8,0x49,0x12,0x10,0xe7,0x5c,0x94,0x13,0x4b,0x52,0x53,0x35,0x9c,0xbc,0x7a,0xad,0x04,0x19,0x54,0x8a,0xbc,0x42,0x73,0xf1,0x0a,0x22,0x75,0xbf,0x3b,0x12,0xa8,0xa4,0x47,0x5c,0x95,0x48,0x60,0x71,0x5c,0x9a,0x39,0x5c,0xdb,0x44,0xe8,0x74,0x92,0x3e,0x2b,0x3b,0x1b,0xb7,0x21,0x98,0xe1,0x87,0x32,0xaf +.byte 0x4a,0xe3,0xda,0x4a,0x46,0xde,0x15,0x4c,0xdc,0xc6,0x60,0xe6,0xd7,0x92,0x29,0x05,0x21,0x22,0x9b,0xaf,0xc4,0xd7,0x6a,0xea,0x2c,0x82,0x5d,0xc7,0x81,0xe2,0x67,0x85,0xd2,0x16,0x6f,0x83,0xa8,0x82,0x5f,0x8f,0xf5,0x3a,0x50,0xba,0x04,0xcb,0x76,0x4d,0x80,0x16,0x12,0x72,0xa8,0x6c,0xac,0x78,0xf1,0x8c,0x93,0xab,0xe0,0xb5,0xdc,0xd1 +.byte 0xa5,0x40,0x0e,0x50,0x88,0xd2,0x9d,0x56,0xf6,0xa0,0xd4,0x45,0xcf,0xef,0x16,0x1a,0xa4,0xaa,0x91,0x5c,0xa3,0x8f,0x84,0xf8,0x3e,0x30,0x1f,0x5f,0x55,0xf9,0xd3,0x3d,0xb8,0x64,0xbb,0x3c,0x91,0xe4,0x0d,0xa5,0x43,0x14,0x75,0xe7,0xec,0x8c,0x12,0x56,0x34,0xb0,0xa9,0xae,0x93,0x91,0x34,0xfc,0x78,0xa3,0x81,0x51,0x45,0x7d,0x9f,0x7d +.byte 
0x5e,0xc7,0x5e,0x51,0x17,0xfa,0x02,0x5d,0xb2,0xf7,0x79,0x4b,0x49,0xd2,0x1b,0x6f,0xfd,0x9e,0xff,0x75,0x74,0xf0,0x26,0x7e,0xd7,0x65,0xb0,0xf3,0x0a,0x0c,0xd2,0xa2,0x26,0x98,0x03,0x26,0xb5,0x67,0xc4,0xc0,0xed,0x80,0xd4,0x20,0xf6,0x7e,0x17,0x54,0xeb,0xde,0xc3,0x86,0x51,0xda,0xf7,0xe5,0xc7,0xfe,0xfc,0x71,0x83,0x80,0xbe,0xde +.byte 0x4b,0xda,0x83,0x76,0x63,0x04,0x03,0xdd,0xe0,0xe0,0x4e,0xb6,0x32,0xd5,0xd0,0xce,0xd7,0xaa,0xcd,0x5f,0x64,0xa6,0xd8,0x9e,0xc5,0x97,0x30,0xad,0xf1,0x82,0x8f,0x7c,0x18,0xec,0x30,0x1d,0x2d,0xb6,0xdb,0x33,0x65,0xed,0xe2,0x24,0xd8,0xba,0x0a,0x1f,0x79,0x2a,0x1c,0xe1,0x4e,0x04,0xa6,0x74,0x74,0x37,0x42,0x94,0xc4,0x99,0x0e,0xf8 +.byte 0x3f,0xf3,0xff,0xeb,0x7f,0x95,0x9c,0x47,0x56,0x68,0x6a,0x0d,0x6e,0x66,0x71,0x3b,0x51,0xd5,0x12,0x7e,0x59,0x39,0x43,0xb5,0x53,0xd3,0x1d,0xa2,0xe9,0xa1,0xc8,0x8d,0xf2,0x8e,0xa1,0x9c,0x36,0xdd,0xda,0xd3,0x61,0xd8,0xe9,0x76,0x5e,0xcb,0x0a,0x52,0xc8,0x5a,0x25,0x00,0x21,0xea,0x6a,0x96,0xde,0x02,0x76,0x02,0x63,0x73,0x28,0x63 +.byte 0x46,0x37,0xe1,0x75,0x2f,0x42,0x8f,0xee,0x2c,0x84,0x82,0x43,0x43,0x2d,0xa9,0x13,0x50,0x46,0x54,0xed,0x76,0xbd,0x10,0x1c,0x9b,0xa1,0x42,0x97,0x68,0xca,0x84,0x2e,0x1d,0x6f,0x86,0x67,0xaf,0xb7,0x20,0xc1,0x7c,0xab,0x70,0x20,0xa1,0x79,0x71,0xe4,0xb7,0x45,0x8a,0x04,0xd3,0x70,0x10,0xa8,0x28,0xc3,0x56,0xff,0x43,0x36,0x13,0x88 +.byte 0xb6,0x2d,0xfd,0x7f,0xbc,0xc9,0x1d,0x11,0x9a,0x7c,0xd0,0xfc,0x11,0xac,0x54,0xd5,0xc3,0x03,0xd1,0xe3,0x9e,0xff,0x03,0xdb,0xd9,0xd8,0x77,0x96,0x08,0xf4,0x1b,0xd9,0xfa,0x70,0xed,0xab,0x53,0x78,0xca,0x28,0xa7,0x29,0x49,0x45,0x37,0x10,0x8f,0x61,0x7d,0x11,0x99,0x2e,0xe8,0x5d,0x45,0x3a,0xe7,0xd2,0x6c,0xb6,0x03,0xc4,0x6d,0xaa +.byte 0x52,0x60,0x8c,0xc6,0x9c,0x17,0xba,0xf6,0x3b,0xd4,0x4b,0x26,0x63,0x92,0x8c,0xb9,0x6a,0xf2,0x26,0x91,0x9d,0x8d,0x99,0x39,0x26,0x7d,0xb5,0x4f,0x4c,0xc6,0x0e,0x2e,0xe1,0xc6,0xcb,0x98,0x93,0x71,0x9b,0xaa,0x01,0x40,0x70,0x93,0x2a,0xe8,0x27,0xc5,0x20,0xa7,0xd2,0x06,0x8b,0xb0,0x29,0xcd,0x4f,0x2c,0x5a,0xde,0x35,0xc7,0x2a,0x8e +.byte 0xa7,0xae,0x02,0xfa,0x8e,0x4d,0xf3,0x77,0x67,0xe0,0xcb,0x84,0x69,0xc6,0x05,0xe4,0x84,0xe3,0x6e,0x02,0x6c,0x3b,0x93,0x30,0x3e,0x89,0x2c,0xc7,0xa5,0x7e,0xaa,0x58,0x59,0x25,0xf6,0xff,0x56,0x9a,0x4a,0x70,0xbf,0x88,0x20,0x8d,0x51,0x5e,0x08,0x13,0x26,0x2c,0x5d,0x88,0x13,0x3e,0x32,0x7a,0xf6,0x17,0x5c,0xdb,0xc4,0xcd,0x5a,0x16 +.byte 0x65,0xe4,0x34,0xeb,0x21,0x6d,0xb9,0x30,0x5d,0xc0,0xa2,0xea,0x4f,0x63,0x0e,0xbe,0x32,0x91,0x89,0x6f,0x96,0x40,0xf3,0x5f,0xa3,0xf2,0x15,0xc3,0x3c,0x3c,0xb8,0x2f,0x0d,0xc2,0xcd,0x4e,0xa0,0xa5,0xf6,0x78,0x40,0x0b,0x90,0x11,0x52,0xff,0x8f,0x7f,0x6a,0x0c,0xd6,0x3b,0x64,0x80,0x47,0xfa,0x70,0xbe,0x01,0xdc,0xdf,0x5b,0x75,0x7c +.byte 0xca,0x66,0xf0,0x2a,0x53,0x89,0x55,0x87,0xf8,0xec,0xd1,0x18,0x22,0x0c,0xd5,0x0e,0xc8,0x1c,0xbc,0x1e,0x66,0x14,0x44,0x10,0x3c,0xd4,0x2e,0xca,0x0b,0xd8,0x3f,0x81,0xd8,0x9f,0x81,0xf6,0x62,0x23,0xe4,0xc7,0x0d,0xb0,0x1b,0x00,0xd8,0xf4,0x1a,0xdd,0x9b,0xa1,0x74,0xeb,0xf0,0x65,0x5c,0x82,0x00,0x17,0xa6,0x68,0x29,0xd5,0xa4,0x64 +.byte 0xd3,0x15,0x90,0xd0,0x91,0x17,0xfc,0xd2,0xd7,0xad,0x4b,0xd8,0x41,0x03,0x51,0xfd,0x61,0xac,0x34,0xd4,0xff,0xaa,0xb1,0x64,0x6c,0x79,0x78,0xf7,0x6b,0x18,0x03,0x2b,0x6b,0x9a,0xd7,0xce,0x55,0x6e,0xdd,0xab,0x2e,0xbc,0x27,0x3a,0x8c,0xa5,0x8d,0xf0,0x55,0x81,0x0c,0x6e,0x8d,0xd8,0xd2,0x24,0x5e,0x2e,0x56,0xa8,0x1e,0x9c,0x98,0x88 +.byte 
0xd3,0xbe,0x90,0x56,0x70,0xe5,0xcc,0x49,0x2a,0x13,0x98,0x99,0xbd,0xc9,0x9f,0x53,0x85,0x07,0xbe,0x54,0xa7,0x4c,0xd6,0x96,0x7d,0x8f,0x24,0x79,0x67,0xb2,0x62,0x4c,0x6a,0xc1,0x6c,0xb7,0xdc,0xe9,0x21,0xe3,0x27,0xc7,0x53,0xff,0xe7,0xd1,0xea,0x60,0xa8,0x56,0x08,0x5c,0x29,0x0a,0x04,0x0c,0xda,0x7a,0x70,0x8c,0x3d,0x55,0x3f,0xcf +.byte 0x9e,0xea,0x74,0x8b,0xbc,0xf0,0xf1,0x3a,0x86,0x22,0xe5,0x54,0xa7,0x70,0xc2,0xcd,0xb8,0x9f,0x4e,0x9f,0x48,0xa8,0xc0,0x82,0x0d,0x73,0x8b,0x3c,0xfc,0x20,0xf4,0xbe,0x79,0xde,0x8e,0x3c,0x26,0x85,0xde,0x74,0xd1,0xe3,0xd5,0x8f,0x39,0x71,0x46,0x8c,0xbd,0x68,0x28,0x2d,0x36,0x0d,0x66,0xc1,0x0b,0x96,0x3e,0x11,0x2e,0x44,0x17,0xd5 +.byte 0xfe,0x0d,0x70,0x84,0x96,0x20,0x34,0x2f,0xbe,0xf0,0xf5,0x9b,0xb4,0x5a,0xa9,0x50,0x6a,0xda,0xdb,0x69,0xea,0xef,0xa9,0xaa,0x06,0xc0,0x68,0xa4,0x61,0x1b,0x4b,0xf8,0x0b,0x56,0x91,0xc8,0x6f,0x39,0x15,0xe2,0xcc,0xbf,0x2b,0x36,0x96,0x0c,0x84,0xfb,0x3d,0x4b,0x09,0xe3,0xc2,0x4b,0x05,0x5e,0xfa,0x30,0x75,0xc5,0x54,0xa5,0xbd,0x45 +.byte 0x1e,0x14,0x72,0xd6,0xfd,0xe0,0x8f,0x7b,0x46,0x9b,0x11,0x07,0x27,0x03,0xe1,0x2d,0xcc,0x0a,0x01,0x49,0x61,0xc4,0x61,0x78,0x06,0x5f,0xaa,0x01,0x5b,0x68,0xd7,0x29,0xb4,0x9e,0xd3,0xaf,0xc7,0x45,0xf0,0x23,0xaf,0x28,0xcd,0x96,0x23,0x61,0xb2,0xb4,0x21,0x96,0x5d,0x91,0x3e,0x71,0xb5,0x41,0xf1,0x29,0xf4,0x5b,0x45,0x77,0x16,0x00 +.byte 0x9d,0x39,0x2a,0x1c,0x38,0x6d,0x36,0x97,0x98,0x4c,0x84,0xfc,0xf5,0xf1,0x59,0x7a,0x8c,0x21,0xfb,0xbc,0x9b,0x0c,0x8d,0x60,0xb6,0xc4,0xe3,0x4b,0x33,0x4f,0x04,0x4c,0x27,0xd2,0xa0,0xe1,0x71,0x0b,0x6d,0x40,0x8d,0xba,0xb3,0x11,0x9b,0x07,0x97,0x82,0x01,0x47,0xaa,0x2a,0xd4,0xcc,0x02,0xd3,0x86,0x86,0xb5,0xd7,0x5d,0xbc,0xd0,0x0f +.byte 0x97,0x5c,0xe5,0xac,0xc6,0x53,0xb3,0x39,0x09,0x68,0x2e,0xcc,0xf3,0x43,0xba,0xed,0x15,0x90,0xbe,0x9d,0xeb,0xa4,0xfb,0x4a,0x20,0xcf,0x10,0xb9,0x47,0x99,0xb0,0x89,0x26,0xb9,0xbd,0x4b,0xf6,0xa5,0xbd,0x2f,0xad,0x1a,0x75,0xe8,0xff,0xc6,0x6b,0x6a,0x31,0xbe,0xec,0xd2,0xc4,0x39,0x9e,0x3b,0x05,0x3f,0x24,0xba,0xf1,0x4d,0x0c,0x0c +.byte 0x05,0x60,0x60,0x22,0x0c,0x1b,0x0b,0x6c,0x80,0xd5,0xe8,0x8f,0x81,0xee,0x80,0x41,0x4a,0x69,0x47,0xc6,0x4c,0xeb,0xf6,0x2b,0x91,0x7c,0x9f,0x22,0x74,0x7b,0x43,0x95,0x56,0x55,0xba,0x85,0x23,0xb3,0xc3,0xee,0x6a,0xcc,0x49,0x2c,0x6c,0x86,0x6d,0x60,0x5d,0x84,0x0c,0x3c,0x88,0x61,0x58,0x1d,0xfc,0x00,0x2c,0x84,0x49,0x4d,0x95,0x75 +.byte 0xc0,0x03,0x02,0x59,0xc0,0xe9,0x84,0xea,0xce,0x3f,0x8b,0x76,0xbf,0x19,0xaa,0x13,0x1b,0x8d,0x9f,0xb2,0xeb,0xb3,0x02,0x87,0xee,0xfe,0x73,0xdb,0xc4,0x19,0x27,0xaf,0x15,0x8d,0xf4,0x58,0x97,0x43,0xb9,0x45,0x32,0x5f,0x24,0x2d,0x08,0xfe,0xec,0xf2,0xf1,0x34,0x99,0x7a,0x66,0x44,0x3d,0xd4,0xf7,0x82,0xcf,0xca,0x6f,0x53,0x9f,0x0a +.byte 0x74,0x79,0x9b,0x45,0x5b,0x07,0x92,0x35,0xc6,0xf4,0xd1,0x90,0x2b,0x62,0xec,0x93,0x7b,0x05,0x90,0x75,0xb7,0xb6,0xd9,0x6c,0x30,0xdd,0x9b,0x2a,0x32,0xb1,0xba,0xab,0x1a,0x6c,0x2b,0xd8,0xfb,0x39,0x8e,0x80,0x98,0x6c,0xd0,0xb3,0xf3,0x76,0xe2,0xe6,0x5e,0xee,0xd0,0x29,0xd7,0x57,0x8f,0xc3,0x13,0xcb,0x45,0x90,0x3e,0xa2,0x54,0x88 +.byte 0xd5,0x50,0xd3,0x75,0xed,0x2d,0xa6,0x50,0x11,0x6b,0xb0,0xb6,0xf0,0x1d,0xc9,0x3d,0x1d,0x2a,0xda,0x5e,0x43,0x44,0xf4,0xef,0x3e,0xc7,0xa9,0xe0,0x6d,0x3c,0x38,0xbf,0x84,0x72,0xaf,0xea,0x60,0x15,0x03,0x14,0x77,0xb7,0xb3,0x15,0x4c,0xbc,0xbf,0x55,0x86,0x24,0x73,0x97,0x22,0x9d,0x59,0xa0,0x39,0x76,0x38,0xd1,0x1f,0x25,0xb0,0x64 +.byte 
0xf3,0x10,0x67,0xf2,0x7c,0x11,0xf2,0xce,0xbe,0xaf,0x5e,0x2e,0xc5,0xc1,0x01,0xfa,0x80,0xf9,0x87,0xfc,0x5c,0xfd,0x66,0x50,0x01,0xc2,0x00,0x92,0x84,0x0f,0xdc,0xfc,0x10,0xa5,0x6e,0x45,0xf5,0xff,0x58,0x78,0x45,0x5e,0x50,0xbe,0xe3,0xc7,0x25,0x1e,0xdf,0x7f,0x68,0x6f,0xa5,0xb8,0xf8,0x69,0x89,0x5a,0x55,0x65,0xf4,0x96,0xe5,0x7a +.byte 0xa6,0x89,0x69,0x8d,0xdd,0x4f,0x24,0x5a,0x29,0x92,0x1e,0xca,0x74,0x65,0x7f,0xb8,0x32,0x75,0xb5,0x7b,0x15,0xea,0xeb,0xcc,0xf1,0x23,0x69,0xc7,0x58,0x1c,0x3a,0xaa,0x27,0x0a,0x11,0x79,0xcf,0xc9,0xb6,0xbd,0x9d,0x56,0x47,0x36,0x6b,0x7f,0x82,0xb5,0xa7,0x9f,0x79,0x72,0x16,0xba,0x50,0xef,0x37,0x68,0xdf,0xe0,0xd8,0x0c,0x16,0xcc +.byte 0x50,0x6c,0x25,0x63,0xc2,0xd6,0x7b,0xef,0xd9,0xa1,0xef,0x62,0x81,0x97,0x51,0x49,0x69,0xe3,0x13,0x6c,0x1a,0xd0,0x64,0x1b,0x3e,0x48,0x25,0x5b,0x34,0xe9,0xee,0x41,0x34,0xfb,0x8e,0x9d,0x3c,0xbc,0xc8,0xcf,0xe7,0xf8,0x72,0x21,0x0f,0x95,0xde,0x57,0xd7,0x2f,0x80,0x97,0xbd,0x8f,0x2c,0xde,0x19,0xa3,0xba,0x5c,0x92,0xa3,0x75,0x83 +.byte 0xe3,0xc9,0x33,0x3f,0x8f,0x09,0xfa,0x0b,0x60,0x0a,0x2f,0xb3,0x45,0x9d,0x8e,0x9d,0xa3,0x66,0x2d,0xda,0x37,0xe0,0x21,0x52,0x74,0x9d,0x59,0xa4,0x9e,0xea,0x15,0x22,0xb0,0xbf,0x3c,0xd4,0x59,0xef,0x27,0x60,0xf7,0xbf,0x5d,0x1d,0x36,0x9a,0xa5,0xfb,0x53,0x90,0x40,0x83,0x3a,0x20,0x3d,0x6b,0x47,0xbc,0xc3,0xe6,0x07,0xfe,0xd0,0x8e +.byte 0x40,0x42,0x65,0x2b,0x27,0xba,0x69,0x61,0x03,0x36,0x58,0x35,0x7e,0x82,0x53,0xb5,0xe2,0x25,0x31,0xc3,0x77,0xc1,0x91,0x13,0xa4,0x92,0x52,0xea,0x9f,0x43,0x44,0x6b,0x43,0xe9,0x11,0xd4,0x3d,0x53,0xba,0x6b,0x96,0xb5,0x96,0x29,0xa3,0x2a,0x0a,0xf2,0xb5,0x0c,0x5d,0x62,0x37,0xe0,0xd6,0xa2,0xbf,0xcd,0xf9,0x58,0x7f,0xa2,0xfd,0x54 +.byte 0x6a,0xa1,0x90,0xa5,0x61,0x9e,0xa6,0xc2,0xb9,0x80,0x7a,0xb8,0xaf,0x60,0x68,0xa7,0x27,0x77,0x41,0x03,0x4e,0xc1,0x96,0x46,0x23,0x1b,0xff,0xa1,0x37,0x28,0x33,0x27,0xc2,0x99,0xf7,0xcb,0x7f,0x1a,0xfb,0x41,0xc3,0x59,0x11,0xf8,0x39,0x50,0xbd,0x90,0x61,0x4a,0x67,0x4a,0x07,0x5f,0xb1,0x07,0x66,0x0b,0x52,0xad,0x90,0xc2,0xd7,0x4e +.byte 0x42,0x9e,0xcc,0x5c,0xeb,0xf2,0xdc,0xaa,0x52,0xcf,0x0e,0x7d,0xae,0x3e,0x1a,0x2c,0x9e,0x79,0xfb,0x29,0x10,0x29,0x61,0xa4,0x93,0x9d,0xa9,0xe9,0x71,0xc5,0xf7,0x07,0x13,0xe9,0xbd,0x2e,0x2d,0x0c,0xd6,0xaf,0x54,0x48,0x58,0xc2,0x91,0x37,0xf4,0x61,0x3a,0x96,0x81,0xdc,0x82,0x02,0xff,0xc9,0xf7,0xf7,0x9f,0x9f,0x28,0xd1,0xb1,0xe3 +.byte 0x2b,0x3d,0x85,0xef,0x15,0x82,0x3b,0x9a,0x17,0xee,0x7f,0xd3,0xa5,0x7c,0x41,0x27,0xc9,0x4c,0xe9,0x7a,0x30,0x9f,0xc5,0x34,0xaf,0xc8,0x1c,0x8a,0x7c,0xa6,0xf4,0xdc,0xa6,0xdb,0x68,0xc1,0xa1,0x13,0xb0,0x54,0x49,0x25,0x43,0xc0,0xd4,0x93,0xd6,0x70,0x53,0x3e,0x5f,0xd5,0x42,0x6e,0x78,0xb8,0x15,0x07,0x6a,0x91,0xe8,0xf1,0x2f,0xcf +.byte 0x07,0x84,0x25,0xb3,0x20,0xb9,0x35,0x25,0xbb,0x26,0x96,0x02,0x25,0xd5,0x83,0x23,0x71,0x6d,0x62,0xa7,0x99,0x73,0x63,0x2a,0x51,0x25,0x34,0x3d,0x51,0x95,0xc7,0x9b,0x01,0x0a,0xab,0x11,0xb2,0x32,0xcd,0xe3,0xef,0x63,0xa4,0x6d,0xdb,0x7b,0xf6,0x5f,0xc5,0xf3,0xe5,0x8c,0x6b,0x0a,0x04,0x33,0x53,0x0d,0xf6,0x13,0x8c,0xb8,0xc7,0xba +.byte 0xc2,0xf0,0xd4,0xa7,0x1a,0xce,0x7c,0x54,0x72,0x2b,0x89,0xf4,0x05,0x5c,0x30,0x42,0xe5,0x58,0x65,0x3a,0x2e,0xf9,0x40,0xab,0x2b,0xf9,0xc3,0x99,0x40,0x3c,0xb1,0x7b,0x2c,0xdc,0xfe,0x41,0x21,0x71,0x00,0x75,0xbd,0xea,0xf3,0x84,0x88,0x6b,0x9c,0xe2,0x80,0x2f,0xad,0x9f,0x9d,0x0a,0xdf,0xb5,0x38,0x61,0x89,0xfb,0x67,0x45,0x9c,0x39 +.byte 
0xf9,0x84,0x54,0xc4,0xd6,0x6f,0x00,0x39,0x90,0x82,0xfa,0xce,0xae,0xe8,0xaf,0xa4,0x97,0x3a,0xfe,0x71,0xaf,0x5e,0x00,0xd1,0x9e,0x33,0x41,0x63,0xca,0xa5,0x5a,0x8b,0x09,0x2a,0x26,0xef,0x96,0xb7,0x5d,0xc4,0x92,0xfa,0x51,0xdb,0x1d,0x63,0x5f,0x7c,0x94,0x53,0x84,0xed,0xa3,0x99,0x07,0x9f,0xdc,0x55,0xb3,0x31,0x67,0x1a,0x63,0x05 +.byte 0xec,0x36,0x79,0x57,0xf8,0x39,0xc3,0xdd,0xd5,0x6a,0x21,0xfc,0x54,0xe6,0x28,0xc4,0xf1,0xd2,0xce,0x02,0x43,0x50,0x30,0x15,0x4d,0x3c,0xd0,0x1c,0xf6,0x7e,0xd0,0xa4,0x86,0xe7,0xf5,0xc2,0x06,0xc5,0xc4,0xa8,0xe2,0xd3,0xc7,0xcf,0xbd,0xab,0x9f,0xe3,0x42,0xc4,0xcd,0x65,0xfa,0xd3,0xcd,0xdf,0x55,0xc4,0xce,0x6e,0xe8,0xfc,0x96,0x0f +.byte 0xe2,0x92,0xca,0xde,0x37,0x7c,0xc9,0x80,0x4a,0x54,0xe9,0xfd,0x3c,0x4b,0x81,0xb8,0xd9,0x1a,0xf1,0x91,0x5d,0x9d,0xef,0x3e,0xd1,0x78,0xe2,0x1e,0x0e,0x09,0x62,0xdd,0xc6,0xb9,0xde,0x29,0xba,0xb0,0x62,0x49,0x53,0xb6,0x8d,0x9f,0xbf,0x4d,0x77,0xa4,0xd1,0x0b,0xf0,0x31,0x2e,0xe5,0x71,0x2e,0x18,0xa4,0xa7,0xcb,0xa6,0x30,0x24,0x11 +.byte 0x8d,0x16,0x21,0x71,0x6a,0x19,0xde,0x3c,0x5a,0x00,0xa6,0xe2,0x43,0x98,0xe8,0x83,0x10,0x76,0xef,0xca,0x67,0x61,0x80,0x98,0x48,0x06,0xa9,0xcd,0x13,0xa6,0x1e,0x5b,0x2b,0xef,0xb7,0x3a,0x24,0xf7,0x10,0x8d,0xc2,0xaa,0x9c,0x78,0x0d,0xd1,0x54,0xb1,0x4e,0x5a,0x21,0xc2,0xb4,0x11,0x15,0xdb,0xb3,0x9c,0xe4,0xf1,0xfc,0xa5,0x66,0x0c +.byte 0x56,0x34,0x05,0x14,0x88,0x2c,0xfc,0x3f,0x97,0x30,0xd5,0xd0,0xba,0xa3,0xf1,0x47,0xc0,0xf1,0x59,0x3c,0xda,0x1a,0xc1,0x90,0xae,0x4b,0x26,0xd3,0x5f,0xc9,0x8f,0x62,0x56,0x9c,0x64,0xec,0xda,0x63,0x37,0xa1,0xa2,0x87,0x74,0xcb,0xcc,0x27,0xcb,0x2a,0x97,0x57,0xa3,0xb9,0xac,0xe2,0xbd,0x97,0x93,0x21,0xb9,0x8b,0x82,0xa1,0xe7,0x76 +.byte 0xc1,0x49,0xd6,0xb2,0x52,0x7b,0xd6,0xbb,0x31,0x0f,0x87,0xc0,0xaa,0x91,0x70,0x19,0x76,0xa5,0xea,0xf0,0x87,0x47,0x50,0xc1,0xff,0xf7,0xa6,0x6c,0x65,0xff,0xdf,0x83,0x5c,0x54,0xf0,0xb1,0x18,0xe0,0x13,0x58,0x74,0xc0,0x67,0x0e,0xb8,0xdc,0x59,0x6c,0x19,0xf4,0xee,0x3a,0x07,0x63,0x68,0x1d,0x62,0x60,0xb5,0x71,0xce,0x21,0x61,0x8c +.byte 0xa5,0x74,0x9b,0x77,0x8e,0x15,0x20,0x18,0x19,0x96,0xf6,0xfa,0xd2,0x6c,0x03,0xcb,0xcb,0x8c,0x91,0x0d,0x29,0x91,0x70,0xc5,0x96,0x60,0x18,0xad,0x65,0x66,0x43,0xf9,0x13,0x97,0xe3,0xe3,0xcb,0xbf,0x68,0x0b,0xb2,0x87,0x9c,0xfa,0x96,0x48,0x14,0xef,0x6e,0xbd,0x45,0xb9,0x2f,0xbb,0x80,0x80,0xc5,0xf6,0x22,0x41,0x9a,0xec,0xdd,0x41 +.byte 0xfc,0xf3,0x0d,0x8e,0x2e,0x3c,0xda,0xef,0x2c,0xbd,0xbc,0x0e,0x88,0xd2,0x97,0x3d,0x40,0x37,0xa6,0xde,0x1d,0x00,0xeb,0x39,0xea,0x44,0xee,0x8a,0x2f,0x77,0xea,0xea,0x1d,0x90,0xd1,0xec,0xe4,0x31,0x0c,0xde,0x6f,0x55,0x17,0x5c,0x1e,0x19,0x91,0xac,0x36,0x00,0x26,0x17,0xa6,0xcd,0x8b,0xe2,0x72,0x6f,0x8f,0x3c,0xc6,0x76,0x6e,0x3d +.byte 0x4e,0x93,0xb3,0x8b,0xad,0x24,0x17,0x39,0xc0,0xfe,0xba,0x90,0xc5,0xbd,0x4b,0xe4,0xae,0xac,0xf6,0x55,0x72,0x3e,0xf0,0x12,0x32,0x5a,0xdd,0x8a,0x3f,0x67,0xb6,0xdf,0xf6,0x11,0x02,0xf5,0x84,0xcc,0x7d,0x36,0xe7,0x1b,0xf0,0x9a,0x52,0xbe,0xf3,0x06,0xd6,0xdb,0x02,0xd4,0x80,0x0b,0xcd,0xf0,0xfe,0xec,0x86,0x3f,0x89,0x34,0xcb,0x88 +.byte 0x34,0x28,0x57,0x00,0x33,0xeb,0x4f,0xfa,0xdb,0xd8,0x09,0xd9,0x56,0x53,0xc1,0x02,0xc0,0xa8,0x4c,0xdc,0xfd,0x26,0xb3,0x55,0x1d,0x47,0x0d,0x68,0x50,0xb8,0xa3,0xb4,0xf1,0x31,0xfa,0x16,0x33,0x94,0x40,0x95,0x53,0x9c,0x9f,0x5b,0x25,0x47,0xb1,0x27,0xbc,0x38,0x7d,0x23,0x01,0x7f,0x70,0x7a,0x61,0x0e,0x46,0x5c,0xcc,0xd7,0xcc,0x15 +.byte 
0x15,0x0a,0xed,0x4c,0x99,0x66,0x3a,0xc3,0xc1,0x9a,0x7a,0x38,0x6a,0x0c,0xde,0x13,0x67,0x65,0xfc,0x06,0x99,0x7c,0xa5,0x90,0x8a,0x90,0x58,0xce,0xf3,0x23,0x76,0xfc,0x03,0xfb,0xb3,0x36,0x54,0xa9,0x33,0x35,0xfe,0xe3,0x3d,0x53,0x7e,0xe0,0xae,0xcf,0xc0,0xa2,0xe1,0x28,0xb9,0x97,0x96,0x87,0x90,0xa1,0x13,0xd0,0x1d,0x5b,0x43,0xf1 +.byte 0xa5,0xfa,0x81,0x83,0xe7,0x7b,0xa1,0x5f,0x9f,0xf5,0xd3,0xb6,0x80,0x8b,0x91,0xed,0x31,0x14,0x05,0x78,0x85,0x9d,0xea,0x59,0x69,0xa5,0x29,0xc5,0xf1,0xd7,0x9d,0xa3,0x8b,0x9d,0xe0,0x8d,0xc3,0x4e,0x2d,0xfa,0x1c,0x6c,0xd2,0xd7,0xcb,0xda,0x86,0x5d,0xb3,0x1a,0xb4,0x12,0xe3,0xa8,0xd7,0xe1,0x84,0xce,0x0e,0x06,0xd0,0x9e,0xf0,0xb1 +.byte 0x5b,0x2f,0x77,0x10,0x6f,0x41,0x2f,0x5b,0x48,0x43,0xf3,0xef,0xdb,0x09,0xdb,0x01,0x89,0xfc,0x7a,0x4a,0xc0,0x96,0x33,0xdf,0xbe,0x49,0x85,0xa7,0x88,0x93,0x05,0xf2,0x15,0x12,0x85,0x04,0x20,0x7d,0x8c,0xe2,0x0a,0xea,0xfe,0xed,0xbf,0x98,0xdb,0x9d,0x1f,0xaf,0x0f,0xbf,0xf7,0x12,0x4f,0x69,0x4e,0x87,0x09,0xf0,0xae,0x2a,0x4d,0x4c +.byte 0xbf,0xaa,0x08,0x2c,0x78,0x2d,0xbe,0xb9,0xf5,0x3c,0x4c,0xcd,0x75,0x93,0xc3,0x3c,0xc2,0x86,0x47,0xca,0xc1,0x9c,0x1c,0xe5,0x0d,0x8d,0x36,0x9c,0x44,0x40,0x89,0xfa,0x17,0x57,0x08,0xd4,0x22,0x9a,0x5b,0x94,0xbf,0x39,0xcd,0xbe,0xf7,0xd1,0xcd,0x35,0x74,0xdf,0xfa,0x5d,0x00,0xaa,0xaa,0x82,0x6d,0x9b,0xf8,0x69,0x51,0x9c,0xaa,0xaa +.byte 0xc8,0x2c,0xa2,0x68,0x57,0x3c,0x5f,0x10,0xa2,0x7b,0xee,0xc9,0x97,0x8d,0x5c,0x41,0x08,0x0d,0x30,0xd5,0x2b,0x5f,0x8d,0xdd,0xdc,0x2c,0xa8,0x52,0x6e,0xea,0x61,0x77,0xca,0x75,0xc3,0x56,0x6e,0x17,0x51,0x0e,0x00,0xb6,0x18,0xa0,0xe5,0x9d,0x49,0x4e,0x20,0x78,0x1e,0x5f,0x3e,0xec,0xc3,0x4a,0x41,0xf3,0xfe,0x89,0x64,0xac,0x4c,0x4d +.byte 0xa8,0x73,0x4f,0x31,0xc4,0xe2,0x62,0x69,0x2b,0x40,0xdf,0xef,0xed,0xf0,0x62,0x4e,0xc3,0x65,0xcc,0xcb,0xef,0xc1,0x28,0x61,0x71,0xac,0xa5,0x89,0x52,0x7b,0x32,0x59,0xc2,0x16,0x1a,0x63,0x18,0xb0,0xd8,0xe4,0x28,0x92,0xff,0x45,0xc1,0x24,0x56,0x86,0x66,0x23,0x7a,0xff,0xf7,0x33,0x30,0xdc,0xd1,0x7d,0xaf,0x68,0x10,0x4b,0xde,0x3e +.byte 0x4a,0x70,0xbe,0x31,0x1a,0x37,0x28,0xee,0xe0,0xba,0x65,0x8b,0x7d,0xea,0x07,0xce,0xf2,0x51,0x3d,0xcb,0xb2,0x33,0xd8,0xf3,0xa4,0xa0,0xcd,0x53,0x76,0xf9,0x46,0x5b,0x82,0xf9,0x9d,0x0e,0x29,0x5b,0xcf,0x76,0xd4,0x5c,0x47,0xf1,0x98,0x02,0x5a,0x16,0x18,0xf2,0x61,0x6d,0x3e,0x64,0x7f,0xbe,0x13,0x18,0xc2,0x45,0xd2,0x87,0x17,0xff +.byte 0xf1,0x01,0x0b,0x5d,0x21,0x0d,0x73,0x9a,0xeb,0x82,0xc4,0x9a,0xb3,0xe4,0x31,0x44,0x58,0xa2,0xfd,0x76,0xf6,0xbe,0x6f,0x75,0xcc,0xbb,0xe3,0xa2,0xa9,0x78,0x0f,0x4b,0x1d,0x47,0x2d,0x32,0x2c,0x45,0x5e,0xcd,0x8f,0x13,0xe2,0x9a,0x9d,0xa2,0xce,0x73,0x54,0x20,0xc0,0x44,0x1c,0x26,0xde,0x0d,0x72,0xb2,0xfa,0x4d,0x32,0x35,0xac,0x69 +.byte 0x4d,0x16,0x4a,0xd5,0x51,0x33,0xc1,0xe0,0x90,0x9c,0x93,0x66,0xed,0x16,0xac,0x7e,0x79,0x2b,0x0f,0xb4,0x42,0xaf,0x80,0x22,0x80,0x07,0x7d,0x72,0xe4,0xb3,0x3a,0x2c,0xb8,0x68,0x14,0x4d,0x31,0x5f,0xbb,0xac,0x43,0x3b,0x28,0xd6,0x81,0x81,0x26,0xe5,0xc4,0x67,0x7c,0x4a,0x42,0xc4,0x1a,0x59,0x04,0x2d,0xb8,0x26,0xfc,0x4e,0xc7,0xfc +.byte 0x11,0x61,0xe3,0x4b,0x2c,0x3f,0xdb,0x43,0xe4,0x24,0xb4,0xd1,0xc0,0xc0,0x01,0xe1,0xeb,0x84,0x0b,0x6d,0x93,0x83,0x07,0x9f,0x01,0xb8,0x9d,0xe5,0x7e,0x4d,0xa2,0x05,0x3e,0xf2,0x40,0x59,0x88,0xc8,0x8c,0x62,0x44,0x95,0x20,0x96,0x28,0xa9,0x3f,0x7c,0xed,0x85,0x03,0x65,0x49,0xf7,0x94,0x3d,0x51,0xe2,0x8e,0x21,0x19,0x7b,0x55,0x5f +.byte 
0x55,0x70,0xf8,0xf0,0xce,0xd9,0x1a,0x10,0xbb,0xfe,0x65,0x72,0x8a,0x5b,0x6c,0x27,0xd3,0x57,0x61,0x07,0x7b,0x85,0xd6,0x21,0xd2,0x07,0x81,0xaa,0x17,0x73,0xb5,0xef,0x2d,0x84,0x7b,0x8f,0xe0,0xb3,0x9e,0x9f,0x31,0x82,0x33,0x07,0x14,0x84,0x79,0x18,0xc4,0xec,0x20,0xb5,0xec,0x21,0x4b,0x51,0x78,0x96,0xc6,0xe7,0xf0,0x6a,0x7a,0xb5 +.byte 0xe5,0xc2,0xef,0x24,0x4c,0x57,0xb6,0xf5,0xee,0xe5,0x69,0x2b,0x73,0x9e,0x66,0x91,0x9d,0xd4,0x24,0x58,0x4b,0x72,0x68,0xf6,0x62,0xb4,0x0c,0xe3,0xbd,0x1f,0x0b,0x42,0x6c,0xf9,0x6e,0x6a,0x64,0x64,0x69,0xa5,0x6d,0xe7,0x38,0x9f,0xb2,0x65,0x35,0x6b,0xd9,0x20,0x84,0xe4,0x5f,0x8b,0xfd,0x58,0xab,0x5f,0xe1,0x4c,0xf7,0xd7,0xf5,0xe7 +.byte 0xae,0xe8,0xc1,0x68,0xfe,0x0c,0xb1,0xe2,0xe4,0xca,0xf0,0xf1,0x20,0xbc,0xf9,0x99,0xef,0x4e,0x63,0xca,0x89,0xe4,0x7c,0x17,0x49,0x40,0x47,0xce,0x67,0x8e,0xbd,0xd0,0x96,0x8b,0x5a,0x0d,0x2f,0xd0,0x8f,0x4f,0x42,0x06,0x01,0x8e,0x47,0x35,0x13,0x9e,0xd1,0x24,0x85,0xe4,0x17,0x59,0xe8,0x1c,0xb3,0x25,0x53,0xf9,0xb4,0x96,0xb1,0x33 +.byte 0x97,0xb2,0x60,0xc7,0xb3,0x48,0xa2,0xfc,0x7f,0x86,0x94,0x2a,0xd3,0x94,0xfe,0x6d,0xa6,0x7a,0xa1,0xe1,0x96,0x5b,0xe8,0xe4,0x91,0xfb,0xf3,0x2c,0x84,0xb4,0x2f,0xbe,0xc9,0xdd,0x1c,0x9f,0x72,0x12,0xcb,0xbd,0x22,0x07,0xc4,0xec,0x05,0xe8,0x32,0x47,0x21,0x27,0xf6,0xc1,0x36,0x59,0x25,0x6c,0xbe,0xb9,0x3e,0xd4,0x1b,0x59,0x11,0x27 +.byte 0x6b,0xa3,0x64,0x71,0x98,0xeb,0x21,0x65,0xc0,0x4c,0x30,0xbd,0x51,0x2b,0xc3,0xfb,0xb1,0x33,0x56,0x1e,0xf0,0x92,0x0f,0x4b,0x63,0x3a,0x9c,0xfb,0xd1,0xac,0x8c,0xf0,0x3e,0xb7,0x0b,0xd2,0x52,0x62,0xd8,0x37,0x9a,0xef,0x79,0xdc,0xcb,0x87,0x1e,0x3d,0x9d,0x91,0x12,0xba,0x78,0x8a,0x11,0x57,0x96,0x44,0x8e,0x2b,0xd2,0xe3,0x4d,0x27 +.byte 0xec,0xba,0xef,0x1c,0x04,0x8d,0x56,0x56,0x11,0x74,0xc0,0xcc,0x1f,0x3d,0x7a,0xad,0x79,0x49,0x59,0xa3,0x71,0xe0,0xf5,0x89,0x89,0x8f,0xcf,0x1e,0x63,0x77,0x91,0x91,0xf1,0x0c,0x1c,0xcc,0x77,0x00,0xd7,0x28,0x9f,0x68,0xbc,0xb6,0x9d,0x33,0x43,0xb2,0x4a,0x72,0x3e,0x57,0x26,0xd0,0x00,0x93,0xc9,0x4c,0xc9,0x53,0x52,0xd9,0xe2,0x31 +.byte 0xc5,0x7f,0xf6,0xb6,0xc2,0x10,0x51,0x67,0xae,0x63,0x35,0x74,0xcc,0xd4,0x05,0xb3,0x08,0x23,0x35,0x37,0x8e,0xf1,0xbb,0x1d,0x56,0xff,0x62,0xa2,0x13,0x7b,0x01,0x75,0x6d,0xb3,0x92,0x51,0xdc,0x6e,0x08,0x76,0x25,0x52,0xbf,0x9a,0xea,0x89,0x0f,0x96,0xcc,0x79,0xd4,0x72,0xcf,0x65,0x79,0x4e,0x40,0xa3,0xae,0x67,0x0c,0x82,0x85,0x05 +.byte 0xfd,0x43,0x84,0x17,0x24,0x79,0xa9,0xa7,0x7f,0x24,0x76,0x57,0x66,0x11,0xd5,0x33,0x30,0x42,0x5b,0x5f,0x7c,0x04,0x4b,0x45,0xc3,0x69,0x20,0x02,0x92,0xe3,0x6a,0x06,0x8f,0xdf,0x30,0xf6,0x17,0x8f,0xc6,0x8c,0x5e,0x42,0xf3,0x59,0x7a,0x3a,0x55,0x3a,0xc1,0x96,0xd5,0x67,0x3d,0xab,0x32,0xee,0xf0,0x08,0x28,0x73,0xb0,0x11,0x1a,0x92 +.byte 0x4d,0xcc,0x0c,0x86,0xb2,0xa1,0xbf,0x9f,0xcd,0xc7,0x1c,0xbc,0xee,0x39,0x77,0x75,0xfc,0xe6,0x3b,0x62,0xf2,0xaf,0xd5,0xb6,0x77,0x2d,0x86,0x38,0x13,0x00,0xdb,0x71,0x4a,0x87,0x03,0x6d,0x99,0x28,0xf8,0x6a,0x23,0x2e,0xe2,0xb8,0x9c,0x18,0x02,0x00,0x9e,0x5b,0xf0,0x6f,0x9b,0x32,0xdc,0x6b,0x61,0xeb,0xeb,0xe9,0xfc,0xee,0x44,0xbc +.byte 0x4a,0x88,0x04,0xc0,0x10,0xc8,0x65,0x6c,0xa4,0xae,0x9a,0x36,0xb6,0x68,0xd5,0xbf,0x6d,0xe3,0x6f,0x5d,0xad,0xd6,0xf9,0xc8,0x06,0x36,0x25,0x64,0xc9,0x5b,0x71,0x7f,0xbf,0xe3,0x56,0x31,0x2a,0x93,0x47,0x46,0x39,0x91,0x80,0xc5,0xdd,0xdd,0xa1,0x25,0x85,0xd9,0x05,0x49,0x4f,0x1b,0xeb,0x2f,0x6e,0xd9,0xe4,0x65,0x3d,0xcd,0xbd,0x47 +.byte 
0x37,0x27,0xb0,0xd1,0x9b,0xa4,0x89,0xd5,0xa0,0x0f,0x8b,0xc5,0xfd,0x91,0xa8,0x86,0x22,0x65,0xf1,0xe1,0x1e,0xb6,0xf7,0x50,0xe6,0x1e,0xf0,0x2b,0x9d,0x02,0xc9,0xe8,0x2a,0xb8,0x9b,0x89,0x28,0x25,0x43,0xcf,0x23,0x08,0xe2,0xa7,0x70,0x31,0x89,0xab,0x5b,0xd9,0x2e,0xa9,0xe4,0xe9,0x1d,0x63,0x7f,0xc6,0xc1,0xfb,0x63,0x45,0x9c,0xf1 +.byte 0xd4,0xc3,0x56,0xb6,0xad,0xb3,0x00,0xce,0x12,0x9e,0x63,0x33,0x25,0xd3,0xb2,0xee,0xa7,0x6b,0xa1,0xfd,0x20,0xa3,0xb2,0x07,0x1a,0x9d,0xed,0xe0,0x1d,0x70,0x5b,0x9f,0xc0,0xbc,0x83,0x09,0x94,0x47,0x8c,0x05,0xef,0x73,0x96,0x31,0xc7,0x35,0xc2,0x2c,0x00,0x2a,0x68,0xd1,0xc4,0xb3,0x3d,0x84,0x44,0x8c,0x93,0xfd,0x64,0x00,0x77,0x46 +.byte 0x18,0xac,0x83,0x9d,0xe5,0xe5,0x46,0x61,0x37,0x72,0x9f,0x0e,0x76,0x55,0xf7,0xca,0x36,0x57,0x24,0x16,0xfc,0x11,0x27,0xaa,0x44,0xa4,0xb0,0x58,0x41,0x46,0x94,0xc7,0x3b,0x9c,0xa3,0xe4,0x89,0xd9,0xdb,0x7b,0x64,0x69,0x84,0x9f,0xc8,0x09,0x6f,0xf7,0xf0,0x58,0x10,0x56,0x9f,0x26,0xf0,0x74,0x0c,0x76,0xcb,0x9d,0x45,0x3d,0xe7,0x94 +.byte 0x54,0xa3,0x84,0x08,0xb5,0x9c,0xff,0xdb,0xba,0x62,0x5e,0x87,0x0d,0x11,0x5d,0x96,0x06,0xd6,0xec,0xf4,0x3e,0x9d,0x66,0xbd,0xc4,0x64,0xed,0x03,0xe0,0xad,0x3f,0x4e,0xb4,0xef,0x16,0xdd,0xee,0xd6,0x00,0x27,0x62,0x74,0x0a,0xe0,0x68,0x72,0x4c,0x6d,0x62,0x15,0x87,0x6a,0xf0,0x25,0x9f,0x33,0x1d,0x92,0x3b,0xa3,0xa4,0xf1,0x81,0xdf +.byte 0xa8,0xed,0xaf,0xa5,0x8d,0x19,0x20,0x72,0x03,0x91,0xf0,0x34,0x60,0x70,0xbe,0xaa,0xdf,0xaa,0x24,0x1a,0x1f,0x1a,0x8d,0xb0,0x7b,0xef,0x10,0x43,0x69,0x24,0x74,0xf2,0x72,0x71,0xa1,0x8f,0x85,0x75,0x3e,0x8c,0xf6,0x0e,0x88,0xe2,0x1d,0x5c,0xb8,0xf1,0xc4,0x8a,0x21,0x76,0x20,0x50,0x3f,0xb3,0x8b,0x9f,0xa4,0x45,0x9e,0x07,0x60,0x22 +.byte 0x2c,0xa6,0xb1,0xc2,0xd2,0xcb,0xc6,0xd8,0xe9,0x94,0x66,0xfb,0x10,0x73,0x92,0x25,0x7e,0x31,0x42,0xf4,0x4a,0x75,0xac,0x78,0x43,0xcb,0xc0,0xc9,0xb0,0xaf,0xb4,0x22,0x8f,0x51,0x36,0x0f,0x5a,0xb8,0xbb,0x44,0x03,0x09,0xd0,0xf9,0x04,0xc8,0x73,0x8e,0xa1,0x76,0x27,0xde,0x72,0xf4,0x3a,0x79,0x63,0x85,0x32,0x09,0xad,0x12,0xe4,0xd7 +.byte 0x8f,0x8e,0x24,0x03,0x4f,0xde,0x39,0xac,0x81,0xe8,0x64,0x09,0x17,0xd7,0x99,0xe6,0x62,0xb7,0x53,0x20,0x9f,0xb9,0x3a,0xb9,0xb1,0x81,0xfa,0x6e,0x33,0xe7,0x4a,0xca,0xd7,0xa7,0xfa,0x7a,0xbf,0x0b,0x0a,0x99,0x3c,0xc7,0xbd,0xef,0xc7,0x90,0xda,0x62,0x30,0xc6,0x94,0x94,0x6b,0xee,0xbd,0xb7,0x0d,0x86,0xc5,0xb1,0x9a,0xb9,0x86,0x34 +.byte 0xc2,0x81,0x2b,0x09,0x7a,0x88,0x09,0x65,0xcf,0x51,0x78,0x19,0x1d,0x5a,0x62,0x2f,0xb3,0x43,0x8d,0xf5,0x9d,0x26,0x2f,0x4a,0x27,0x96,0x22,0x1b,0x4c,0xc8,0xd9,0x73,0x4b,0x32,0x01,0x11,0x7b,0x59,0x85,0xda,0x50,0x92,0x17,0x45,0xd4,0x1f,0xcf,0x98,0xf6,0x2c,0x69,0xba,0x43,0x22,0xdc,0x36,0x31,0xfb,0x1e,0xe8,0x54,0x24,0x0f,0x24 +.byte 0x4c,0xcd,0xbe,0xdb,0xd8,0x23,0x69,0xe2,0x97,0xf5,0x66,0xb2,0x66,0x6c,0xf2,0x90,0xd0,0x15,0x14,0x9a,0x47,0x65,0x97,0xb0,0xf2,0x3e,0x35,0x09,0xd2,0x3d,0x01,0x9c,0xb3,0xfd,0xf3,0x32,0x46,0x4e,0x11,0xab,0x88,0x9e,0x04,0x6d,0xf0,0xe1,0x9d,0x48,0x01,0x24,0xc3,0x87,0xdf,0x58,0xb6,0x6d,0x6d,0x4f,0xb9,0x1b,0x13,0xee,0x03,0x5b +.byte 0x75,0x39,0x28,0x31,0x90,0x70,0x49,0x10,0x71,0x87,0x76,0x30,0xac,0x88,0xb0,0xf6,0x6c,0xaf,0x5b,0xf4,0xf3,0xe7,0x25,0x75,0x8c,0xa3,0xf4,0xa7,0xd8,0x94,0x78,0xc8,0x77,0xc1,0x48,0x6c,0x62,0xf6,0x2c,0xb5,0x41,0x59,0xf6,0xd3,0xae,0x1b,0x55,0xed,0xdf,0xd1,0x59,0x63,0x76,0x03,0x65,0xd3,0xd0,0xcd,0xb6,0x5b,0x8f,0x1a,0x78,0x88 +.byte 
0x78,0x07,0x14,0x3f,0xc3,0xd4,0x1c,0x69,0xd8,0x15,0x25,0xca,0x76,0x15,0x24,0x7d,0xed,0x69,0x2a,0xb5,0x04,0xd2,0x3b,0xbd,0x7a,0xb2,0xae,0x04,0x51,0x85,0x2b,0x1b,0xb0,0x3f,0x6d,0xbc,0xa0,0xc7,0x19,0x40,0xab,0x75,0x51,0x4b,0xa8,0x5a,0xd7,0xb5,0xc7,0xa8,0xfc,0x4a,0xcf,0xa9,0x9c,0xe6,0x2e,0x35,0x51,0x3b,0x05,0x41,0x43,0x7c +.byte 0x1f,0x2e,0x16,0x5d,0x2f,0xa8,0xe9,0xce,0x6d,0x06,0xa7,0x5a,0xed,0x07,0x39,0xe4,0x7e,0xc3,0x01,0x2d,0x97,0xe4,0xc1,0x89,0x2c,0xb4,0xb1,0xb5,0x7f,0x0a,0xe2,0x9f,0x82,0x36,0xee,0x9b,0x76,0xbc,0x9d,0x37,0xdf,0x5e,0x81,0x95,0x9b,0x2b,0xc4,0x58,0x20,0x6a,0xd2,0xc7,0xb6,0x82,0xe6,0xa2,0x52,0x73,0x4a,0xaf,0x37,0x5a,0xf6,0x6b +.byte 0xc4,0x2b,0x53,0x4e,0xca,0x44,0x17,0x9f,0x1c,0xeb,0x4d,0xf2,0xd1,0xb0,0x35,0xaa,0xc3,0xfe,0x77,0x34,0x2a,0x4a,0xe8,0x85,0x96,0x2f,0xa4,0x7d,0xdf,0xd0,0x6a,0x4a,0x0c,0x9b,0xd9,0x6a,0x00,0x92,0xb4,0xb1,0x9f,0xc3,0x56,0xee,0xcb,0xa5,0x3a,0x37,0x68,0xc8,0x7c,0x1e,0xa8,0x0a,0x3d,0xbc,0xd1,0xd0,0xd7,0x8b,0x32,0x34,0x20,0xfc +.byte 0xd3,0x9e,0xf5,0x18,0x3a,0xb9,0x87,0xae,0xde,0x6c,0xc0,0x7d,0xbd,0x20,0x00,0xe5,0x7b,0xcb,0xf9,0x7d,0x70,0x9a,0x10,0x45,0xc9,0x33,0x13,0x9d,0x2c,0x16,0x67,0xe6,0x36,0x38,0xcf,0xa2,0xf1,0xad,0xec,0x48,0x7f,0x9b,0x2a,0xdc,0x13,0xe2,0xee,0xef,0xf2,0x5c,0x3f,0x52,0x3a,0x72,0x79,0x9b,0xba,0x50,0xb2,0x2b,0xfb,0x97,0x8e,0xe6 +.byte 0x27,0x39,0x63,0x72,0x05,0x11,0x7d,0x2e,0xa8,0x44,0x08,0xf7,0xf3,0x26,0xe5,0xe4,0x6c,0x98,0x7b,0xb1,0x42,0x6d,0x74,0xd4,0x3b,0xfa,0x35,0xfa,0x0a,0xac,0x5e,0x9e,0x8f,0xc7,0x07,0xc5,0x50,0x25,0xfd,0xbf,0x13,0x52,0x3d,0xf1,0x18,0x1e,0x19,0x8c,0xf3,0x8b,0x4d,0xc8,0xfb,0x76,0xa4,0xe3,0x3f,0xb2,0x47,0x9c,0x50,0x97,0x32,0x65 +.byte 0x9e,0x42,0x81,0x21,0xd1,0x92,0xd2,0x81,0x4a,0x93,0x68,0xa2,0xc1,0x76,0xc8,0x40,0xce,0xfe,0x4e,0xc5,0xa7,0xb2,0x77,0x9f,0xc8,0xe5,0x41,0xb1,0xda,0x15,0xf6,0xfa,0x21,0x3f,0x11,0x5c,0xc6,0x62,0xda,0x01,0x7f,0x0f,0x9f,0x9e,0x98,0xfe,0x38,0x53,0x6c,0x7f,0xba,0x8b,0x55,0x01,0x36,0x33,0x41,0x5e,0xa9,0x78,0xbf,0x2e,0x60,0x4f +.byte 0xcb,0xe9,0x27,0x09,0x8c,0x01,0x2d,0x82,0x7d,0x3f,0xaf,0x8f,0x1e,0x37,0x79,0x35,0xfb,0xce,0x83,0xc5,0xf8,0xc5,0x54,0xfd,0x50,0xec,0x31,0xd1,0xb5,0x8a,0x4d,0x37,0xf6,0x7f,0x0e,0xbe,0x35,0xdd,0xa8,0x9e,0x5e,0xb9,0x3c,0xf4,0x2b,0xd2,0x97,0x56,0xd0,0x28,0xcb,0x60,0x27,0xcf,0x27,0x68,0x8a,0xa1,0xbf,0x9f,0xa3,0x45,0x4a,0x44 +.byte 0x71,0xe2,0xb2,0x9c,0x69,0x0b,0x18,0x69,0xcf,0x03,0xcc,0xc3,0x93,0xe0,0xf5,0xb7,0x4e,0xa4,0xdc,0x96,0xe0,0x2e,0xf8,0x3b,0xc6,0x67,0x30,0x06,0x5e,0xb9,0xb9,0x7d,0xaf,0x97,0x38,0x9a,0xf4,0x22,0x20,0x5a,0x9e,0x83,0x26,0x3c,0xcc,0x93,0x84,0x20,0x15,0x2e,0x85,0x23,0x17,0x1d,0x28,0xb4,0xe2,0x8f,0x2d,0x22,0x99,0x66,0xfd,0x6a +.byte 0xa8,0xe6,0xb7,0x19,0x18,0xec,0xbd,0x54,0xc2,0xcc,0xb7,0xb4,0x6b,0x10,0xdd,0xb5,0xe3,0x3b,0xb7,0x77,0xbf,0x66,0x65,0x82,0x6a,0xc6,0x0d,0x26,0xe6,0xe8,0xe1,0x96,0xe4,0x0b,0x3c,0xe3,0xf2,0xfb,0xd6,0x91,0x5d,0xb6,0x08,0x15,0x67,0x10,0xfa,0xf8,0xdc,0x72,0x84,0xca,0x48,0x29,0x75,0x98,0x62,0x30,0x43,0xa9,0xf1,0xde,0x58,0xb5 +.byte 0x6e,0x67,0x53,0x62,0x0d,0x06,0xa8,0x97,0x35,0x04,0x02,0x34,0x3f,0xd7,0x77,0x38,0xed,0x51,0x32,0x7c,0x6f,0x25,0x94,0x04,0x30,0xa5,0xfc,0xf1,0xb0,0x65,0x77,0x16,0xec,0xb0,0xf9,0x6d,0xaf,0xbc,0x75,0x6e,0x29,0x44,0x20,0x86,0x36,0xbe,0x22,0xe0,0xe1,0xc4,0x0c,0x97,0x10,0x45,0x3e,0x06,0xc3,0xee,0xa5,0x1f,0x97,0xc7,0xde,0xdb +.byte 
0xf1,0x05,0xe3,0xb7,0x24,0xc5,0xa5,0xca,0x4e,0x8e,0x9e,0x44,0x7e,0x98,0xb1,0x3c,0xe9,0xa6,0xe5,0xa6,0x08,0xcb,0x08,0xd7,0xf6,0x38,0x37,0xa4,0x46,0xd1,0xdc,0x53,0x6f,0x6c,0x3f,0xca,0xa1,0x9b,0x7c,0xa6,0x44,0xd4,0x08,0x33,0xd2,0xf8,0x32,0xd2,0x4f,0x60,0x75,0x0f,0x49,0xf1,0x70,0x52,0x56,0x16,0x5b,0x3e,0x34,0x0e,0xe4,0x94 +.byte 0xc3,0xa9,0xd4,0x1c,0x9e,0xa4,0x10,0xce,0xc1,0x69,0x5b,0x3a,0xc9,0xd5,0xab,0x98,0x81,0x78,0x42,0x7e,0xf2,0x76,0x10,0xad,0x97,0x85,0x98,0x2f,0xe2,0x3f,0xb1,0x1d,0xc0,0x4d,0xa4,0x0b,0x54,0x7e,0x19,0x16,0x0a,0x71,0x74,0x37,0xfd,0x67,0x23,0x86,0xb2,0x3b,0x1e,0x49,0x92,0x92,0x1b,0x5f,0x65,0x56,0x76,0x6d,0x97,0x3b,0x91,0xc0 +.byte 0x5a,0x7e,0xf1,0x5b,0xe9,0x83,0xb9,0x67,0x2f,0xe1,0x0c,0xcf,0xe9,0x51,0x26,0x45,0x03,0x06,0x63,0xa4,0xb2,0x06,0xe0,0x8e,0xa3,0xbf,0xf5,0x7c,0x19,0xdf,0xfe,0x38,0x28,0x98,0xa1,0x23,0x16,0x69,0xc4,0x9f,0x20,0xe4,0x42,0x27,0x4e,0x7b,0xc9,0x42,0x5e,0xd2,0xb9,0xbf,0x33,0x03,0xbb,0x96,0x6d,0x80,0x65,0x90,0x3b,0x82,0x5b,0x68 +.byte 0x46,0x4f,0xe3,0xe0,0x0e,0xc5,0x90,0x91,0x80,0xf8,0xf4,0x9c,0xfe,0x03,0xaf,0x31,0x44,0xb7,0xfc,0x1f,0x65,0xc8,0x65,0x68,0xcc,0x27,0xb4,0x0d,0x81,0x14,0x9e,0x52,0xab,0xdd,0x71,0xf6,0xd9,0xcf,0x29,0x04,0xcd,0xae,0x6f,0xd6,0x41,0xb5,0xfd,0x1d,0x0f,0xbf,0x71,0xc2,0x60,0x98,0xb9,0xc0,0x6e,0x8a,0x2c,0x7d,0xec,0x31,0xa5,0xea +.byte 0x1a,0xb1,0xe4,0xc2,0x36,0xcb,0xf0,0xf4,0x3f,0x1d,0x03,0x01,0xcd,0xac,0xd0,0x9d,0x2e,0xa3,0xc4,0x54,0x49,0x75,0x90,0xac,0x7e,0x1e,0xc3,0x90,0xab,0x55,0xb0,0x34,0x0d,0xd6,0x99,0xb5,0x40,0xda,0xdd,0x30,0x57,0x61,0x15,0xec,0x8f,0x8c,0xc7,0xda,0xfc,0xf5,0x0a,0x86,0xd8,0x6b,0x0f,0x6e,0x09,0xb8,0x50,0x2a,0xea,0x51,0x84,0x33 +.byte 0x7a,0x97,0x0c,0x56,0x61,0x2c,0xd9,0x83,0xb9,0xb1,0x53,0x31,0x72,0x20,0x79,0x85,0x7f,0xdc,0xb8,0xfe,0xfa,0x9a,0xd4,0x6a,0x3c,0xc7,0xcc,0x75,0x20,0xba,0x9c,0xb9,0x1a,0xff,0x9c,0xbe,0xfd,0x87,0xb4,0xd7,0xe8,0x5e,0x22,0x6a,0x1b,0x91,0x52,0x6a,0x58,0xbc,0xf4,0xde,0xcc,0x18,0x37,0x0e,0xf5,0x22,0x91,0xd2,0x4f,0x08,0x91,0x62 +.byte 0x1c,0xb7,0xa0,0x7e,0x66,0x97,0xda,0xa0,0x3c,0xc8,0xe8,0xdc,0x61,0xa4,0x64,0x8b,0x0a,0x43,0x90,0x0c,0x78,0xd9,0x96,0x8a,0xb0,0x17,0x0f,0x32,0x17,0x11,0x82,0x69,0x9d,0x7c,0xa9,0xfd,0x9b,0xe3,0xeb,0x0d,0x44,0x1d,0xcb,0xf6,0xee,0x26,0x6b,0xd5,0x4c,0x49,0x69,0x18,0xd7,0xf3,0x63,0xd9,0x7e,0x83,0xdd,0xa3,0x2d,0xdf,0x88,0x10 +.byte 0xd1,0x5c,0xb0,0x7e,0x44,0xfe,0x64,0x39,0x33,0x05,0x04,0x54,0x74,0x4d,0xd5,0xbc,0xdf,0x19,0x52,0x81,0x60,0x92,0xc5,0x4e,0xa4,0xff,0xf0,0xa2,0xfd,0x88,0x96,0xde,0xb4,0x8d,0x58,0x06,0xfb,0x96,0x6f,0x0e,0xb0,0x4a,0x2b,0xed,0x15,0xa7,0xfb,0x9f,0xf2,0x30,0xc4,0xce,0x02,0x4d,0x83,0xb8,0x5d,0x10,0x60,0xb8,0xbc,0x05,0xa2,0xd4 +.byte 0xf1,0xae,0x46,0x56,0xb9,0xac,0x68,0x79,0x41,0x90,0xee,0x79,0xda,0x3a,0x91,0x7a,0xf6,0xdb,0xe3,0xea,0x91,0x48,0x77,0x4a,0xa3,0xab,0x9c,0x99,0x49,0x1f,0xc9,0xcd,0xe7,0x2e,0xe3,0xe7,0x78,0x6d,0x07,0x1b,0xc6,0x08,0x48,0xd8,0x20,0xff,0x19,0x8a,0x73,0x1d,0xc6,0xa1,0xd4,0x95,0x33,0xf7,0x45,0xab,0xea,0x05,0x3e,0xdf,0xde,0x68 +.byte 0xb2,0xb6,0xef,0x71,0xb4,0xd1,0x09,0x4b,0x43,0x16,0x35,0x1a,0xb6,0xcb,0x78,0x63,0xca,0x9e,0x9a,0xe3,0x86,0xb2,0x8e,0x7b,0x68,0x89,0xa7,0x5c,0xd3,0x06,0x21,0x88,0x94,0xde,0xa1,0xb1,0x3a,0xe8,0xb7,0xfa,0x58,0xc5,0xc8,0x01,0xfa,0x56,0xe4,0x0e,0x6b,0xeb,0x5d,0x67,0xf4,0x63,0xd4,0x44,0xe2,0xe7,0x42,0xfe,0x09,0x58,0xdf,0xd9 +.byte 
0x1d,0xb7,0x14,0x91,0xac,0x88,0x49,0xf6,0x7c,0x03,0x92,0x11,0xb4,0x66,0x68,0x6c,0x94,0x2a,0x22,0xaf,0xa6,0xb1,0x29,0x2a,0xae,0xdd,0xa8,0x65,0xe4,0xa9,0x39,0x00,0x1e,0xca,0x17,0x99,0xba,0xd6,0xf2,0x20,0x21,0xbf,0x1a,0xab,0xca,0x7c,0x92,0x22,0xee,0x3c,0x0c,0xc6,0x63,0xcc,0x86,0xfe,0xc0,0x8f,0xac,0x18,0x4e,0x2b,0xa5,0x2e +.byte 0x46,0x57,0x8a,0xbf,0xdc,0xd1,0xd2,0x2c,0x5b,0xe2,0x96,0x81,0xca,0x41,0xb5,0x17,0x38,0x4a,0xa4,0xd2,0x0e,0xac,0x5d,0xe9,0x44,0x63,0x1b,0xb8,0x81,0xd6,0x69,0x1c,0x99,0xc5,0xdb,0xdd,0x18,0xc1,0x6d,0x28,0x7d,0x36,0x52,0x82,0xaa,0x1a,0x10,0x01,0x9d,0xf1,0x7b,0x09,0x69,0x56,0xb1,0x31,0xa3,0x54,0x3c,0x56,0xf9,0x82,0x8c,0x06 +.byte 0x5a,0x32,0x2d,0xc0,0x7c,0x7e,0x91,0x6d,0x73,0x7b,0x7c,0x45,0x0b,0x2c,0x2a,0x4f,0x3c,0xea,0x6b,0x2b,0x84,0x76,0xab,0x8d,0x4c,0x5c,0x64,0xa3,0x97,0x9f,0x56,0x20,0x05,0xf9,0xc2,0x20,0xf3,0xd0,0x6a,0x7f,0x7d,0x12,0xfc,0x20,0x52,0x5d,0xff,0x92,0xaf,0x4e,0x7f,0x8f,0x2f,0xd0,0x73,0x06,0x23,0x09,0xce,0x11,0xc0,0x1b,0x48,0x7d +.byte 0x11,0x51,0x06,0x0e,0x05,0x95,0xca,0x42,0x71,0x87,0xa3,0xa3,0xc1,0x27,0xf8,0xb1,0x24,0x92,0x38,0x95,0xf6,0x8f,0x3b,0x70,0x74,0x19,0x9b,0x08,0xb3,0x49,0xe9,0x57,0xd4,0xce,0x5b,0xdd,0xab,0x95,0x26,0xe9,0x70,0x21,0xef,0x16,0xdd,0x36,0x89,0xe5,0x9e,0xaf,0xc5,0x28,0x0c,0xd3,0x67,0x64,0xbc,0xfb,0x18,0x17,0x15,0x1e,0xa7,0xb7 +.byte 0x72,0x3d,0xfd,0x10,0x5c,0xa2,0xc1,0xbf,0x62,0x79,0x2b,0xa7,0xb9,0x1f,0x73,0xe6,0x11,0xd8,0xbc,0x74,0x6c,0x45,0x95,0xef,0xa2,0xda,0x90,0xc3,0x00,0x00,0xbb,0xc7,0x28,0x36,0x82,0xd4,0x5e,0x5c,0x11,0xea,0x7c,0xf6,0x79,0x66,0xff,0x93,0x77,0x49,0x05,0xc9,0xc1,0x8d,0x5c,0xf6,0xff,0xb9,0xf9,0xcd,0xb3,0x01,0x83,0x83,0x43,0x2d +.byte 0xa1,0x90,0x73,0xc9,0x32,0xae,0xdb,0xd0,0xf3,0x61,0x63,0x72,0x06,0xde,0x21,0x7b,0x3b,0x2d,0xec,0xd3,0x1d,0xfe,0xbd,0x6e,0xd8,0xe3,0x39,0xe0,0xa1,0x9f,0x67,0xaf,0xab,0x79,0xbc,0x59,0xf9,0xa7,0xdf,0x28,0x75,0xea,0x34,0x6b,0x25,0xde,0x49,0x1b,0x07,0x95,0x19,0x47,0x86,0x46,0x7b,0x68,0x30,0x70,0xec,0x9c,0x05,0xb6,0xc9,0x00 +.byte 0x68,0x10,0x4b,0xc4,0xe5,0xf1,0x67,0x3f,0xd4,0x3c,0xd6,0x49,0x98,0x71,0x23,0xff,0x07,0x6e,0x01,0x01,0x08,0x08,0x3d,0x8a,0xa1,0x71,0xdf,0x25,0x1a,0xef,0x60,0x86,0x6d,0x1c,0xd9,0x90,0x29,0x95,0xf2,0x4c,0x96,0xd3,0x17,0xe8,0x96,0x32,0x25,0x8c,0x65,0x38,0xbc,0x44,0x6a,0x5a,0xef,0x5a,0x72,0x12,0x43,0x2b,0xaf,0xc3,0xdc,0xb3 +.byte 0x6c,0x9f,0x57,0x61,0x2f,0x12,0x3f,0x72,0x16,0x4f,0x34,0xe3,0xb5,0xca,0x72,0xca,0x1c,0xdb,0xd2,0x8d,0x70,0x1f,0x19,0x75,0xb3,0x1b,0xdf,0xdb,0xb3,0xbf,0x6c,0x9a,0x70,0x64,0xa8,0xac,0x30,0x2d,0x4b,0x30,0xf5,0x4f,0x12,0x19,0xbd,0x65,0x25,0x70,0x33,0xe1,0x6f,0x18,0xdf,0x17,0xec,0xa3,0x80,0x51,0x6e,0xbb,0x33,0xa5,0xa8,0x58 +.byte 0x95,0x3c,0xab,0x86,0xd1,0x33,0xbe,0x55,0x04,0x8c,0x20,0x0d,0xfc,0x1a,0xa9,0x9d,0xb1,0x16,0x42,0x56,0x20,0xcc,0xa6,0x73,0xa0,0x85,0x3d,0xbf,0x1e,0xe0,0x01,0x51,0xd2,0xd7,0x2e,0x9d,0xd8,0x3c,0xea,0x03,0xf9,0x9a,0xbf,0x19,0x17,0x04,0x99,0xaf,0x8b,0xfc,0x9c,0x86,0xdf,0x58,0x78,0xfc,0x54,0x0d,0xac,0x26,0x27,0x2f,0x2e,0xbc +.byte 0xdd,0x4a,0xd5,0x6f,0x7c,0xd8,0x93,0xe3,0x51,0x9e,0xcc,0xc8,0xd2,0xfe,0x68,0xfb,0x5b,0x22,0xda,0xef,0x76,0xb9,0xc3,0xdd,0x13,0x52,0x24,0xb6,0x23,0x1f,0x69,0x22,0xb6,0xf5,0x86,0xff,0x2e,0x6e,0xd0,0xe0,0x21,0xbc,0x31,0x81,0xb5,0xc5,0xdb,0x36,0x58,0x44,0xe7,0xb8,0xf7,0xfd,0xd3,0x34,0xee,0xab,0xe6,0x99,0xf2,0x84,0x86,0x9b +.byte 
0x67,0x45,0x08,0x07,0x66,0xae,0x6a,0x55,0xa2,0x74,0x46,0xda,0x02,0x82,0x67,0x93,0x60,0x64,0x5d,0x1f,0xac,0xe7,0x36,0xb6,0xcd,0x31,0x28,0x78,0x93,0xcd,0x54,0xe9,0x42,0xbb,0xb4,0xb3,0x15,0x72,0x12,0x31,0x85,0x15,0x68,0x3a,0x31,0x35,0xd6,0xc9,0x0d,0x3f,0xa0,0x4b,0x36,0x03,0xda,0xfd,0x7a,0xd6,0xce,0x0c,0xf5,0x14,0x23,0x71 +.byte 0x47,0x85,0x64,0xe7,0xe7,0x8b,0x8e,0x25,0x03,0x32,0x5f,0xa9,0x3b,0xdb,0x2b,0x27,0x7c,0x02,0xfb,0x79,0xd7,0x7a,0x76,0x75,0x69,0xfd,0x74,0x24,0xd2,0x72,0x8c,0xdd,0xc5,0xa1,0x45,0x90,0x50,0x65,0x95,0x41,0xae,0x7e,0x5c,0x83,0x3e,0x24,0x3c,0x02,0xa9,0x37,0x49,0x36,0x63,0x2f,0x18,0x92,0x3a,0x8a,0xe5,0x2a,0x6a,0x5c,0xa7,0x3e +.byte 0x98,0x24,0xfd,0xd9,0x3b,0x2d,0x4c,0xe2,0x8e,0x05,0x5b,0xdd,0x47,0x0f,0x19,0x5a,0x62,0x94,0xd6,0x6e,0x45,0xd8,0x99,0x43,0x78,0xa0,0xb1,0xdf,0x68,0x8a,0x56,0xa8,0xfb,0x2e,0x52,0x4e,0xfa,0x21,0xec,0x62,0x14,0xf5,0x90,0xdb,0x8c,0x02,0xa7,0xff,0x29,0x22,0xb8,0x40,0x87,0x58,0xda,0x4e,0xfd,0xab,0xeb,0xa2,0x40,0xce,0xfc,0x58 +.byte 0x46,0x37,0x3f,0x04,0x4e,0x36,0x76,0x44,0x3c,0xfc,0x54,0xb8,0x6f,0x4b,0x66,0x6a,0x4a,0x78,0x8f,0x33,0x86,0x07,0xe4,0x3c,0xb5,0x0f,0x86,0x2e,0x21,0x7e,0x44,0xce,0x18,0x77,0xe0,0xcc,0xd7,0x7f,0xc9,0xac,0xb7,0x2b,0x94,0xb5,0x91,0xcd,0x2c,0xfa,0xc7,0x98,0xbd,0xb0,0x2a,0x85,0x77,0xcf,0x82,0xd9,0xae,0x76,0x33,0x34,0xc0,0x9d +.byte 0x3a,0xbc,0x27,0xbc,0x97,0x25,0xf4,0xf1,0x43,0x53,0xac,0xf6,0xde,0xf5,0x1f,0xa6,0x6a,0xd5,0xe3,0x11,0x32,0x49,0x46,0x5b,0x56,0x68,0x07,0xdb,0x03,0xad,0xc2,0x35,0x16,0x8f,0x01,0xcc,0x8a,0xd2,0x0c,0x6b,0xb2,0x62,0x73,0x99,0xb5,0x74,0xf1,0x4b,0x2e,0xbc,0x8e,0xed,0xc0,0x55,0x56,0x40,0xae,0x24,0xf2,0x7e,0x1f,0xba,0x9d,0xc4 +.byte 0xd1,0x69,0xd3,0xba,0x21,0x83,0xf5,0xc4,0xbf,0x78,0x96,0x74,0xa1,0xd8,0x8c,0x35,0xba,0x9f,0xa0,0x0f,0xb5,0x6a,0xb2,0x72,0x52,0xfa,0x02,0x71,0xbb,0x79,0x61,0xbd,0xa9,0xee,0x22,0x7c,0xc5,0xac,0x6b,0x52,0x67,0xab,0xc4,0xd2,0x8d,0x26,0x1c,0x2b,0xaf,0x0c,0xa4,0xce,0xb5,0x11,0x99,0x4d,0x22,0x69,0x68,0xe0,0xc6,0x3e,0x84,0x3d +.byte 0xeb,0xad,0xc9,0x5b,0xb5,0xb4,0xba,0x06,0x9b,0x0a,0xb2,0x54,0x89,0xf2,0xb0,0x5f,0x41,0xb4,0x8b,0x21,0x31,0x29,0x94,0x52,0x1e,0xa7,0xc4,0xc2,0x97,0xb9,0x74,0x95,0xa3,0x30,0xfb,0x02,0x77,0x01,0x4f,0x32,0x03,0x34,0x8f,0x51,0x2d,0x10,0x61,0xee,0xc5,0x2f,0x89,0x42,0x3c,0xbe,0xed,0x66,0xa6,0x7a,0x10,0xc6,0x06,0x7e,0xb2,0x3d +.byte 0xf2,0xc9,0xd1,0x08,0x97,0x6c,0x6f,0x6d,0x06,0x9d,0x72,0xd0,0x5e,0x79,0x3b,0xa5,0xa5,0xd0,0xdc,0xc6,0xda,0x73,0xd2,0xf3,0x0a,0xfd,0x94,0xc2,0x9c,0x4b,0x85,0x38,0x8d,0xb2,0xfb,0x29,0xdd,0x90,0xc2,0xb7,0x8f,0x2c,0x52,0xa2,0x32,0x5e,0xa1,0x0f,0x62,0x38,0x58,0xfa,0x46,0x4e,0x87,0x4b,0xcf,0xc5,0xe9,0xfc,0xf2,0x97,0x62,0xdd +.byte 0x92,0xd2,0x41,0x7b,0xa2,0x2a,0xae,0x6e,0x4d,0xbc,0xef,0x43,0x18,0x6e,0xbb,0xe5,0x06,0x45,0x53,0xa1,0x00,0xef,0xf5,0x4b,0xad,0xbd,0xa5,0x2c,0x77,0x0a,0x37,0x04,0x22,0x95,0xeb,0x7b,0xc1,0x3c,0x20,0x0a,0x44,0xdf,0xa2,0x23,0xc9,0xfc,0x85,0xf3,0x5b,0x9b,0x0f,0x40,0x2a,0xe3,0xc7,0x5a,0xa1,0xf6,0xe4,0x39,0x2a,0xfe,0xd7,0xe7 +.byte 0x33,0xd8,0xbc,0xd6,0x1f,0xef,0xac,0xa9,0x3f,0x2d,0x55,0xb0,0x85,0x74,0xef,0xeb,0xcd,0x9b,0x23,0xa3,0xe6,0x19,0xde,0xea,0x7c,0x9c,0x83,0x48,0x4b,0x12,0xfd,0xe3,0xcb,0x1b,0x70,0x2d,0x9f,0x2c,0x13,0x82,0x87,0x68,0xca,0x60,0x5e,0xc0,0x2e,0x60,0xde,0xf2,0x6b,0x78,0x0a,0x63,0xaa,0x9c,0x9b,0x61,0x63,0xc7,0x0c,0x98,0x92,0x68 +.byte 
0xc7,0x44,0x00,0x6a,0x76,0x43,0xa0,0x61,0x7c,0x37,0x62,0x1a,0xd4,0x9b,0x58,0x59,0xe5,0xae,0x78,0x79,0x80,0xf0,0x75,0x68,0x9e,0xab,0x02,0xb8,0x00,0xc5,0x33,0x0d,0xea,0xb1,0x91,0x0f,0x17,0x57,0x96,0x23,0x8d,0x36,0x4d,0x89,0x94,0x42,0xc9,0x61,0x6e,0xf6,0x9f,0x37,0xee,0xa5,0x4b,0x3d,0x06,0x08,0xee,0x9a,0x7c,0x73,0xa9,0x58 +.byte 0xcd,0xcb,0x78,0xa9,0x3d,0x5c,0x11,0x0e,0x5a,0xd9,0xb0,0x7b,0xc4,0x3e,0x83,0xdc,0xe2,0x11,0xe9,0x6d,0x8a,0x8b,0x24,0x28,0x1d,0x7e,0x45,0x1b,0x05,0x5a,0x6b,0x97,0x1c,0x25,0x15,0x84,0x5c,0x3f,0x95,0x44,0xd5,0x4f,0x3c,0x4b,0x52,0xb1,0x0b,0x6a,0xb3,0xae,0x4e,0x1b,0x12,0xcf,0x16,0x78,0xd7,0xcb,0x32,0x43,0x39,0x88,0xf4,0x5e +.byte 0x26,0x29,0xe7,0x93,0x08,0x19,0x14,0x88,0x8f,0x54,0x91,0x13,0xb6,0x57,0xd1,0x87,0xd4,0x9d,0xf7,0xec,0x9b,0x22,0x6b,0x91,0x79,0x9d,0x6c,0x32,0x47,0x4a,0x79,0x55,0x7d,0xac,0x87,0x98,0x59,0x97,0xa5,0x71,0xbc,0xbf,0x1b,0xf0,0x6f,0xbb,0x81,0x8e,0xc2,0xef,0x7c,0x63,0x2f,0x80,0x37,0xb6,0xc5,0xae,0x59,0x5e,0x57,0x5e,0x1f,0x3a +.byte 0xe5,0x6b,0x6b,0x5e,0xdb,0x8e,0xd2,0x87,0xf7,0x94,0x7b,0x11,0x0e,0x4b,0xa6,0x9f,0x49,0xc6,0x68,0xc7,0x52,0x5f,0x28,0x87,0x33,0x84,0x52,0x5f,0xc8,0x5f,0x81,0x85,0x10,0xe8,0x92,0xce,0x13,0x6c,0x01,0x28,0x5e,0x59,0x8f,0xbb,0xa9,0x9c,0xdc,0x85,0xd3,0x73,0xa0,0x5a,0xbf,0x5b,0x04,0x80,0x99,0x90,0xc8,0x16,0x44,0x0d,0x09,0x01 +.byte 0xcd,0x24,0xe7,0x59,0xe7,0x42,0xe0,0xdd,0x01,0x93,0x1f,0x9e,0x1f,0x36,0xdb,0xcd,0x49,0xdb,0xea,0xa9,0x63,0x71,0xb9,0x2c,0xcd,0xca,0x1a,0x64,0xe1,0x95,0xbe,0xe1,0x64,0x2e,0xc7,0x59,0x15,0x61,0xe1,0xf9,0x45,0x0f,0x2a,0x3a,0x85,0xf8,0x7c,0x06,0xae,0x53,0x84,0xd2,0xe7,0xee,0x8b,0xbf,0x7a,0x72,0xa3,0x57,0xf1,0xc2,0x12,0x40 +.byte 0x9c,0x93,0xe1,0x04,0x81,0xde,0xc6,0xa8,0xae,0x4f,0x5c,0x31,0x93,0xc7,0x11,0x1d,0x89,0x70,0x85,0xd5,0x6f,0xab,0x58,0x1f,0x3f,0x76,0x45,0x7e,0x19,0xd0,0x6c,0xc1,0x41,0xa9,0x64,0x0a,0x79,0xb5,0xe0,0x9e,0xbc,0x4f,0x10,0x0c,0xac,0xfc,0x54,0xad,0xcf,0xb8,0xd0,0xfd,0x9b,0xed,0xea,0x54,0x05,0xbf,0x4f,0x91,0xbd,0x16,0x4a,0x57 +.byte 0xa9,0xda,0x38,0xb9,0x40,0x0d,0x63,0x68,0x83,0x7d,0xec,0x1c,0xe6,0x7f,0x9c,0xec,0x16,0x4e,0x0b,0xd0,0x91,0xb4,0x2c,0x04,0x65,0xb8,0x12,0xdf,0x3f,0xff,0x6a,0x08,0x4e,0x65,0xdf,0x09,0xa5,0xea,0xb1,0xac,0xa9,0x67,0xd2,0xbb,0x73,0x51,0xd2,0x37,0x72,0xfc,0x3f,0x69,0xe2,0x3f,0x01,0x94,0x3a,0xf7,0x23,0x0e,0x5d,0x23,0x44,0x82 +.byte 0xc7,0x38,0x35,0x9f,0xfa,0x13,0x15,0x47,0x0d,0x18,0xab,0x02,0x39,0x6e,0xb2,0x7c,0x29,0x11,0x9a,0x5a,0x01,0x2d,0xb2,0x10,0xea,0x9d,0xb7,0x37,0x4b,0xf2,0x2b,0x76,0x22,0xf7,0xaf,0x8a,0x5f,0x1d,0x6b,0xb2,0x13,0x9e,0x84,0xf5,0xbc,0x6e,0xad,0x66,0x5c,0x1b,0x5d,0x12,0xb0,0xe1,0x48,0x94,0x83,0xa0,0x26,0x54,0xd2,0xfd,0x3c,0x8d +.byte 0x81,0xac,0x31,0x9a,0x15,0xc6,0xd8,0xd5,0x07,0x1b,0x21,0x3f,0x04,0x40,0x3a,0x60,0x80,0x5f,0x1f,0x42,0x3e,0xd7,0x2b,0x7a,0x5f,0x71,0x93,0xb4,0x9d,0xf0,0x8b,0x5e,0xf1,0xc6,0x19,0x0a,0xa9,0x43,0xac,0xb2,0xc1,0x73,0x0d,0x44,0x6a,0x92,0x22,0xd0,0xda,0x40,0x14,0x7d,0x88,0xd1,0x5e,0x10,0xc9,0xa4,0x4d,0xd8,0xe0,0x7d,0x74,0x1b +.byte 0x2b,0xcb,0x50,0x24,0xbd,0x50,0x4a,0xe4,0xed,0x0e,0xe8,0xc0,0x5b,0x50,0x6d,0xf5,0x68,0x59,0xd1,0xc3,0x6f,0x32,0x86,0x29,0xe0,0x32,0x3f,0x05,0x86,0xa2,0x7f,0x93,0xd8,0xb7,0x02,0x68,0xb3,0x16,0xaa,0x0c,0xd3,0x4d,0xec,0x9a,0x66,0x06,0x7c,0x74,0x35,0x6f,0xde,0x8b,0xd9,0xdb,0x79,0x0a,0x15,0x84,0xc4,0x63,0xba,0x42,0xa2,0x3c +.byte 
0x29,0xc8,0x65,0xdc,0x06,0x60,0x0a,0x08,0x4e,0x80,0x33,0x5c,0xfa,0x4b,0x91,0xdb,0xf6,0x57,0xd6,0x25,0x7d,0x70,0x80,0x09,0xb2,0x27,0xdb,0x80,0x4c,0xa7,0xe8,0x35,0xf5,0x18,0x2d,0x10,0x62,0x22,0xf9,0xb1,0x22,0xf3,0x9b,0x74,0xa0,0xc5,0x25,0xd3,0x44,0xc9,0x27,0x7c,0xba,0x01,0xfe,0x32,0x23,0xf7,0x90,0x90,0xbc,0x0d,0xad,0x9e +.byte 0x22,0x77,0xc5,0xfb,0xf2,0x0e,0xda,0xe5,0x7c,0xb4,0xbb,0xed,0xd4,0xfd,0xb0,0xfb,0x4a,0x4c,0x2a,0x32,0x2d,0x81,0xcd,0xef,0x74,0x3c,0x6a,0x9a,0x0c,0x95,0x58,0x25,0xd0,0x3a,0xb4,0x84,0x8f,0xa5,0xef,0xad,0x91,0xd7,0x2d,0xae,0x61,0xaf,0x9d,0x3f,0x03,0xa8,0xab,0xa4,0x66,0xd4,0x73,0x3a,0x84,0x0d,0x4c,0x6a,0xca,0xbd,0x0c,0x3c +.byte 0xdc,0x1d,0x37,0xea,0xe6,0x5a,0x7f,0x15,0xbe,0x9d,0xc7,0xce,0xbd,0x46,0x97,0xd3,0x07,0x19,0x82,0xaf,0x58,0x39,0x39,0x95,0x5d,0x4b,0x8e,0x1b,0xe9,0xf1,0xf6,0xa9,0xb3,0xfc,0xe6,0xe0,0x68,0x2c,0xbb,0xfa,0xd9,0x9b,0xc1,0x69,0xf3,0x5a,0x8f,0x67,0xd5,0x9c,0x11,0x1e,0x02,0x20,0x20,0xfe,0x4b,0xc9,0x8b,0x62,0x17,0x9a,0xfa,0x47 +.byte 0x7f,0xa2,0x8b,0xc1,0x3b,0x02,0x78,0x38,0xff,0xce,0xe1,0x54,0x40,0x3f,0x27,0x5c,0x9d,0xdd,0x56,0x38,0x48,0xea,0x39,0xbe,0xa0,0x76,0x43,0x82,0xef,0x74,0x50,0xdf,0xda,0x4c,0xca,0x47,0x46,0x7e,0xc5,0xff,0xce,0x66,0xdf,0xeb,0x5b,0x6e,0x45,0x77,0x19,0xac,0x01,0x1f,0x20,0xa1,0xad,0x01,0x5f,0x87,0x3e,0x3a,0xd0,0x83,0x13,0x17 +.byte 0x53,0x40,0xfe,0x26,0x99,0x42,0xfa,0x54,0xa8,0x82,0x79,0xa7,0x44,0xd0,0x9e,0x59,0x64,0x77,0xec,0x70,0x0e,0xcd,0xb9,0xb1,0xc2,0xe2,0x39,0x93,0xb7,0xd1,0xd5,0x67,0x9f,0xb0,0x5b,0xd9,0x50,0x8b,0x17,0xec,0xbc,0x83,0x64,0x35,0xaa,0x43,0x3f,0x4c,0x8c,0x56,0x83,0x76,0xa2,0x72,0x30,0xe7,0xe8,0x9f,0x88,0x35,0x8e,0x8d,0x11,0x31 +.byte 0x8e,0xb5,0x71,0x75,0x31,0xc8,0x28,0x15,0x50,0xe6,0x0a,0x00,0x4d,0x75,0x51,0x7c,0x33,0x14,0x96,0xff,0xe8,0xf3,0xa0,0xb1,0x9c,0xeb,0x9d,0x8a,0x45,0xcf,0x62,0x82,0xeb,0xce,0xea,0xa5,0xb9,0x10,0x83,0x54,0x79,0xf8,0xcf,0x67,0x82,0x1d,0xea,0xce,0x86,0xcf,0xc3,0x94,0xf0,0xe8,0xf4,0x80,0x8b,0x84,0x96,0x06,0x2e,0xe4,0x58,0x21 +.byte 0x98,0x42,0x1a,0xb7,0x8c,0x5d,0x30,0x15,0x83,0xe8,0x17,0xd4,0xb8,0x7b,0x90,0x57,0x35,0x72,0x6d,0x1b,0x7c,0xc0,0x88,0x0a,0xa2,0xea,0xcd,0x58,0xcc,0xf1,0xb4,0x8b,0xcd,0x66,0x3c,0xa5,0xb0,0xd4,0xc9,0xcc,0x42,0x1d,0xef,0x3b,0x42,0x22,0x9b,0xfb,0x45,0x24,0xcc,0x66,0xd7,0x67,0x73,0xb2,0x12,0x03,0xf6,0xa3,0x06,0x61,0xe2,0xab +.byte 0x91,0x8e,0x33,0x0b,0x9f,0x6a,0x80,0x5e,0x0f,0x68,0x41,0x5a,0x7e,0xd8,0xe2,0x32,0x50,0xc2,0x88,0x60,0xca,0xe3,0x23,0x86,0xff,0xdc,0x0c,0x19,0xbb,0xba,0x01,0xa3,0x41,0x89,0xf0,0x79,0x55,0x79,0xa6,0xa4,0x66,0x7b,0x46,0xde,0xac,0xae,0xb1,0xde,0xe1,0x1e,0x8d,0x62,0xc1,0xd6,0xeb,0x39,0x2f,0x1d,0x50,0x27,0x53,0xc9,0xea,0xb6 +.byte 0xd3,0x91,0x9b,0xdd,0xc1,0x68,0x8c,0xb6,0xe1,0x5e,0x9f,0xea,0xbe,0x98,0x88,0xeb,0xa8,0x77,0xf6,0x69,0x64,0xab,0x99,0xf3,0x7a,0x08,0xff,0x8c,0xa6,0x17,0x1b,0x2e,0x6e,0xcc,0xd8,0x33,0x30,0xef,0x5a,0x86,0x07,0x49,0xa5,0x13,0x08,0xbc,0xd6,0x88,0x7e,0x19,0xe0,0x1c,0x23,0xa9,0xe5,0x0a,0xa7,0xaf,0x8a,0xe9,0x81,0x3f,0xd8,0x99 +.byte 0xa6,0x01,0x6b,0xec,0x14,0x08,0x90,0xb1,0x76,0x16,0x3a,0xcb,0x34,0x0b,0x91,0x26,0xe9,0xec,0xe5,0xbc,0xd6,0xdc,0xf0,0xa9,0xfd,0xf2,0xe9,0xcc,0xa1,0x9d,0x7f,0x32,0x0d,0x0a,0x2a,0x92,0xff,0xc4,0x38,0xf8,0x9e,0x31,0x78,0x47,0xbf,0x3f,0x27,0x71,0xe1,0x7a,0x33,0x48,0x91,0xe8,0x8e,0x1a,0x66,0xcf,0xa1,0x61,0xc2,0x62,0x30,0x7c +.byte 
0x69,0x35,0x21,0x67,0x9b,0xa7,0x1c,0x72,0x06,0xd8,0x28,0x94,0x6e,0x6d,0xf0,0x22,0x85,0xb4,0x6c,0x89,0xe8,0x2e,0x3a,0xc5,0xdc,0xe3,0xe3,0x0c,0x8a,0xba,0x1c,0x57,0x86,0xef,0x55,0x6a,0x24,0x59,0x5e,0x6e,0x47,0xb8,0xad,0xc5,0x10,0xff,0xbe,0x2d,0x93,0x09,0xfe,0x17,0x03,0x16,0x4d,0x4a,0x9a,0x15,0x38,0x94,0x38,0x18,0x45,0xa7 +.byte 0xcf,0xe4,0x16,0xd3,0x26,0x72,0x49,0xe7,0x89,0x9a,0xb4,0xc7,0x78,0xc3,0x18,0x3b,0xc8,0x08,0x9d,0x66,0x0f,0x48,0xc8,0x23,0x91,0x57,0x61,0xf1,0xf3,0x01,0x3e,0x0a,0xa3,0x4c,0x6c,0x34,0x5b,0x98,0x40,0x47,0x42,0xc1,0xeb,0x58,0x58,0xff,0x1f,0x4b,0x5f,0xf1,0x29,0x2e,0x7e,0x76,0x15,0x56,0x17,0x9c,0xe7,0x55,0x09,0x22,0x0a,0xa2 +.byte 0xd8,0xbf,0xd9,0x44,0x49,0xa9,0x24,0xd7,0x4f,0x12,0x04,0xa2,0x18,0x1c,0xdc,0x54,0xc0,0x22,0x27,0x3c,0xeb,0x1f,0x02,0xae,0xb3,0x33,0xb2,0xa2,0x84,0x23,0x76,0xc6,0x2b,0x94,0x53,0xae,0x7b,0xee,0xbb,0x81,0x64,0x8a,0x3f,0xe0,0x75,0x6b,0x2c,0xd5,0x60,0xad,0x49,0x0c,0xf8,0x65,0x64,0x1a,0x83,0xc7,0xb9,0xd9,0x01,0x5b,0xde,0xb0 +.byte 0x76,0x9b,0x1c,0x0d,0x89,0x2d,0xd5,0x09,0xc7,0xa9,0xbb,0x0a,0x54,0x5c,0xd4,0x5b,0xbf,0xbc,0x5e,0x00,0x29,0x0b,0x30,0x19,0x73,0x66,0xfd,0x3f,0xdb,0xd4,0x1b,0xd4,0xc0,0x27,0xde,0x49,0x90,0x5f,0x65,0x87,0x3c,0xc4,0x43,0xd0,0x49,0x76,0x64,0x39,0x88,0xd7,0x0e,0xfc,0x27,0x52,0xb1,0x8d,0xd0,0x27,0x29,0x84,0xe3,0x49,0xb9,0x0c +.byte 0x2d,0x4e,0x73,0x95,0x57,0xa8,0x07,0xa0,0xe1,0x5b,0x5a,0xb6,0xbc,0xa1,0x7f,0xfd,0x4b,0x9c,0x4d,0x7d,0x0c,0x5c,0x4c,0x4b,0x42,0x70,0xc3,0x0a,0xc1,0x89,0x12,0xb5,0x46,0x04,0x3c,0x56,0x25,0xc6,0x8f,0x49,0x7d,0x3b,0xf1,0xcd,0xfc,0xb8,0xa6,0x66,0xb1,0xc2,0xa3,0xa7,0x98,0x93,0x0e,0xdb,0xcd,0xce,0xdf,0x7f,0x68,0x5e,0xea,0xf2 +.byte 0x85,0x61,0x8f,0xd6,0x23,0xb4,0x5f,0x2f,0xf8,0x78,0x47,0x15,0x59,0x2d,0xca,0x35,0x0f,0xf5,0x91,0x74,0x3b,0x32,0xe1,0xcf,0x54,0x1b,0xf4,0x9d,0xdb,0x20,0x5e,0xf8,0x71,0x10,0xa3,0x31,0xf1,0xb8,0x98,0x8d,0x76,0x70,0xce,0x4c,0xed,0xd3,0x81,0x6b,0xd5,0x8d,0x73,0x5f,0x8c,0x66,0x7c,0x87,0x73,0xfa,0x20,0xbe,0xcd,0xba,0x41,0x88 +.byte 0x46,0xc3,0x38,0xc0,0xd9,0x08,0x79,0x30,0xda,0x7f,0x2a,0xc0,0x72,0x47,0xb0,0xc9,0x41,0x68,0xb1,0xe8,0xb4,0x86,0xcb,0x5d,0xb0,0x5b,0x7a,0x26,0xfd,0xf2,0x1b,0x4e,0x1f,0x4c,0x6a,0x8a,0x84,0xd4,0x07,0x2f,0xf4,0x06,0x73,0x3d,0x1c,0x55,0x04,0x6a,0xa5,0x8a,0xbb,0xaa,0x8a,0x8d,0x8f,0x05,0xcc,0x63,0x04,0xe0,0xc6,0x6f,0x6b,0xf8 +.byte 0x24,0x56,0xbb,0x9d,0xa9,0xe5,0x4c,0xac,0x9d,0xbe,0xfd,0x70,0x9d,0x1f,0x98,0xc4,0xfc,0xdb,0x3c,0x45,0xe7,0xbb,0xea,0x51,0xb6,0x56,0xe0,0x2c,0xb2,0x77,0x1b,0x80,0x9b,0x43,0xa7,0xb2,0x9a,0x40,0x8f,0xdb,0x2d,0x51,0x7b,0x2c,0x89,0xfd,0x14,0xf5,0x77,0xbf,0x40,0x3d,0x32,0xe0,0x10,0x32,0xcd,0xc4,0x3f,0xe2,0xe8,0xb4,0xdf,0xc2 +.byte 0x43,0x7a,0x0b,0x17,0x72,0xa1,0x0e,0xd6,0x66,0x35,0x8f,0xf4,0x21,0xf1,0xe3,0x46,0x13,0xd7,0xcd,0xc7,0x7b,0xb4,0x9b,0x39,0x1e,0x33,0x3c,0x18,0x15,0x7a,0xea,0x77,0xc5,0x57,0x4d,0xf9,0x35,0x8a,0xc1,0xb5,0x78,0x5d,0xc3,0x3e,0xd5,0xfd,0xb5,0x50,0xee,0x44,0x24,0xa2,0x55,0xb6,0xd8,0x3d,0x5d,0x75,0x2a,0x26,0x37,0xe7,0x85,0xb3 +.byte 0xff,0x70,0x5d,0x99,0x8d,0x99,0xba,0x9d,0x09,0x97,0xf2,0x67,0xe5,0xa3,0x86,0x06,0x21,0xb4,0x03,0x9b,0x63,0x76,0x1f,0xf8,0x09,0xd8,0x4e,0x22,0xcb,0x48,0xcf,0x79,0x72,0xc9,0x3f,0x84,0x5e,0xb8,0x39,0x87,0x27,0x92,0x1e,0x59,0xdf,0xc2,0xe6,0xd2,0xc4,0x5f,0xad,0x6e,0x9c,0xa4,0xec,0xd5,0x7d,0xf6,0x2b,0x9b,0x93,0x56,0xcd,0xa3 +.byte 
0xc5,0xfa,0x82,0x39,0x46,0x29,0x57,0x43,0x08,0xe2,0xe1,0x3e,0x80,0x3b,0x8e,0x08,0xe5,0xc5,0xfe,0x05,0x17,0xaf,0xe0,0xf0,0xb7,0x5b,0x34,0x33,0x59,0xfa,0x93,0xbf,0x6a,0xb3,0x6c,0xbc,0x99,0x62,0x34,0x2c,0xf2,0x3b,0x62,0xf2,0x1c,0x48,0x07,0xc9,0x60,0x03,0xa5,0xe1,0x66,0x8d,0x84,0x36,0xc7,0xf9,0xc6,0x3b,0xa9,0xee,0x0f,0x48 +.byte 0xff,0xff,0xad,0x95,0x21,0xb5,0x12,0x63,0x7d,0x0f,0x0d,0x09,0x63,0x51,0x64,0x69,0xb4,0x95,0xd3,0x25,0xf0,0x3b,0x6d,0xc4,0xdd,0x8c,0x80,0x0d,0x3b,0xd2,0x4b,0xe0,0x67,0xcb,0xcd,0x7d,0x2e,0xbd,0x61,0x4b,0x0c,0x32,0x1f,0xfd,0xd2,0x31,0xed,0xa8,0xaa,0x98,0xf4,0x85,0x21,0xbc,0x08,0x14,0x2f,0xbb,0xbf,0x01,0xba,0x24,0x5e,0x5c +.byte 0xf3,0x72,0xed,0x05,0xec,0xf3,0xd1,0x9b,0xb0,0x63,0x8a,0x14,0xd1,0x9e,0xae,0x9b,0xce,0x4d,0x6c,0xb6,0x7a,0x78,0x9e,0x1d,0xcd,0x1e,0x50,0x66,0x26,0x70,0x74,0x2b,0x43,0x6a,0xc7,0xd7,0xe9,0xa2,0xcf,0xf3,0x09,0x9a,0x81,0x80,0x04,0xb8,0x5a,0x4f,0x2e,0x10,0x35,0xb2,0xb0,0xc6,0x40,0x97,0xa5,0x6a,0x24,0x5a,0x6b,0x97,0xc7,0xc0 +.byte 0x24,0x50,0x8d,0x65,0x21,0x25,0xce,0xb9,0x19,0xfc,0x40,0x08,0xcf,0xfd,0x1c,0xc4,0x30,0xd4,0x06,0x70,0xac,0x8a,0x3c,0x3f,0xfc,0xc3,0xeb,0xdd,0x43,0x56,0x4a,0xf6,0x50,0x92,0x9d,0xce,0x9c,0xea,0x15,0xdd,0x7c,0x5e,0x40,0xf5,0x7e,0x41,0x70,0xdd,0xc7,0x62,0x21,0x5a,0x20,0xc8,0x71,0x10,0x97,0xd5,0x12,0xfa,0x31,0x96,0xfb,0x38 +.byte 0x17,0x66,0x73,0x32,0x7a,0x93,0xf0,0x82,0xb9,0xf1,0x24,0xc5,0x64,0x0b,0xa9,0x24,0x4a,0x47,0xac,0xfb,0xf1,0x55,0xd7,0xb3,0x9a,0x64,0x63,0x0b,0x2e,0x13,0x9e,0x1a,0xee,0x21,0xd0,0x70,0x5c,0x0c,0x25,0xe7,0x38,0x23,0xd7,0x2f,0x6a,0x20,0x59,0xef,0x70,0xb2,0x8e,0xb4,0x15,0xee,0x6f,0x70,0xd0,0x75,0x19,0x9d,0x42,0xa7,0x17,0xad +.byte 0x99,0xaa,0x0d,0xa3,0x87,0x3d,0xf1,0x7b,0x0e,0xfa,0x62,0x9a,0x20,0x64,0x17,0x64,0x07,0xc2,0x84,0x13,0xb2,0x59,0x81,0x66,0x45,0xab,0x47,0x6d,0xfc,0x7b,0x60,0x05,0xac,0x30,0xb2,0x86,0x7e,0x34,0x6b,0xaf,0x37,0x00,0xa6,0x47,0x4c,0xb9,0x10,0xbd,0x9e,0xce,0x47,0x9e,0xc2,0x0e,0xfd,0x47,0xfa,0xd8,0x08,0xd1,0xc2,0xaa,0x6d,0x8c +.byte 0x91,0x2c,0x18,0x32,0x52,0x84,0x47,0x71,0x3b,0xc9,0xa1,0xf5,0xfc,0x90,0xb8,0x79,0xbf,0xe5,0x59,0x1b,0x91,0x22,0xcb,0xd3,0x87,0x7e,0xd4,0xb5,0x33,0xb2,0xfc,0x7c,0xee,0x22,0xfb,0xe8,0xb0,0x3c,0xa7,0x8b,0x05,0xd7,0x7f,0x17,0x52,0xbe,0xb6,0xe0,0x1e,0x47,0xce,0xfd,0x79,0xdf,0x16,0x5f,0x01,0x70,0x0c,0x47,0x5a,0x01,0x96,0x08 +.byte 0x3e,0x9b,0xc4,0xb2,0x58,0x73,0xc4,0x38,0xd6,0xf2,0x1b,0x0a,0x2c,0xb9,0x2a,0x96,0xb5,0x89,0x2d,0x33,0xdf,0xa4,0x5f,0x24,0x1b,0x79,0x0e,0xb6,0x9f,0xec,0x46,0xd3,0x27,0x4a,0xc1,0x26,0x94,0x95,0x41,0xd5,0xb3,0x84,0x74,0x62,0x47,0xc5,0x4d,0xb4,0xe2,0xe7,0xdb,0xc3,0xc3,0x7b,0x33,0x2a,0xbf,0x69,0xf6,0x5e,0xdc,0xfe,0xa4,0x81 +.byte 0x91,0xf3,0xa8,0x26,0x82,0x44,0x37,0xea,0xe1,0x20,0xff,0x52,0x33,0x5b,0x0b,0x6f,0xf8,0x33,0x4e,0x02,0x4d,0x38,0x93,0xcd,0xc0,0xfc,0x73,0x1a,0xf9,0xf6,0x9f,0x53,0xfc,0xf7,0xe2,0x4b,0x25,0xdd,0xa7,0x4d,0x1e,0x5c,0x17,0xc3,0xa0,0x41,0x1d,0x67,0x45,0xff,0xcb,0x41,0x49,0xc4,0x18,0x68,0x7e,0x7f,0xb6,0x6f,0xdb,0xbc,0x73,0x2f +.byte 0xc7,0x9a,0x46,0x8c,0x0b,0x57,0xa3,0xd3,0x0a,0x34,0xb7,0x27,0x67,0xbb,0xe1,0x64,0xa7,0x7e,0x79,0xac,0x4f,0x09,0x54,0x9b,0x43,0x5e,0x9a,0x33,0x02,0x45,0xdc,0x85,0x0b,0x59,0x8d,0x78,0xe8,0xd8,0xb5,0xd3,0x31,0x9d,0x2a,0x60,0x5b,0x91,0xed,0xf1,0xf1,0x37,0x3f,0xdb,0xda,0xd6,0xd1,0x8f,0x14,0x7e,0xe1,0xfc,0x92,0x60,0xa5,0x33 +.byte 
0x86,0xef,0x29,0xbf,0x94,0x84,0x2b,0x24,0x20,0xb4,0x5e,0x23,0x34,0x08,0x63,0xc9,0xe6,0x80,0xa0,0x27,0x27,0x2f,0xab,0xc0,0x52,0x44,0x66,0x29,0x32,0x2e,0x91,0x96,0x02,0x1c,0x3b,0xb4,0x6e,0x33,0x49,0x5b,0x60,0x6f,0x14,0x93,0x65,0x0d,0x97,0x01,0xfb,0xf9,0x42,0x74,0xb6,0x21,0xf7,0xc2,0x5d,0xbf,0x91,0x2b,0xf5,0xb1,0x4e,0xe2 +.byte 0xd6,0x24,0x57,0x41,0x7a,0xcb,0xdd,0xb6,0x96,0x8b,0xfc,0x42,0x19,0x21,0x7f,0x41,0x32,0x3d,0x69,0x9b,0xee,0xda,0x97,0x45,0x26,0x71,0x0d,0x12,0xf0,0x20,0x7f,0x44,0x0f,0x4c,0xd2,0xd3,0x34,0x93,0xc7,0xe5,0xe7,0x83,0x62,0x13,0x0b,0x7d,0xc6,0xe4,0xd2,0xae,0x53,0x2e,0xd1,0x18,0x81,0xd0,0x81,0xf6,0xc0,0x98,0xaf,0x1d,0xb2,0x8a +.byte 0xcb,0xd3,0xde,0x1d,0x53,0x71,0x92,0x0e,0x4b,0x8c,0x7c,0x8e,0x65,0xf6,0xe2,0xc2,0x5a,0x4f,0x8c,0x59,0x0f,0x35,0x5e,0xe4,0x43,0x50,0xab,0xb7,0xdd,0xfc,0x66,0xf9,0xb1,0x9b,0x6b,0x1b,0xaf,0x2e,0x85,0xe6,0x3e,0x4c,0xa2,0xd4,0x55,0x47,0xb9,0x66,0x66,0x7b,0xa3,0xb2,0xd5,0x8a,0x8e,0x88,0x0e,0xfb,0x4e,0xad,0xf4,0x39,0xd2,0xd6 +.byte 0x39,0xef,0xe0,0xee,0x0f,0xf3,0x94,0x47,0xa7,0x32,0x24,0x9a,0xb0,0x82,0x08,0x67,0x00,0x3f,0xe6,0x95,0x76,0x84,0x0a,0x5c,0xb7,0x74,0xc1,0x64,0x5e,0x7c,0xba,0x0b,0x2e,0x6f,0x26,0xc3,0x20,0x2e,0x95,0xc1,0xf0,0x8c,0x55,0x4a,0x45,0x26,0xe6,0xf3,0x55,0x78,0xbd,0xd4,0xdb,0x07,0xbd,0xff,0x61,0x51,0xde,0x7f,0xdb,0x56,0x73,0x6b +.byte 0x9c,0xa4,0xb0,0x72,0xa7,0xd0,0x93,0x4d,0x1d,0x3a,0x92,0x78,0xde,0x77,0x65,0xe8,0x07,0x41,0x92,0xc1,0xbb,0x69,0x79,0x20,0x43,0xab,0x21,0x2e,0x6d,0xdf,0x43,0xeb,0x73,0x49,0x12,0x1f,0x53,0x75,0x01,0xed,0xce,0xf4,0x05,0x05,0x2b,0xc7,0x2a,0x65,0x29,0xe8,0xcf,0x5b,0xf0,0xc1,0x5b,0xd8,0xa8,0xac,0xbb,0xe3,0xac,0x29,0x0a,0x90 +.byte 0x79,0x2f,0x5b,0x92,0x14,0xf2,0xc7,0x2d,0xe5,0x33,0x6e,0x5e,0x31,0xe2,0xab,0xdf,0x21,0x71,0x4a,0x44,0xaa,0xc6,0xe9,0xb8,0x51,0x1d,0xe2,0xf3,0x07,0x19,0xa1,0x98,0x9e,0x8a,0xed,0xe4,0x9e,0x52,0x16,0x1f,0x2f,0xd3,0x4c,0x97,0x1e,0x38,0x49,0x84,0x2e,0x45,0xb5,0x4b,0x4f,0xfe,0xdb,0x25,0x3e,0xa9,0x6e,0x7d,0x60,0x3b,0xa7,0x7e +.byte 0xda,0x32,0x1a,0xd6,0x04,0xbe,0x0c,0x92,0x4e,0x6d,0x85,0xf9,0x9c,0x26,0x9a,0x88,0xf5,0x50,0x95,0x7b,0x9e,0x43,0x07,0x97,0xd4,0xdb,0xa0,0x6e,0x30,0x5d,0x44,0xa9,0x41,0xc2,0xdf,0xdf,0x37,0x35,0xc4,0x85,0x83,0x08,0xea,0x22,0xfa,0xae,0xdd,0x95,0xe5,0x35,0x47,0x23,0x86,0x27,0xfa,0x71,0x88,0xa0,0x12,0x00,0xe0,0xa7,0xd1,0x1b +.byte 0x5e,0x78,0x6f,0x38,0x30,0xa9,0x80,0x75,0xd7,0x61,0xcc,0xfd,0x33,0xd2,0xb8,0xf8,0xd7,0x12,0xf5,0x03,0xf9,0x53,0x6d,0x3b,0x6b,0xff,0x24,0x0a,0x3b,0xe8,0x2a,0xe9,0xae,0xb7,0xc3,0xe3,0x0f,0x26,0x71,0x55,0xc5,0x03,0x60,0xf4,0x47,0x01,0xa3,0x69,0xb2,0x98,0x75,0x5b,0x90,0x4a,0xf9,0x61,0x49,0xd6,0xc4,0xdb,0xab,0x04,0x0c,0x47 +.byte 0x1e,0x31,0x75,0xfa,0xa2,0xc5,0xfa,0x66,0x0c,0x4a,0x93,0xa0,0xea,0x56,0xf9,0x49,0xd4,0xc7,0xcc,0x2c,0xe5,0xdc,0xab,0x61,0x8e,0x0c,0xf3,0x2f,0xb5,0x9f,0x36,0xa1,0x05,0xab,0xb6,0xbc,0x4a,0x6d,0x97,0xe7,0x19,0xe5,0xfe,0x92,0xa5,0x94,0xd5,0xc0,0xf5,0x31,0xf6,0x8a,0xf7,0x24,0x62,0xdd,0x56,0x12,0x84,0xf5,0xc6,0xa0,0x37,0xa3 +.byte 0xfc,0xbd,0x16,0x2a,0xa6,0x36,0x8e,0xd4,0x29,0xfe,0xc4,0xc5,0xcb,0xdd,0xdd,0x8b,0x7e,0xa6,0x9d,0x08,0x28,0x10,0x6b,0xff,0xd7,0x79,0x48,0x35,0x2f,0xbe,0x34,0x9a,0xfb,0xd0,0x7d,0x5c,0xad,0xf0,0xde,0x96,0xea,0x2d,0xc5,0x8b,0xa9,0x7a,0x8b,0xbe,0x97,0xde,0x7a,0x95,0xc7,0x95,0xd9,0x86,0xde,0x3c,0x8d,0x15,0x8e,0x45,0x69,0x27 +.byte 
0xd4,0x27,0xa8,0xe3,0xa9,0x1e,0xa0,0x95,0x74,0xf1,0x8b,0xbe,0x3b,0xff,0xa3,0xf6,0x23,0x78,0xd9,0xbd,0xc2,0x44,0x3a,0x93,0xb5,0xa6,0x87,0x7c,0x65,0xd1,0xd8,0xd5,0x43,0x2a,0xb2,0xc8,0x65,0x86,0x83,0x06,0xf7,0x33,0x88,0x3b,0xc0,0x2c,0xb3,0x3b,0x23,0xa3,0x67,0x15,0x49,0x09,0x02,0xbb,0x11,0x08,0xe3,0x37,0x9a,0x9b,0x67,0x8e +.byte 0x63,0xc3,0x8b,0xff,0x21,0xa6,0xbe,0x3b,0xa6,0x57,0xc1,0x56,0x2a,0x02,0xdb,0x24,0x50,0x4a,0x4f,0x60,0x49,0x03,0xcf,0xba,0x55,0x1c,0x64,0xfe,0x0c,0x58,0xb4,0xb0,0x89,0x91,0xd5,0xbc,0xbc,0x85,0xe6,0x96,0x32,0x89,0x1f,0xa0,0x48,0xd1,0x6e,0xa7,0x03,0x86,0x8a,0xf2,0x5f,0xc3,0x5a,0x57,0x8a,0xa3,0x4a,0x61,0x90,0x18,0xb2,0x0d +.byte 0xc7,0x94,0xb9,0x3e,0x40,0x8b,0x1d,0x54,0xd0,0x4c,0xe7,0x2a,0xd5,0x85,0xa7,0x93,0x07,0x10,0x58,0xc4,0x8a,0x18,0x0a,0x49,0x30,0x87,0x93,0x0e,0xcf,0xc7,0x95,0x9f,0xd1,0x3f,0x9b,0x06,0xe3,0xf9,0x4f,0x16,0x58,0x04,0xb4,0xf0,0xf0,0xf3,0x3a,0xab,0x4a,0x35,0xf1,0xec,0x23,0x15,0x0c,0x24,0xba,0x90,0xdc,0xd1,0xfe,0x47,0xca,0xb2 +.byte 0x95,0x33,0x30,0x45,0xba,0x18,0x15,0xec,0x58,0x36,0x02,0xdf,0x28,0x09,0x74,0x4b,0x09,0x01,0x24,0x0f,0x00,0x7b,0xb3,0x65,0x45,0x42,0x63,0x15,0xf8,0x50,0x8b,0x4f,0x28,0x73,0x03,0x3a,0x31,0xe5,0x0d,0x56,0x8f,0x6b,0x4b,0x9e,0xda,0x71,0xee,0x68,0xba,0x85,0x81,0x3d,0x5d,0x74,0x5e,0xda,0x60,0x87,0xf4,0x5a,0x38,0xad,0xc5,0x3f +.byte 0xb5,0x15,0x02,0x59,0x1c,0xd2,0x93,0x66,0x54,0x65,0xf1,0xe7,0x9b,0xf0,0x30,0x2d,0x9e,0xba,0xc5,0x86,0xf4,0xf6,0xc7,0x92,0x73,0x12,0x3b,0x28,0x21,0x1b,0x3d,0x84,0xc0,0x1a,0x7d,0x35,0x8b,0xd4,0x35,0x39,0x35,0xa6,0x51,0xd9,0x19,0x8b,0x92,0xa3,0xea,0x8c,0x7e,0x25,0x05,0x1f,0x1d,0x8f,0x4d,0xba,0xdf,0x20,0x8c,0x8d,0xe2,0xac +.byte 0xdd,0x3d,0xf1,0x04,0x3f,0x77,0x4b,0x8f,0x39,0x7d,0x01,0xb7,0x71,0x4b,0x7b,0xe1,0x6f,0xd4,0x28,0x1a,0x57,0x96,0x4d,0xe2,0x84,0xf6,0x64,0x10,0xbb,0x0f,0xbc,0xe0,0x19,0xed,0x92,0x9e,0x60,0x15,0x78,0xd1,0x30,0xc0,0x53,0x4b,0x94,0xca,0x4b,0x5a,0x44,0x8b,0xa9,0xda,0x2f,0x08,0x70,0x94,0xe4,0x54,0xe1,0x28,0x6e,0xdd,0x34,0x56 +.byte 0x54,0xb0,0xd4,0x87,0x00,0x72,0x1e,0x46,0x10,0x3a,0x27,0x5d,0xc6,0xb5,0x72,0x20,0x2b,0xbe,0x17,0x01,0xbb,0x04,0x11,0x16,0x7d,0xbf,0x91,0xd3,0x7b,0x44,0x58,0x13,0x2a,0x9c,0xda,0x9d,0x26,0x46,0xf5,0x5f,0x51,0xef,0x6c,0xf6,0x36,0xdb,0xb7,0x21,0xde,0xdb,0x87,0xa0,0xd8,0x60,0x24,0x86,0x6d,0x64,0x85,0x9e,0x94,0xd9,0x21,0x0d +.byte 0xed,0xda,0x33,0xea,0x3c,0xdf,0x74,0xe3,0xa5,0xc7,0xc7,0x9e,0xe5,0xb1,0x29,0xdf,0xfa,0x20,0x25,0xcd,0x13,0x08,0xee,0xe6,0xba,0xf1,0x62,0x39,0xcf,0xe3,0x29,0xb8,0xaa,0x65,0x43,0x8a,0x48,0xb5,0xb5,0x70,0x35,0x66,0x42,0xf4,0x32,0x70,0x0b,0x0c,0xa7,0x46,0x79,0xdf,0xb2,0x80,0x13,0x72,0x7a,0xeb,0xf9,0x52,0xcb,0xb8,0x9f,0x4b +.byte 0x4f,0x29,0x2b,0xb3,0x94,0x02,0x0a,0xe1,0x20,0xe5,0x91,0x15,0x6a,0xa1,0x0c,0x71,0x96,0x77,0x01,0x80,0xf7,0x51,0x0b,0xaf,0x54,0x9b,0x3c,0x7b,0x91,0xd2,0xbd,0xaf,0x13,0xa5,0x32,0x17,0x7c,0xca,0xd0,0x22,0xd5,0xe5,0x83,0x44,0x24,0x5c,0xcc,0x24,0x31,0xcd,0x81,0x4e,0x96,0xcd,0x60,0x9f,0x7a,0xe7,0x2e,0x89,0x16,0xd5,0x66,0x6b +.byte 0xac,0x31,0x11,0x7c,0x76,0xc6,0xde,0xbe,0x46,0x55,0x20,0xdf,0x9d,0x2c,0x33,0xa5,0x80,0x76,0xb1,0xc9,0x1c,0x84,0x17,0x4d,0x15,0xe6,0x6d,0xce,0xed,0xea,0xc7,0xe6,0xff,0x01,0x10,0x60,0x26,0xf7,0x63,0x5f,0x91,0x89,0x7e,0xc1,0x7c,0x76,0x67,0x7b,0x7e,0xfa,0x28,0xa0,0xa7,0x82,0x1b,0x28,0x82,0x6a,0x4f,0x78,0x61,0x48,0xbf,0x13 +.byte 
0x0b,0x71,0x0c,0xad,0xee,0xd7,0xf8,0xcc,0x0f,0x77,0x74,0x7d,0x2b,0x8a,0x09,0xd8,0x47,0xa0,0xfc,0x45,0x40,0x24,0xf3,0xce,0xdb,0x81,0xa1,0x50,0x9e,0x0a,0xd0,0x58,0xf7,0xaf,0xf1,0x09,0x12,0xa8,0x24,0xb2,0x34,0x99,0x67,0x17,0x53,0x1f,0x9d,0x09,0x7b,0xcb,0x83,0x6e,0x6a,0x0b,0xbf,0x8f,0x6e,0x3d,0xdb,0x29,0xe5,0xd0,0x06,0xdb +.byte 0xb8,0xf2,0xf3,0x43,0x4e,0xa7,0xf3,0x73,0x93,0xe8,0xab,0x2f,0xc8,0x75,0xce,0x62,0xda,0x74,0x39,0x57,0xe4,0xe4,0xb1,0x41,0x8f,0x9d,0xda,0x43,0xb4,0x2c,0x4b,0xd5,0x1c,0x10,0xf0,0x29,0x6b,0x94,0x15,0x04,0x3c,0xd3,0x45,0x73,0x29,0xb3,0x60,0x87,0x93,0xdb,0xbf,0x60,0x4e,0xdf,0x4d,0xbb,0xde,0xb2,0x57,0x67,0x14,0x0d,0x0b,0x60 +.byte 0x63,0xd5,0xc6,0x81,0x82,0xd6,0x0c,0xe6,0x4c,0x43,0x13,0x02,0x74,0x56,0x20,0x6b,0x21,0x28,0xe6,0xe2,0x0b,0xc1,0x7a,0xc3,0x08,0x60,0x82,0xe0,0x4f,0xbf,0x1e,0x3f,0xf0,0xa9,0xb2,0x2e,0x0c,0xbf,0xd6,0x03,0x1d,0x0d,0xd6,0x1c,0x36,0xb5,0xb2,0x14,0x56,0x21,0xc2,0xe0,0x1e,0xff,0xee,0x8a,0x70,0xae,0x3f,0x1e,0xe5,0xac,0x05,0x46 +.byte 0x6b,0x81,0x32,0xce,0x50,0xbb,0x82,0x66,0x32,0x93,0x46,0xf7,0xee,0x77,0x1c,0x9a,0x2f,0x31,0x60,0xa2,0x09,0x7c,0x14,0xd9,0x81,0xe9,0x19,0x27,0x31,0x5e,0xa0,0x98,0x71,0x42,0x2f,0x30,0x71,0xd6,0x31,0x94,0xe0,0x61,0xed,0x50,0x66,0xfa,0xba,0x12,0x5e,0xc6,0xc8,0x67,0xe5,0x8e,0xfd,0x34,0xa9,0xeb,0xde,0x25,0x43,0xbf,0xe7,0xb5 +.byte 0x16,0xf5,0x62,0x66,0x5d,0x0b,0x13,0x9a,0xd4,0x8c,0x2b,0x8f,0xe6,0x91,0x33,0xcb,0xa0,0x70,0x48,0x3e,0x22,0x7d,0xe4,0xf3,0x75,0xc9,0x49,0x82,0x50,0xc9,0x90,0x04,0x32,0xab,0x99,0x6e,0xf1,0xf0,0x0b,0x60,0x80,0x35,0x25,0x45,0x88,0xe9,0x82,0x06,0xe1,0xbb,0x85,0x11,0x40,0xf8,0x0e,0xbd,0x19,0x7a,0xdd,0x78,0xf9,0xc2,0x46,0xe4 +.byte 0xb5,0x27,0xfb,0xb6,0xba,0xbc,0x7d,0xb8,0x27,0xe7,0xbf,0xfe,0x8e,0xfe,0x7e,0x83,0x63,0x43,0x92,0x26,0xf0,0xbb,0xde,0xb6,0x93,0x4f,0x55,0x0c,0x07,0x99,0x3c,0x98,0xa1,0x8c,0x73,0xc1,0x4c,0x9a,0x09,0xa8,0xea,0x16,0x0b,0x49,0x2a,0x43,0xee,0x90,0x61,0x6f,0x09,0x1b,0xc3,0x2d,0x62,0x4b,0xfc,0x90,0xa1,0x8e,0x84,0x2e,0x90,0x8d +.byte 0x5f,0x80,0xff,0x6a,0x3c,0x61,0x0f,0xf2,0xac,0x70,0x20,0xc1,0xf2,0x85,0xcf,0x94,0xc8,0x94,0xe7,0xa0,0x04,0xdf,0xaf,0xef,0x26,0xd2,0xbc,0x07,0x70,0xc1,0x48,0xd6,0x87,0xd6,0xbe,0xea,0x95,0x6a,0xce,0xa2,0x48,0xac,0x46,0x46,0xb1,0x74,0x70,0x96,0x6c,0x26,0x58,0x75,0x9d,0x84,0xd7,0xd9,0x17,0x9a,0x46,0xe9,0xd7,0x3d,0xde,0xfd +.byte 0x7e,0xf4,0xd8,0x7e,0xf8,0x8f,0x1c,0xb5,0xfb,0xe9,0xc4,0xca,0xba,0x52,0x5f,0x17,0xee,0x75,0x7d,0x1d,0x50,0x16,0x9f,0x16,0x1e,0x00,0x8b,0xc1,0x2f,0xab,0x73,0x65,0x88,0x7b,0x80,0xa6,0x71,0xb7,0xfb,0xb0,0xda,0xd1,0x96,0x18,0x5c,0x48,0x6e,0x18,0x45,0x59,0x45,0xef,0x5c,0x65,0x35,0x99,0x5e,0xb9,0xd4,0x1a,0x07,0x7d,0x1e,0xa6 +.byte 0x69,0x42,0x9d,0xfa,0xec,0x02,0xdc,0xc4,0x19,0x6b,0x9c,0xb1,0x5e,0xa3,0xb4,0x6d,0xb4,0xa6,0x25,0xa8,0xe4,0x3f,0x3d,0x6e,0x2c,0x95,0xf7,0xcd,0xa5,0x4e,0x32,0xca,0x7e,0xe0,0x7b,0x11,0xf9,0x0a,0xe1,0x61,0x41,0x60,0xec,0xb3,0xb1,0x92,0x89,0x33,0x17,0xe9,0xaf,0x70,0x7f,0x1c,0x07,0xb5,0x24,0x3a,0x37,0x84,0x38,0xf5,0xb6,0x11 +.byte 0xfc,0x0c,0x12,0xc1,0xfc,0xa9,0x82,0x67,0x4d,0x17,0xe8,0xea,0xd0,0x62,0x17,0xb2,0x9c,0x59,0x01,0x87,0xfb,0x54,0x8e,0xa7,0xa5,0x85,0xa9,0x8a,0xec,0xfe,0x29,0xc0,0x73,0xc6,0xa0,0xbf,0x66,0x9a,0xc5,0xf8,0xee,0xa4,0xcb,0x09,0x44,0x74,0xfe,0x32,0xf5,0x42,0xea,0xf0,0xa6,0xec,0x74,0xea,0x14,0x5c,0x43,0x51,0xfa,0x3a,0x48,0x1e +.byte 
0xa0,0x2e,0x59,0x2e,0xdb,0x3a,0x19,0xfe,0x1f,0x95,0x25,0xee,0x27,0x2b,0x99,0xb4,0xe1,0xd0,0xe6,0x33,0x91,0xa1,0xaf,0x30,0xa0,0x89,0x00,0x3c,0x13,0x31,0x18,0x70,0x90,0x42,0x55,0x0a,0xc9,0xc5,0x0c,0x43,0xa5,0xee,0xd6,0x90,0x07,0xae,0xc4,0x8c,0xdc,0xe4,0x07,0xbb,0x61,0x70,0xd1,0x10,0xe4,0x68,0x96,0x70,0x78,0xab,0xe9,0x3a +.byte 0x6e,0xc7,0x75,0x93,0xa0,0xba,0xff,0x6a,0x2d,0x57,0xaa,0x93,0x09,0xc3,0x6b,0x81,0xf3,0xde,0xc2,0xee,0xac,0x86,0x0a,0xfb,0xad,0xdb,0x6f,0x2a,0xa0,0x15,0x7b,0x96,0x77,0x38,0xf8,0x86,0x51,0x33,0x7a,0x6f,0x1c,0xf8,0xd5,0x15,0xcd,0x76,0x7f,0x37,0x68,0x82,0xdf,0xab,0xc3,0xdb,0xbe,0xeb,0x2b,0xa8,0x34,0x72,0x20,0x34,0xfb,0x12 +.byte 0x64,0x17,0x05,0x64,0xc0,0xa1,0xca,0xd3,0xac,0x27,0xc2,0x68,0x28,0x40,0x42,0xe2,0x0a,0xdd,0xd7,0xd6,0xf6,0x92,0x95,0x3c,0x10,0x17,0x4e,0xef,0x75,0xae,0x98,0x2d,0x10,0xc8,0xa8,0xac,0x15,0xf7,0x5b,0x81,0xc1,0xdf,0x5e,0xbe,0x88,0x49,0xe3,0xd1,0x88,0x1c,0xcb,0xce,0x20,0x01,0x12,0x60,0x57,0x0b,0xf6,0x32,0x57,0xaf,0x59,0xef +.byte 0xc9,0xe7,0xbf,0x62,0xf3,0xb6,0xe6,0x5c,0xee,0x36,0x7e,0x11,0x90,0xd1,0xeb,0xfa,0x62,0x0b,0xc6,0xf3,0x1a,0xd5,0x8b,0x95,0xec,0xb4,0x38,0xfe,0x45,0xb0,0xb5,0xff,0x84,0x0a,0x27,0x3a,0xa2,0x5a,0x2a,0xc9,0xa4,0xc0,0x11,0xc6,0x61,0x13,0xb7,0x53,0xa3,0x47,0x45,0x6d,0xc6,0xa9,0x00,0xd1,0x40,0xf4,0x77,0xac,0xb3,0xd3,0x26,0x99 +.byte 0xf1,0x36,0x59,0x28,0xb4,0xd0,0xdd,0x0e,0xed,0x53,0x33,0x45,0x71,0x9c,0x5c,0x11,0x27,0x2c,0x2f,0x10,0x9e,0x5b,0x8a,0x5b,0xc5,0x1f,0x36,0xc9,0x2a,0xba,0xc7,0xa5,0x31,0xd7,0x9f,0x2b,0x0a,0x09,0xcb,0x7c,0x4f,0xa2,0xdc,0xc5,0x64,0x0d,0xe6,0xfe,0xb0,0x9d,0x3b,0xf0,0xa7,0x19,0x8c,0x84,0x21,0x6b,0x9e,0x1c,0xb5,0x7b,0x66,0x77 +.byte 0xd0,0x85,0xb4,0x22,0x93,0x6e,0x84,0x29,0x9b,0x60,0x90,0x37,0x9d,0x8c,0x94,0x95,0x95,0x3b,0xf1,0x2d,0x56,0x5b,0x53,0x60,0x2d,0xe5,0x7f,0x80,0x71,0x56,0xa7,0x6e,0x66,0x76,0x1f,0xaa,0x0d,0xba,0xfb,0x0e,0xcf,0x20,0x68,0x74,0x2b,0x99,0x13,0xe1,0xa8,0x33,0xc9,0xf6,0xbc,0xd3,0xf4,0x46,0x01,0x02,0x85,0x27,0xf4,0x20,0x97,0xa3 +.byte 0xba,0xbc,0x47,0x30,0x48,0xed,0x60,0xe6,0xca,0xbf,0x76,0x8c,0x2c,0x6a,0x43,0x32,0xfd,0x90,0x04,0x95,0xc2,0x42,0xcb,0xca,0xc4,0x33,0xe1,0xd3,0x23,0x92,0xa1,0xde,0x09,0x38,0xce,0x00,0x93,0xb3,0xed,0x82,0x8e,0xfb,0xce,0x4c,0x9a,0x10,0x6e,0xce,0x4a,0x37,0x05,0x75,0x37,0x58,0xc3,0x8e,0x57,0x50,0xa0,0x7d,0x80,0x2d,0x51,0xea +.byte 0x08,0xcd,0x1b,0xd2,0x81,0x85,0x19,0xc1,0xe8,0xce,0x31,0x18,0xcf,0x54,0x37,0x96,0x77,0x3d,0x64,0xfb,0xc2,0xa9,0xdb,0xb8,0x37,0x03,0x83,0x34,0x3c,0x25,0x6a,0x22,0x33,0xfa,0x27,0x70,0xc7,0x0a,0x27,0x12,0x1e,0xb3,0xd0,0x59,0x6f,0xa3,0xc5,0x73,0x95,0x4c,0x1f,0xf1,0x3c,0xb3,0xc2,0xa2,0xc6,0x45,0x17,0x53,0xa8,0xfc,0x00,0xff +.byte 0x77,0x40,0x28,0xd2,0x53,0x90,0x92,0xe9,0x86,0x6c,0xa5,0x40,0xce,0xbc,0x79,0x6f,0x8f,0x12,0xef,0x1b,0x38,0x1f,0xb3,0x24,0xf0,0x75,0x17,0x20,0x9e,0x03,0x9c,0x2b,0x51,0x57,0x93,0x44,0xce,0x74,0xc9,0x12,0xe7,0xcb,0x2f,0x5e,0x1b,0x95,0xf2,0x4d,0x2e,0x51,0x8d,0x52,0xd5,0x21,0xe3,0x1b,0x33,0xe7,0xf2,0x18,0x61,0xa2,0x53,0xdb +.byte 0x73,0xaa,0x6a,0x6c,0xf9,0xf4,0xef,0x3d,0x40,0xa3,0x00,0x80,0x82,0xed,0xe6,0x66,0xd1,0xd6,0xe9,0x93,0xd8,0x92,0xfa,0xdf,0xf9,0x9c,0x7a,0xfb,0x2b,0xc7,0xa7,0x73,0x67,0x2b,0xed,0x76,0xb1,0x52,0xaa,0xcf,0x34,0x84,0xa1,0x6d,0x56,0x85,0xef,0xcb,0xbc,0xa3,0xc6,0xf3,0x5a,0x88,0x04,0xd5,0xd8,0xf1,0x7b,0xf8,0x11,0x6f,0xa0,0x44 +.byte 
0xa5,0x0f,0x76,0xed,0xd7,0x98,0xe3,0xda,0xb8,0x1b,0xc7,0xe6,0x89,0x08,0x19,0x1f,0xf8,0xe3,0x32,0x32,0xa5,0x3c,0x71,0x9f,0x11,0xde,0x50,0x29,0xb0,0x54,0x7e,0x3b,0x5e,0xeb,0xf7,0xab,0xa8,0xa0,0x35,0x96,0xc7,0xc5,0xea,0x60,0xc0,0x37,0xca,0x61,0x55,0x96,0xac,0xb4,0xd0,0x29,0x9a,0x1a,0x3f,0x9e,0xf5,0xf5,0x3d,0xed,0xc5,0x7c +.byte 0x2c,0x9d,0x67,0xf8,0x4d,0x82,0x6e,0x2a,0x9a,0xfc,0x5f,0xdc,0x02,0xb0,0x3d,0xa5,0x1c,0x08,0x5d,0x4a,0xaa,0xd0,0x38,0xfb,0xbc,0xbb,0x7f,0x37,0xfb,0xec,0xc0,0x62,0x79,0xaa,0xde,0xfd,0x23,0x9c,0x4c,0x4a,0xe1,0x48,0x40,0x36,0xc0,0x0a,0x6f,0x43,0xb7,0xad,0x4c,0xf6,0x56,0xb5,0x44,0xf4,0x72,0xcd,0x13,0x10,0xea,0x0d,0x24,0xc1 +.byte 0xa9,0x36,0x3b,0x36,0xf2,0x6e,0xf9,0x0a,0x67,0xcd,0x02,0x67,0xb3,0x5c,0x63,0x3a,0x7c,0xc1,0x3b,0xf2,0x1d,0x3d,0xf1,0xff,0xbf,0xf7,0x97,0x9f,0x30,0x1f,0xaa,0xd8,0xdb,0x53,0x9b,0x0a,0xbd,0x38,0xd8,0xb6,0xf1,0x4a,0x78,0x1a,0xc2,0x46,0xd2,0x0c,0xa8,0xcd,0x7b,0x39,0xc7,0x42,0x55,0xc8,0x3e,0x02,0x1d,0xf4,0xad,0x55,0x01,0x6a +.byte 0x11,0x2d,0xfa,0x67,0x48,0xae,0x45,0x31,0x9b,0x09,0x7d,0xd9,0xdd,0xaf,0x5c,0xd5,0x40,0x51,0x2a,0xa1,0x0f,0xb3,0x6e,0xc2,0x94,0xfe,0xde,0x70,0xaf,0x6c,0xea,0x5f,0x7d,0x3c,0x72,0x85,0x86,0x24,0x20,0x0a,0x7a,0xe7,0x69,0x32,0x66,0x7d,0x34,0x13,0x60,0x62,0xc7,0x68,0x32,0xde,0x34,0x30,0x36,0xc8,0x8e,0xb7,0x13,0x66,0xf1,0xce +.byte 0x5f,0x7a,0x3a,0xfe,0x62,0xd6,0x72,0xb6,0x1b,0x80,0x43,0x8a,0x3e,0x13,0x15,0xe4,0x1c,0x7b,0x08,0x70,0x0b,0x6e,0xb3,0xfe,0x07,0x91,0x23,0x21,0x57,0x48,0xc6,0xa9,0xa3,0xa8,0xc7,0x19,0x89,0x8a,0x49,0x12,0x25,0x88,0xd2,0x11,0xa5,0xa8,0x9e,0x0e,0xa7,0x71,0xfe,0xaf,0x88,0xee,0xa7,0x1c,0x3b,0x27,0x27,0x7e,0x79,0x92,0xed,0x77 +.byte 0x74,0x65,0xbd,0x46,0x41,0x25,0xd9,0x8b,0x21,0x73,0x9f,0xaa,0x35,0xa0,0x22,0xb3,0xc8,0x71,0x28,0x72,0xd2,0xcb,0xf4,0x2a,0x06,0x0a,0x63,0x96,0x55,0x2e,0x83,0x0b,0xe8,0x07,0x99,0x9d,0x59,0xde,0xde,0x62,0xbd,0xb4,0x3e,0x70,0x15,0xed,0x95,0xa8,0x2f,0xb7,0xa2,0xb6,0x65,0x56,0x9d,0xe5,0x81,0xa0,0x05,0x5b,0xce,0x00,0xd4,0xb9 +.byte 0x28,0x5a,0xc1,0x9a,0x74,0xc6,0xd7,0x27,0xdd,0x7c,0xbe,0xe8,0x0d,0x47,0xfc,0x81,0x05,0x6b,0x4f,0x68,0xc7,0xcc,0x5d,0xd5,0x66,0x83,0x34,0x72,0x35,0xab,0x39,0x64,0x19,0x67,0xbd,0xff,0x15,0x44,0x20,0x18,0x2a,0xaf,0xbc,0x58,0x94,0xdb,0x18,0x50,0x55,0x11,0x6a,0xc4,0x1d,0xee,0xe2,0xe0,0x75,0x73,0xf1,0xa1,0x83,0xf4,0xcb,0x40 +.byte 0x96,0xf4,0x77,0x45,0x61,0x8b,0x1a,0x8c,0x0c,0xfc,0xd2,0x7e,0x0b,0x1e,0x18,0xd2,0x95,0xa5,0x4c,0x5b,0xd6,0x9d,0x40,0x8b,0xc0,0x51,0xe8,0x2d,0xe5,0x16,0xbf,0xd7,0x98,0x8a,0xa0,0x46,0x1f,0xc4,0xe9,0x12,0x31,0x40,0xc5,0x2d,0x59,0xf8,0x9b,0x5f,0xe3,0x3a,0x10,0xdf,0xda,0x72,0x9e,0xab,0x13,0x7b,0x8f,0xc8,0x52,0x9f,0x58,0x45 +.byte 0x7a,0xe6,0x3a,0xbb,0xdd,0x1d,0xc7,0x3b,0xc4,0x26,0xdc,0x99,0x29,0xf2,0x74,0x16,0x84,0xe9,0x8a,0x86,0xc0,0x1e,0x49,0x96,0x2f,0x5c,0x2a,0x49,0x71,0x88,0xe6,0x82,0xb2,0x18,0x88,0xc1,0x86,0xcb,0x26,0x3c,0xa5,0x50,0x31,0x22,0x9a,0x8f,0x45,0x2b,0xde,0xf0,0x86,0x8e,0x13,0x86,0xc4,0x4a,0x9b,0x35,0x27,0x93,0x0b,0x13,0xc8,0xef +.byte 0x96,0x74,0x97,0x85,0x09,0xc0,0xa0,0x32,0xfe,0xc3,0xe3,0x92,0x2e,0xe8,0x54,0xbd,0xc2,0x23,0xeb,0x4b,0x02,0xf5,0x5a,0x0b,0x0d,0x58,0x50,0x45,0xe7,0x01,0xd4,0x17,0x00,0xdb,0x0d,0xd4,0x2e,0xa0,0xde,0x38,0xf4,0xb1,0x1e,0xd0,0xf0,0xa3,0x6b,0x21,0x0c,0xbd,0xae,0x84,0x7e,0x42,0x36,0x4f,0x2e,0x46,0xae,0x23,0x91,0xb9,0x06,0xac +.byte 
0x86,0x7f,0x29,0xca,0xfb,0xe9,0xde,0xdb,0x90,0xfe,0x6f,0xbc,0xdb,0x3c,0x48,0x3d,0x6e,0x06,0x68,0x49,0xbb,0x43,0x8d,0x9d,0xc4,0x5f,0x45,0xcb,0x77,0x28,0xe0,0x35,0xd1,0xb4,0x25,0xb2,0x45,0x6d,0xb4,0x89,0x53,0x26,0x33,0x98,0x83,0x45,0x9d,0xf5,0xad,0xf9,0xa7,0x59,0xb6,0x6e,0xa8,0x25,0xa5,0xef,0xee,0xf6,0x6a,0xd5,0x6c,0x60 +.byte 0x9a,0xea,0x78,0x9e,0xe4,0xa2,0x29,0x0b,0x70,0xb3,0x6e,0x3a,0xfd,0x07,0xc7,0x7f,0x1b,0x07,0xc7,0xca,0x1b,0xb8,0x08,0xe1,0xc9,0x94,0xb2,0x62,0x7c,0x04,0x96,0xa6,0xda,0x65,0x28,0xfd,0xf9,0x70,0x22,0xb7,0x21,0xd3,0xa6,0x38,0x0f,0x1e,0x88,0x7e,0x73,0xec,0x04,0x99,0x8b,0x23,0x91,0x13,0xe6,0x4f,0x74,0x81,0xcc,0x1f,0xdd,0xaf +.byte 0x58,0xc4,0x80,0x00,0x4d,0x1d,0xbe,0x84,0x7d,0xfe,0x85,0xe7,0x77,0x20,0x3c,0x65,0x4e,0x0e,0x2e,0x5d,0xc1,0xd9,0xcb,0xf7,0xbb,0xc8,0x8d,0xbf,0x16,0xa8,0x1e,0x63,0xf5,0x10,0x5e,0xa5,0x9c,0x63,0xb6,0x9a,0xeb,0x98,0xa8,0xb1,0x59,0x82,0x66,0x51,0xae,0x3c,0xfc,0xa8,0x11,0x92,0xf4,0x45,0x88,0x7c,0x03,0x6f,0xe6,0x87,0xe4,0xa8 +.byte 0x79,0xbf,0xb3,0x0d,0xd6,0x0b,0x8d,0xa3,0x16,0x2a,0xfb,0x79,0xb9,0xe7,0xdb,0xa7,0xdb,0x94,0xd3,0xe6,0x3a,0xdd,0xe9,0x5f,0x30,0x7d,0x68,0x90,0x35,0xfd,0x18,0x91,0x8e,0xc5,0x12,0xd6,0xf9,0x98,0xa0,0x5b,0xcd,0x81,0x76,0x84,0x08,0xd0,0xab,0x59,0x2d,0x3b,0x8a,0xf9,0xd9,0x95,0xde,0x8b,0xbb,0x92,0xef,0x35,0xc3,0x3e,0x46,0x73 +.byte 0xf3,0x3b,0x09,0xbf,0x22,0x2b,0x9c,0x0f,0x70,0x9a,0x16,0x0e,0x4b,0xa7,0x1a,0x96,0x98,0xb7,0x5a,0x40,0x06,0x81,0xf4,0xac,0xa6,0xe6,0xab,0xf2,0xda,0x87,0x18,0x61,0xcb,0xc1,0x67,0xbd,0x2f,0x6f,0x06,0x21,0xaf,0x73,0x98,0xe1,0x3f,0x7a,0x17,0x7f,0x44,0xcb,0x1d,0xdd,0x60,0xb3,0x2c,0x58,0x20,0x8a,0x04,0x74,0x56,0x9b,0x26,0x51 +.byte 0x61,0xb0,0x07,0x50,0x53,0x83,0x31,0x42,0x59,0xb3,0x33,0xfa,0xfe,0xbc,0xad,0x7f,0x99,0x9b,0x86,0xf1,0xaa,0x85,0xf1,0xbb,0xc0,0x0c,0x91,0x8d,0x1a,0x0f,0x8f,0x9f,0xfe,0x62,0x2b,0x35,0xae,0xcc,0x8c,0x09,0xe3,0x29,0x96,0xd1,0xbe,0x7f,0x25,0xd6,0x03,0xf0,0x4c,0x53,0xad,0x5b,0x56,0x66,0x68,0x9a,0xa3,0xc4,0x07,0x71,0xde,0x49 +.byte 0x82,0xbb,0xf7,0x9a,0x2b,0x96,0xcf,0x50,0xf6,0x00,0xf7,0x0b,0x27,0xdd,0xf5,0xf6,0xc5,0xc8,0xbd,0x2a,0xa2,0x06,0x2c,0x42,0x3f,0xa0,0xf8,0xcc,0x1d,0x64,0xcf,0xbc,0xb4,0xc4,0x63,0xde,0x6b,0xd3,0xb4,0x61,0xdf,0xbd,0x73,0x50,0x34,0xc3,0x20,0x45,0x06,0x73,0x9b,0xf0,0xfb,0xa6,0x2b,0xec,0x92,0x32,0xa9,0x1f,0x4f,0x1e,0x38,0x78 +.byte 0x2a,0xd2,0x7c,0x1d,0x89,0xf9,0x70,0xbc,0xef,0x09,0x77,0xd3,0x6a,0x56,0xa1,0x8b,0x4b,0x23,0x1b,0xb1,0x2f,0xec,0x84,0xe5,0x59,0xc5,0x20,0x23,0xbc,0x3f,0x0a,0x43,0x97,0x1c,0x5e,0xf7,0xee,0xfe,0x0b,0x2a,0x42,0x08,0x2a,0x39,0x91,0xce,0x8a,0x33,0x9f,0x63,0x77,0x6d,0xf6,0xf3,0x0e,0x1d,0xb3,0xfb,0xcf,0x2f,0x7f,0x95,0xc2,0x71 +.byte 0x1c,0xa0,0x0b,0xc6,0xb8,0xde,0x4d,0xd8,0xcc,0x4c,0x4f,0xaf,0x07,0x87,0x6d,0x3b,0xab,0x95,0xab,0xa1,0x6a,0x50,0x9f,0x7c,0x35,0xb6,0x65,0xdd,0xe3,0x06,0xe5,0xb3,0x42,0x5f,0x4d,0xe5,0x3e,0xfa,0x6c,0xdf,0x19,0x58,0xd1,0xf6,0xc6,0x94,0x1c,0xce,0x30,0x90,0xd3,0xeb,0xa3,0x7c,0xe5,0x3f,0x57,0x99,0x2e,0x22,0x0a,0x94,0x2f,0xfe +.byte 0x39,0x16,0xe6,0xfa,0xd0,0xb5,0xf9,0xb4,0x88,0x61,0xa4,0xa8,0xc3,0xb8,0xb7,0x52,0xaf,0x90,0xc1,0xe0,0x19,0x78,0x04,0x2b,0x71,0x04,0x03,0x2f,0x63,0xbe,0x40,0xf5,0x82,0x3b,0x1b,0x6b,0xde,0x6d,0x1e,0x86,0x87,0x82,0xc3,0x31,0x97,0x20,0xdd,0xdd,0xce,0x61,0x64,0x99,0xf6,0xbe,0xbf,0xec,0x37,0x54,0x8b,0x92,0x29,0xda,0xc5,0x7b +.byte 
0x4d,0xc5,0xaf,0xb8,0x4e,0x4b,0x4a,0x2b,0x35,0x30,0xf5,0x19,0x9e,0x32,0xd8,0x2e,0xc1,0x19,0xfe,0xd1,0x61,0xb0,0xaa,0x05,0x58,0x15,0xd9,0x0e,0x4e,0xca,0x4e,0x10,0x83,0xe6,0xe6,0x57,0xe8,0x8d,0x13,0xb4,0x6f,0x85,0x59,0xf2,0x83,0xc8,0x37,0xaa,0xa2,0xe5,0xc8,0x77,0x06,0x82,0x21,0x5d,0x84,0x58,0x67,0x9b,0xcc,0x9c,0xfc,0x1b +.byte 0x28,0x2f,0xac,0xc8,0x96,0x91,0x26,0x46,0x42,0x2b,0x68,0x57,0xb0,0x79,0x1e,0xb1,0x9b,0x92,0x2c,0xeb,0x67,0x00,0xd4,0x26,0x7d,0xca,0x45,0x97,0x55,0xea,0x2a,0x20,0x70,0x7c,0x20,0x14,0x38,0x40,0x3d,0x4f,0xf5,0x3a,0x1f,0x0a,0xe3,0x9a,0x48,0xcc,0xb2,0x7d,0xee,0x5b,0x48,0x90,0x0d,0x12,0x77,0xd8,0xd3,0xb6,0xd7,0x66,0x9e,0x48 +.byte 0xbb,0x92,0xc1,0x7c,0x4e,0x90,0x4d,0xd5,0x96,0x99,0xea,0x86,0x2d,0xb9,0x5a,0x50,0x05,0xc2,0x6b,0xa7,0x0c,0x43,0x44,0x22,0x09,0xb9,0xc0,0x56,0x47,0x5f,0xdf,0xaf,0x6b,0x91,0xe2,0xd7,0x45,0x77,0x17,0x7a,0x71,0x6d,0x27,0x93,0xe2,0xc6,0x10,0x2f,0xc8,0x3b,0x75,0x78,0x11,0xae,0x07,0xe6,0xba,0x64,0xd4,0x06,0xfa,0xf9,0x1d,0x74 +.byte 0x9e,0x4f,0x6d,0x02,0xfc,0x40,0x80,0x9a,0x2e,0xd4,0x15,0x32,0x15,0xe8,0x97,0x0a,0xd4,0x65,0x6a,0x87,0xd3,0x66,0x4b,0xb8,0x66,0x84,0x8e,0xb9,0x4b,0xa7,0xcf,0x58,0x13,0x66,0x3a,0x4e,0xa5,0x76,0x17,0x13,0x92,0x79,0x42,0x67,0x6d,0xb6,0x65,0xec,0xc8,0xb5,0x5f,0x17,0x2a,0x2d,0x4b,0x19,0xe9,0x00,0x6e,0x38,0xaf,0xe9,0x06,0xb6 +.byte 0xe8,0x99,0x69,0x8a,0x74,0xe7,0x7e,0x70,0x69,0x4b,0xbc,0xce,0x5d,0x61,0x94,0x1b,0x47,0x41,0x38,0x5f,0x2e,0xcf,0x2b,0xe1,0xcd,0xa3,0x98,0x71,0xf7,0x09,0x65,0xfe,0x5f,0x62,0x4b,0x9e,0x91,0x88,0x35,0xa2,0x66,0x02,0x1d,0xc9,0x93,0x0c,0x19,0x50,0x4b,0x95,0x71,0x79,0xdd,0x74,0xe1,0xda,0x5a,0xb7,0x38,0x70,0x61,0x18,0x3f,0x68 +.byte 0x08,0x34,0xd8,0xfe,0xbb,0xd1,0xbf,0x57,0xed,0xc2,0x52,0x6d,0x54,0x3e,0xcb,0x0c,0x32,0xc7,0x09,0xa9,0x31,0x10,0xe8,0xbd,0x70,0xe3,0x0e,0xe9,0x4f,0x7a,0xd6,0x42,0x45,0x2e,0x1b,0x3c,0x0d,0x15,0x6d,0xb4,0xad,0xe9,0xc5,0xa2,0x12,0x77,0x34,0x43,0x20,0x95,0xc1,0xb7,0x51,0x72,0xed,0x78,0xa0,0xae,0x3c,0xae,0xb4,0xd4,0xda,0x58 +.byte 0x83,0x62,0xa9,0xc6,0x01,0x3d,0x14,0x19,0x07,0x00,0x3c,0x82,0x16,0x7e,0x8a,0x91,0x78,0xa1,0x65,0x0b,0x5b,0x3a,0x40,0x72,0xe5,0xf0,0xd4,0x82,0x04,0xe4,0x01,0xf1,0x84,0x87,0x96,0x26,0x91,0x66,0x77,0xf7,0x59,0xd6,0xc2,0xca,0x29,0x3b,0x68,0x2a,0x27,0x99,0x64,0x86,0xc2,0x96,0xbf,0x11,0x3c,0xa8,0x0c,0xf7,0x86,0xb8,0xc1,0x40 +.byte 0x15,0x1a,0x84,0xe3,0x93,0x23,0x73,0xa9,0x8b,0xbd,0xb4,0x8a,0xe4,0xf1,0xa5,0x8f,0x56,0xa3,0xdc,0x77,0xbd,0x7d,0x15,0x74,0x2b,0x18,0x92,0x56,0x45,0xbc,0xaf,0xf2,0x55,0xce,0x9d,0xc2,0xab,0x39,0x90,0xec,0x78,0x3f,0xa5,0x14,0xeb,0x40,0x2f,0x01,0xca,0xeb,0xad,0x73,0x85,0xbc,0xe1,0x91,0xaa,0x77,0xa9,0x6c,0x02,0x66,0x6a,0x65 +.byte 0x63,0x6c,0x50,0x62,0x83,0x83,0xef,0x16,0x4f,0x21,0xfd,0x28,0x8e,0x52,0x66,0x5b,0x6f,0x8f,0xbe,0x8d,0x17,0xb9,0xd5,0x99,0xf7,0x39,0xd1,0xbc,0xa2,0x43,0xd7,0x0a,0x80,0xea,0x42,0xf8,0x38,0x53,0x95,0x07,0x6f,0xb7,0x7c,0xc1,0x16,0x88,0xc8,0xb7,0x59,0xde,0x76,0x51,0x2f,0x92,0xd0,0x40,0xfd,0xd9,0x2d,0xca,0x9e,0x8d,0x28,0xae +.byte 0x48,0xc1,0x0a,0xe0,0x76,0x9c,0x02,0x0b,0xc5,0xd1,0xf9,0x83,0x90,0x86,0xa4,0xeb,0x5c,0x64,0x65,0xf8,0x98,0x38,0xc5,0xce,0xef,0x6f,0xc3,0x88,0xb6,0x2f,0x8a,0x40,0x55,0x52,0x47,0x06,0x75,0x16,0x46,0x9c,0xff,0x3c,0x68,0x97,0xc3,0xfb,0x10,0x11,0x7b,0xba,0x04,0xcc,0xad,0xba,0xcf,0xf0,0xae,0xba,0xe6,0x59,0x9c,0xf5,0x27,0xeb +.byte 
0xdd,0x5c,0x86,0x25,0xa1,0xb6,0xb8,0x1c,0x94,0x98,0xa5,0x79,0x82,0x4e,0xdf,0x09,0x3f,0x2f,0x8a,0x4e,0x1b,0x5a,0xab,0xd4,0xe6,0x21,0xb3,0x02,0x19,0x39,0xa9,0x2e,0x0e,0xae,0x86,0x30,0xc7,0xa0,0x00,0xed,0x72,0xdc,0x71,0x77,0x42,0x76,0x54,0x68,0xb2,0x8d,0x5d,0xc3,0x5c,0x86,0xf8,0xb1,0x6c,0x67,0xdf,0x24,0x40,0x6a,0x2b,0x1d +.byte 0xbc,0x0d,0x25,0x7d,0x9e,0x1c,0xbd,0x18,0x85,0xda,0x7a,0x86,0x5e,0xed,0x10,0x80,0x83,0xa6,0xef,0x1e,0x93,0xac,0xce,0xe6,0x32,0x35,0xdf,0xb8,0xc7,0x9b,0xf0,0x0f,0x9d,0x37,0xbd,0xd9,0x58,0x33,0x19,0xa1,0x23,0x51,0x5f,0xa7,0x5a,0x99,0x7e,0x2a,0xfd,0x85,0x3c,0x26,0xad,0xcc,0x7e,0x07,0x32,0x7b,0x24,0x5a,0x6b,0x4b,0x71,0x4e +.byte 0xca,0x8b,0xc4,0x03,0x26,0x76,0x02,0x68,0x0d,0xa1,0x09,0xe0,0x2e,0xa4,0x82,0x88,0x05,0x5a,0xc4,0xcb,0x31,0x9d,0x56,0xda,0x0d,0x00,0x04,0xbc,0x07,0xca,0x1f,0xdf,0x9e,0x44,0xed,0x36,0xbd,0xa0,0x22,0xff,0x78,0xd1,0xcb,0x62,0xe0,0x0d,0x2e,0xdc,0x2e,0x36,0x28,0x8e,0xd3,0xa9,0xe0,0x38,0xd4,0xc5,0x2b,0xee,0xaf,0xa4,0x08,0x7d +.byte 0xed,0x2c,0x8a,0xf5,0x86,0x5e,0xed,0x2a,0x0d,0xbf,0xe6,0xfb,0x6f,0xc4,0x02,0x75,0x36,0xe5,0x7b,0xe9,0x4a,0xb3,0xf1,0xf4,0x86,0x6c,0x9a,0x6e,0xaa,0x7a,0xbe,0x4b,0xd6,0xf2,0x6b,0xcb,0x78,0x6f,0xf9,0x42,0x1a,0x19,0x7b,0x7e,0xba,0x59,0x02,0x8b,0xe3,0x5c,0x44,0xa4,0x84,0xa8,0x4a,0x67,0x93,0xee,0xc4,0x17,0x07,0x26,0xfe,0x86 +.byte 0xf1,0xc6,0xba,0xbf,0xc4,0x3d,0x33,0x41,0x4d,0xc4,0xf0,0xa8,0x6d,0xe1,0x06,0x16,0x2d,0xc9,0x5d,0x2a,0xf5,0x4a,0xc6,0xd2,0x8c,0x98,0x55,0xe8,0x8d,0xd0,0x31,0x5f,0xc7,0x05,0xd1,0xca,0xd2,0x72,0xe6,0xd0,0xcb,0x62,0x79,0xac,0x60,0x59,0x94,0x59,0x48,0x9e,0x91,0x17,0xa7,0xa0,0xac,0x4a,0xe5,0x08,0xe5,0x52,0xa4,0xd4,0x83,0x8c +.byte 0x83,0x57,0xe7,0xe5,0xfc,0x9b,0x43,0x78,0xc8,0x7e,0x94,0xc4,0x35,0x3e,0xac,0x4a,0x8d,0x60,0x80,0xdc,0x72,0xe3,0x15,0x09,0x2a,0xbd,0xcc,0x9a,0xe4,0x1a,0x18,0xa8,0xf1,0x29,0x9b,0xca,0x58,0x0b,0x6d,0x7b,0x33,0x91,0x05,0x27,0x6a,0x48,0xbe,0xac,0x08,0xa5,0x2a,0x64,0xf5,0xae,0x2a,0x90,0xf1,0x2d,0x3f,0xa8,0xff,0x17,0x92,0xc4 +.byte 0xec,0x3a,0x09,0xbf,0xae,0xd3,0xe2,0x1c,0x3c,0xc8,0x6f,0x91,0x72,0x99,0xe3,0x82,0x30,0x4f,0x40,0x5c,0x0c,0x8d,0xfd,0xbe,0x10,0xbc,0xce,0x1e,0x0a,0x09,0xbf,0xde,0xdc,0x72,0x7e,0x4c,0xbc,0xec,0x34,0xe2,0x96,0x8a,0xc6,0xee,0x19,0x6c,0xa8,0xf1,0xa5,0xb2,0x71,0x88,0x13,0xe8,0x11,0xda,0x3b,0x77,0x10,0x9c,0x9f,0x74,0x49,0x21 +.byte 0x16,0xcf,0x6f,0x05,0xc5,0xc1,0x4d,0xfe,0xe7,0x4d,0x67,0xe8,0x12,0x14,0xf7,0xaf,0x66,0x8d,0x55,0x34,0x00,0x18,0x10,0x6e,0x6a,0xd2,0x4c,0xd9,0xd3,0x15,0x40,0xbf,0xce,0x7b,0x10,0x69,0xbd,0x15,0x0e,0x60,0x2b,0x76,0x50,0x80,0x92,0x02,0x3c,0x0f,0xea,0x47,0x03,0xd9,0xf6,0x2c,0x00,0xde,0x29,0xb9,0x2e,0xf6,0x80,0x10,0x81,0x28 +.byte 0x6f,0x41,0xfc,0x88,0x65,0xe9,0xb5,0xd4,0x78,0x53,0xff,0x04,0xc4,0xdd,0xd7,0x35,0x34,0x59,0x85,0x33,0x01,0x33,0x67,0xe1,0x4e,0xc2,0xac,0xe6,0x24,0x24,0xb6,0x83,0x48,0x08,0x0c,0x73,0xe5,0x9c,0x98,0xe4,0x4c,0x3c,0x1f,0x6e,0x77,0xea,0x8c,0x76,0x23,0xbb,0x41,0x5e,0xc1,0x8a,0xba,0x3e,0xe5,0x3e,0x86,0x89,0xab,0x32,0x65,0x1b +.byte 0x00,0x92,0x56,0xe0,0x62,0xc1,0x8f,0xeb,0x15,0x7f,0x86,0xdf,0xa2,0xc2,0x8d,0xf5,0xb5,0x88,0x72,0x8c,0xba,0x92,0x30,0x53,0x58,0x3e,0x0b,0xe6,0x4f,0xd4,0xef,0x34,0xab,0xbb,0x61,0xe0,0x31,0x3c,0xe7,0xb2,0x5f,0x64,0xcb,0x52,0xc7,0x1d,0x95,0x96,0xd2,0x8c,0x87,0x34,0x92,0xf2,0xad,0xd9,0x78,0x1d,0xa1,0x67,0x58,0xfa,0xfb,0x06 +.byte 
0xc8,0x7f,0x9e,0xf7,0x02,0x12,0xd9,0x8c,0x68,0xbc,0x2b,0xd3,0xe1,0x0e,0x1e,0xbd,0x33,0x7a,0xfd,0x03,0x41,0xb9,0x72,0x2e,0x63,0xfe,0xb1,0x39,0xc3,0x0f,0xa0,0xa9,0x76,0x4f,0x7b,0xab,0xae,0xda,0x22,0xec,0x83,0x32,0xb0,0xec,0xd1,0xfd,0xc2,0x28,0x1e,0x42,0x29,0x31,0xd5,0xb3,0x33,0xcd,0x13,0x1d,0x9f,0xac,0x73,0x27,0xf7,0xea +.byte 0xc6,0x66,0xd2,0x32,0x91,0x60,0x35,0xf4,0x28,0x34,0x43,0x6a,0x74,0x8c,0x05,0x2a,0x84,0x34,0xfd,0x84,0xa5,0xcb,0x1d,0x2b,0x41,0x28,0xa6,0x19,0xed,0xcd,0xad,0xea,0x6e,0xf7,0x14,0x18,0xac,0x56,0x9a,0xf5,0xaa,0x7d,0x4e,0x8a,0x99,0xd1,0xda,0x41,0xaf,0xe8,0xfc,0xef,0x66,0x88,0xd0,0xed,0xfd,0xae,0x2a,0x85,0xc0,0x60,0xa2,0x30 +.byte 0x5d,0x1b,0x48,0xf6,0x3e,0xcf,0x56,0xdf,0x53,0xdc,0x2d,0xf5,0xfd,0x7f,0x2a,0x2a,0x4d,0x4f,0x11,0xcc,0xea,0x72,0xdb,0xb9,0xeb,0x92,0x0e,0x9f,0xc1,0x26,0xe9,0xbf,0x25,0x6a,0x27,0xe1,0x63,0x9b,0xdd,0x62,0x38,0xad,0xd3,0xb2,0x75,0x62,0x45,0xbf,0xbf,0xf4,0xe2,0xd6,0x97,0xe9,0xeb,0xeb,0x98,0xab,0x73,0xdc,0x8a,0xde,0xaa,0x3b +.byte 0x69,0xfd,0x61,0x6f,0xbb,0xfc,0x28,0xc0,0xff,0x37,0x2e,0xeb,0x31,0x59,0x57,0xfb,0xd3,0x0e,0xed,0x01,0x66,0x50,0x63,0x53,0xa2,0xd1,0x24,0x8c,0xc8,0x8d,0x80,0x03,0x2a,0x1e,0x11,0x3a,0xb9,0x6c,0xf4,0x5f,0x58,0xa2,0xd6,0x58,0x6b,0x85,0x61,0xd1,0xe7,0xdc,0x90,0x07,0x34,0x6e,0xb9,0x0b,0x0d,0xcb,0xd5,0xe3,0xc6,0x9d,0xb8,0x51 +.byte 0x37,0x61,0xd0,0x6c,0x2e,0xed,0xe0,0xbc,0x55,0x74,0x63,0x1b,0x42,0x17,0x6a,0x9c,0x91,0x1b,0x96,0x76,0xc8,0xe4,0x2b,0x2e,0x90,0xd9,0xe5,0x3f,0x56,0x1b,0x2f,0x93,0x81,0x86,0x2a,0xb4,0xdf,0x93,0xcb,0xfa,0x01,0x85,0xd9,0x26,0x46,0x46,0x97,0x2a,0x2e,0xb3,0x91,0xe4,0xcf,0xd9,0x01,0x5a,0x37,0xa6,0xca,0x5e,0xed,0xa9,0x94,0x35 +.byte 0x2c,0x69,0x5b,0x1e,0xf8,0x38,0x61,0x41,0x10,0xf6,0xe9,0x6e,0x96,0xee,0xe6,0x5f,0x78,0x14,0x93,0x12,0xd2,0x57,0xe5,0xf4,0x58,0x46,0xca,0xc8,0x75,0x59,0xbd,0xd0,0xe4,0x70,0x35,0xa5,0x4a,0xfd,0x54,0xe2,0x91,0x76,0x0e,0xe6,0xe3,0xbb,0x31,0x65,0x4b,0x18,0xa8,0xb4,0xfa,0xa6,0x7d,0x7a,0xa9,0x47,0x3d,0x2b,0x2e,0x66,0xac,0x5b +.byte 0x3e,0x5e,0x8c,0x27,0x0c,0x33,0x04,0x03,0x4e,0x5f,0xcd,0x6b,0x9c,0xaa,0x13,0x83,0x38,0xe9,0x38,0xcf,0x03,0x70,0x5a,0x0f,0x18,0xf5,0xec,0x64,0xf3,0x0c,0xe8,0xb1,0xa9,0x07,0x70,0xf7,0xde,0x0c,0x35,0xf5,0xe2,0xcd,0xed,0xe6,0x4d,0xac,0x5c,0x4d,0x3e,0x03,0x96,0x90,0x7b,0x4c,0x3e,0x18,0x42,0xc0,0xa7,0x23,0x12,0x8e,0x54,0xc1 +.byte 0xa1,0x2f,0x82,0x13,0xe6,0x1f,0x74,0xae,0x7b,0x4a,0xa4,0xbb,0xdc,0xc0,0x68,0x0f,0x83,0xbc,0xda,0xce,0xa2,0xe7,0xbe,0x18,0xcd,0x8b,0x35,0x05,0xa3,0x4b,0x6f,0xf0,0x53,0x12,0x42,0x2f,0x3c,0x09,0x87,0xb7,0xe3,0x36,0x29,0xe1,0xa2,0xb6,0x60,0x05,0xb9,0x66,0x80,0xe9,0xec,0x40,0x2a,0x55,0x78,0x5f,0x1c,0x5f,0xc3,0xc7,0x49,0x69 +.byte 0x87,0x97,0x5f,0xa5,0x31,0xa8,0x83,0x66,0x5a,0xd7,0xaf,0xf0,0x15,0xf3,0x01,0x62,0x9a,0x88,0x76,0x0f,0xb3,0xdf,0xf1,0xc6,0x34,0xc3,0xac,0x68,0x60,0x9a,0x91,0x03,0x13,0xea,0x0e,0x36,0x9c,0xf5,0x51,0xb7,0x0c,0xa4,0xeb,0xf0,0x41,0x85,0x54,0x05,0xed,0x7a,0xc2,0xba,0x3b,0xb8,0x1c,0x41,0x0d,0xbb,0xad,0x16,0x7e,0x64,0x4f,0x88 +.byte 0x7a,0x17,0xae,0x76,0x55,0x78,0x93,0xe8,0x99,0xa1,0x70,0x1f,0xf6,0x8a,0xb9,0xeb,0x41,0xb9,0x08,0xb8,0x9d,0x78,0x57,0xa1,0xe1,0x23,0xa0,0x03,0xd3,0x16,0xbc,0x16,0x24,0xed,0xc5,0x12,0x16,0x0a,0x8a,0x23,0x11,0x22,0xc2,0xfe,0x49,0x9d,0x3d,0x10,0x3d,0x4b,0xeb,0xab,0xcb,0x21,0x9d,0x9d,0xb1,0x64,0x87,0xe5,0x4d,0xb9,0xe7,0x10 +.byte 
0x05,0xa0,0x55,0x2f,0xdf,0x53,0x5e,0x03,0xec,0x7e,0xe4,0x1f,0x9b,0x16,0x0c,0xfc,0xd9,0xf9,0x66,0x39,0x93,0x9e,0x49,0x34,0x97,0xd6,0xa5,0x56,0x00,0xf1,0xaf,0x08,0xeb,0x58,0xcf,0x87,0x02,0xc4,0xf1,0x24,0xe8,0x29,0x83,0xc9,0x5d,0x56,0x68,0xa2,0xaa,0xba,0xb3,0x86,0x23,0x59,0x8d,0x32,0x96,0x4a,0xbb,0xe9,0xf2,0x53,0xb2,0x87 +.byte 0x4a,0xf5,0xdc,0x23,0xd4,0x2f,0x36,0x70,0xb5,0x1d,0xee,0x47,0x51,0x6c,0x35,0x2a,0xad,0x35,0x74,0x1b,0x98,0xb5,0x33,0x2c,0x6d,0x4c,0xf8,0x39,0x07,0x92,0x6c,0xc7,0x65,0x10,0x64,0xcd,0x53,0xa3,0xcb,0xcc,0xe4,0xb2,0x46,0xb3,0xb7,0x44,0x01,0x92,0x44,0x12,0x23,0x25,0x3e,0x00,0xe3,0xeb,0x5f,0xe5,0x76,0x48,0x4e,0x4a,0x7f,0x36 +.byte 0xf0,0x0b,0x5e,0xc0,0x97,0x0d,0xc8,0xcf,0xd5,0xb8,0xc0,0x11,0x8d,0xb9,0x1e,0x31,0x0f,0x84,0x36,0x2e,0xe0,0x42,0xe6,0x02,0x9d,0xa4,0xdb,0xa2,0x76,0xfd,0xa1,0x95,0xe0,0x49,0xe6,0xf1,0xd2,0xae,0x27,0x6b,0x11,0x05,0x47,0xb0,0xaa,0x61,0x01,0xd4,0xe6,0xcd,0x9d,0x7e,0x33,0x5d,0xec,0x22,0x96,0x59,0xb7,0xc5,0x50,0x83,0xa4,0x66 +.byte 0x56,0xc7,0x43,0xa6,0xf7,0x5d,0xb2,0x45,0xc0,0x96,0xa0,0x5b,0xb8,0xed,0xae,0x29,0xb3,0x7d,0xbd,0x01,0xde,0xc0,0xe7,0xcc,0xe9,0x55,0x32,0x32,0xbf,0xdd,0x03,0x1b,0xb0,0x4e,0xff,0x53,0x1f,0x4b,0xc6,0xec,0x16,0x9d,0x5b,0x78,0x74,0xc4,0x75,0x51,0x8a,0x1c,0xae,0x6b,0xcd,0x9c,0x77,0x47,0xbf,0xd1,0x38,0x3e,0x9e,0xc0,0xad,0x16 +.byte 0xb7,0x15,0x6b,0xdc,0xad,0xe9,0x13,0xbc,0x48,0xc1,0xaf,0x69,0xce,0xc4,0xcc,0x9b,0x73,0xf9,0xd5,0x7c,0xab,0xf0,0xf1,0x9b,0xea,0xc6,0x0b,0x19,0x47,0x42,0xc1,0xa0,0x02,0x64,0x17,0xce,0x88,0x4f,0x16,0xa6,0xed,0xdb,0xfe,0x61,0xd3,0xd6,0xc0,0x11,0x30,0x16,0xd2,0x45,0xb3,0x7e,0x52,0xd0,0x94,0x77,0xf0,0x0e,0xbf,0x16,0xc0,0x4a +.byte 0x2a,0x5c,0xac,0x55,0x57,0xb1,0x41,0xb6,0xa3,0x68,0x8c,0x0a,0x66,0x15,0xb4,0xf5,0xd9,0x9a,0xa9,0x68,0xf2,0xbc,0x06,0xc5,0x7c,0xd1,0x18,0x55,0x9a,0x2d,0x94,0x2e,0x04,0x4b,0x7d,0x3c,0xb1,0xe3,0x03,0x7a,0xa7,0xe3,0xe5,0x63,0x49,0x7c,0x3f,0x0a,0xc5,0xbd,0xd3,0x0f,0x04,0xfd,0x99,0xf7,0xe6,0x05,0x35,0x66,0x17,0x05,0x85,0x3b +.byte 0x98,0x92,0x11,0x26,0xe2,0x21,0x52,0x1b,0x54,0x08,0xc8,0xf0,0x4e,0x75,0x22,0x3f,0xe8,0xb6,0x35,0xa4,0x02,0x52,0x70,0xc2,0xce,0x5a,0x00,0xe2,0xe2,0x92,0x8c,0x97,0xa7,0x1d,0x42,0x52,0x8b,0xf1,0x81,0xa7,0xce,0x60,0x46,0xbe,0xf0,0x1d,0x34,0xdf,0x73,0x2a,0xd6,0x9a,0x2d,0xf9,0xe3,0x91,0x05,0xe4,0x1f,0x31,0x11,0x30,0xb0,0xff +.byte 0x8f,0x61,0x74,0xf4,0xef,0xcd,0xf6,0xa4,0x9a,0xd2,0x5e,0xba,0x27,0xe8,0x78,0x38,0xfc,0x75,0xff,0x3b,0x6c,0xde,0x4a,0x46,0x47,0x8e,0x97,0x28,0xe4,0x23,0xe0,0x10,0x07,0xca,0xcb,0x6d,0xed,0x29,0xc0,0xee,0x98,0x96,0x7c,0x90,0x1f,0x89,0x12,0x0f,0xd5,0x28,0xcf,0x6e,0x4b,0x9b,0x2d,0xb3,0xcd,0x97,0xb8,0xeb,0x58,0x23,0x26,0xb1 +.byte 0xb4,0x95,0x11,0x1e,0xee,0x00,0xde,0x24,0x28,0xa6,0x3f,0x15,0xa2,0x9a,0xcb,0x9d,0xe3,0x04,0x5d,0xc3,0x60,0x97,0x14,0x2c,0x84,0x2b,0x69,0x9c,0x2a,0xbf,0x08,0xba,0xc4,0x38,0x36,0xaa,0x89,0x11,0x32,0x63,0x01,0xa2,0x44,0x5f,0x50,0xf0,0x5b,0x11,0x15,0xc8,0x80,0xc9,0xa6,0xe7,0x5d,0x70,0xa8,0x34,0x42,0x97,0x2a,0x60,0x99,0x20 +.byte 0xa6,0x60,0xc0,0x70,0x8d,0x2f,0x3f,0x8a,0x14,0x80,0x8a,0xbe,0x05,0xb3,0x50,0x16,0xaf,0x32,0xb4,0x35,0x3e,0x1d,0x31,0x42,0xdd,0x50,0xeb,0x04,0x82,0x4c,0x83,0x3d,0x8f,0xb6,0x1e,0xc2,0xa9,0xd2,0x30,0xba,0x33,0xdb,0x97,0x6d,0x2d,0x97,0x59,0x33,0xc0,0xf8,0xa5,0x59,0xc5,0x44,0x9c,0xf1,0x06,0xc4,0xf2,0x31,0x3e,0xff,0xb8,0x12 +.byte 
0x00,0x4d,0x6c,0x2d,0xa1,0xc7,0x83,0xea,0x55,0x93,0x0e,0x89,0x76,0xbf,0x56,0x2a,0x99,0x62,0x54,0xad,0x2c,0xe8,0xf0,0xf9,0x70,0x18,0xa5,0x2b,0x24,0xac,0x59,0xc9,0x84,0xe3,0x1a,0x9d,0xa0,0xdb,0x1b,0x7f,0xd5,0x7e,0xb5,0xe0,0x86,0x36,0xc5,0x71,0x6a,0xab,0xdb,0xa5,0x84,0xf1,0x9e,0x9e,0xf6,0x1b,0xab,0x47,0x94,0x38,0x8e,0x5d +.byte 0x55,0xb4,0xf5,0xc3,0x59,0xc2,0x2c,0x6d,0x9d,0x28,0x7d,0x33,0xcd,0xc7,0xd6,0xdf,0x10,0xda,0x7c,0xd0,0x6c,0x91,0x88,0xd6,0x6b,0xe7,0x72,0x75,0x18,0xb1,0x87,0xe4,0xbb,0x10,0xe0,0xa3,0x0f,0xea,0x65,0x0a,0x70,0xc8,0xee,0x52,0x05,0x0a,0x27,0x39,0x66,0xda,0xd6,0xa6,0xfe,0x97,0x24,0x09,0x9d,0x20,0x76,0x4e,0x97,0x9d,0xa9,0x9f +.byte 0x76,0x20,0x27,0x57,0x5b,0xf4,0x76,0x1a,0x4b,0xcf,0x13,0x6c,0x9e,0x63,0x53,0x97,0xca,0x10,0xd6,0x90,0x7d,0xfc,0xe3,0x03,0x2c,0x6c,0x79,0x93,0x1a,0xae,0x0f,0x43,0xdb,0x75,0xde,0x56,0xa6,0x69,0x93,0xce,0x2d,0x94,0x56,0x77,0x90,0x19,0x71,0x7f,0x7a,0x99,0xbd,0x9c,0x79,0x62,0x00,0x49,0x3a,0x62,0x49,0x4b,0x92,0x65,0x8b,0xe2 +.byte 0xa8,0x3d,0xa5,0x89,0x23,0xac,0xea,0xf1,0xbf,0x38,0x84,0xd7,0xe2,0x65,0xb6,0xc7,0xbc,0x02,0x11,0xfd,0xe3,0x4c,0x57,0x38,0xd4,0x36,0x54,0xe8,0xbb,0x63,0x17,0xe9,0xda,0x82,0x50,0xf1,0x8c,0x34,0x4d,0x75,0x2a,0x64,0x49,0xaf,0x98,0xc3,0x1d,0xad,0x31,0xf3,0x90,0x23,0x39,0xf5,0xb5,0xf4,0x37,0x88,0x67,0x12,0x5d,0xfc,0xee,0xe5 +.byte 0x44,0x52,0x2c,0x78,0xb1,0x90,0xc1,0xc2,0x77,0x6e,0x31,0x3e,0xa0,0x36,0x87,0xb0,0xc6,0x6c,0x94,0xc2,0x43,0x4a,0x7b,0xa2,0x73,0xe7,0xa0,0xc3,0x4c,0xaf,0x4f,0xa6,0x92,0x1c,0x9a,0x6d,0xee,0xe8,0x4d,0xe1,0xe0,0xc7,0x67,0xcf,0xcf,0x7d,0x7f,0x0f,0x07,0x0d,0x6c,0x06,0x06,0xc2,0xc9,0x28,0xfc,0x8d,0xcd,0x23,0x01,0x97,0x5b,0x4d +.byte 0x1c,0xdb,0x34,0x51,0x6e,0xe2,0x56,0x24,0xd7,0xbd,0x12,0xc4,0x2f,0xb4,0x3b,0x02,0xaa,0x47,0xda,0x61,0xf6,0xca,0x44,0xa8,0x02,0xbf,0xbc,0x58,0xfb,0xa2,0xff,0xf3,0x54,0x59,0x5f,0xd7,0xa0,0x7c,0x83,0xa6,0xef,0xeb,0x71,0x51,0x74,0xa1,0x27,0x10,0x97,0x13,0x1f,0x42,0x91,0xdd,0xa8,0xf8,0xc7,0x60,0x90,0xca,0x2e,0xc8,0xaf,0x9f +.byte 0x65,0x1f,0x24,0x0a,0x30,0x5f,0xb9,0x4c,0xfb,0xcb,0xa3,0x96,0x5e,0xad,0xab,0xac,0x09,0x91,0xf5,0x96,0x1f,0xe0,0x96,0x14,0xc5,0xa0,0x26,0xa1,0xf1,0x91,0x80,0x38,0x7f,0x38,0xdc,0x98,0x96,0x20,0x46,0x50,0x20,0xd2,0x20,0xce,0x79,0xd5,0x81,0x60,0x97,0xb2,0xb0,0xeb,0x58,0x75,0x3c,0x99,0xf0,0xe0,0xfd,0xfc,0x90,0xc5,0xd1,0x3d +.byte 0x68,0x07,0xfd,0xa1,0x3f,0xeb,0x47,0xd0,0x58,0xe3,0xfa,0xbe,0xbf,0x20,0xdf,0x66,0x08,0x91,0xa4,0x5c,0x52,0x3e,0xdf,0x5c,0xb8,0xee,0xca,0xa6,0x89,0x06,0x97,0xb4,0x8d,0x60,0x35,0xb1,0xff,0x1e,0x39,0xf2,0x67,0xbc,0x71,0xee,0xeb,0x48,0x94,0x19,0x1a,0xee,0xc5,0xe2,0x7e,0x0d,0xf1,0xca,0xe8,0x2c,0xb0,0xaa,0x02,0x58,0x23,0x23 +.byte 0xce,0x37,0x5e,0xcb,0x58,0x40,0x2e,0x1a,0xa6,0x09,0x11,0x95,0xc4,0x6f,0x10,0xb0,0x15,0x22,0x48,0x67,0x74,0x6c,0x2f,0x4f,0x4a,0xb4,0x01,0xe5,0xa3,0x77,0xab,0xad,0xa4,0x04,0x22,0x71,0x58,0x4a,0x71,0xb1,0xe8,0xdf,0x43,0x18,0x0e,0x95,0x7c,0x8c,0x23,0x3a,0xf3,0x9c,0x20,0x60,0x20,0x69,0x51,0x28,0x7e,0x13,0x67,0x5c,0x7d,0x35 +.byte 0xfa,0x1b,0x04,0x8b,0xcf,0x42,0x6e,0x15,0x55,0xcd,0x04,0xdb,0x73,0xdb,0x47,0x5f,0x83,0x6e,0xd1,0x5a,0x15,0xa2,0xbb,0xf7,0xbb,0x84,0x58,0xce,0x75,0xe8,0xd2,0x92,0xd5,0xb7,0x76,0xf2,0x94,0x67,0x27,0x5f,0x32,0x91,0x3a,0xaf,0xd4,0x31,0xf8,0x92,0xce,0x63,0xb7,0x45,0x27,0xb4,0xb8,0x7a,0x1e,0x4e,0xde,0xcb,0xc8,0x5e,0xd3,0xbb +.byte 
0x52,0x91,0xd5,0x72,0xad,0x98,0xec,0x07,0xa1,0x56,0xb4,0x8e,0x04,0xfa,0x48,0x3f,0x17,0x07,0xf7,0xef,0x92,0x61,0x69,0xaf,0xdd,0xfc,0x76,0x03,0xe2,0xe9,0xe2,0xbe,0x5c,0xf2,0x8a,0xc5,0x99,0x51,0x7f,0xa4,0xf1,0xac,0x16,0xec,0x16,0xf5,0xb8,0x95,0x88,0x87,0xdb,0x27,0x2e,0x63,0x12,0x31,0x7d,0x6b,0x2b,0xa0,0x9b,0xb5,0xf9,0x82 +.byte 0x42,0x04,0x94,0xee,0x60,0x6e,0x4e,0x54,0x9b,0xfd,0xeb,0x01,0x3a,0xad,0x42,0xeb,0x08,0x3c,0x6a,0xa3,0xf2,0x46,0xfb,0x18,0x59,0x2c,0xa3,0x0b,0x22,0x1d,0x5d,0x47,0xa6,0x8c,0x06,0x9c,0xa1,0xcc,0x20,0x67,0xbd,0xf0,0x5b,0x94,0x9f,0xc6,0x10,0x8c,0xc8,0x15,0x52,0xe3,0x19,0xa1,0x89,0xfd,0x99,0xad,0x4f,0x10,0x51,0x0a,0xe4,0x4b +.byte 0x02,0x7b,0x0d,0x73,0x2d,0xae,0xa4,0x68,0x1d,0xb6,0xcf,0x58,0x67,0xc0,0xd0,0xca,0x11,0x34,0x31,0x9e,0xa3,0xbc,0x12,0x28,0x1e,0x8e,0x5a,0x63,0xf5,0xda,0xf2,0x36,0x94,0x63,0x2c,0x39,0x3d,0xf9,0x80,0x9f,0xbf,0x8d,0xef,0x1f,0x15,0xc8,0xdb,0x62,0x58,0x7d,0xdc,0x0a,0x7f,0x87,0xaf,0x6d,0x2e,0xac,0x92,0x4f,0x51,0xdf,0x5e,0x75 +.byte 0x5e,0x0f,0x7c,0x51,0x49,0x88,0x0f,0x7b,0x49,0xa5,0x7c,0x41,0x4e,0x2a,0x0f,0xd0,0x0f,0x78,0xeb,0x42,0xfc,0x07,0x8a,0x8b,0x4e,0x3e,0xf2,0x42,0xc5,0x21,0x01,0x66,0xe2,0x50,0xf6,0x3d,0x28,0x1e,0xbf,0xdc,0x71,0x7f,0xc5,0x6e,0xc1,0xab,0x1a,0x33,0x49,0xdd,0xa2,0xb9,0x52,0xbe,0x93,0x97,0x97,0x7a,0xf0,0x22,0xa8,0xc5,0x01,0xc6 +.byte 0x76,0x6f,0xb6,0x2c,0x09,0x80,0x62,0x5b,0x84,0x05,0x7f,0x79,0x28,0x04,0x67,0xa2,0x0f,0xfc,0xbb,0x17,0xe2,0x85,0xe3,0xa0,0xf3,0x44,0x47,0x96,0x68,0x80,0xb2,0xbf,0xba,0x63,0x53,0x38,0x6c,0x3b,0xcd,0x3c,0xa4,0x10,0x48,0x80,0xd8,0x49,0x5a,0xf0,0x5c,0x38,0x02,0x02,0x5b,0xf2,0x77,0xa4,0xfd,0x16,0xfd,0x13,0xc8,0x8b,0x9b,0xcd +.byte 0xe1,0x8d,0x70,0xb6,0x3d,0x24,0x65,0xda,0x1a,0x42,0x6f,0x90,0x64,0x9a,0x9b,0xda,0x54,0x44,0xc0,0xe0,0xd7,0xfb,0x73,0x10,0x3c,0xcf,0xa6,0x04,0x99,0xd9,0x45,0xe5,0x74,0xfe,0xdf,0x81,0xac,0xc8,0x30,0xe5,0x66,0x45,0x02,0xca,0xcd,0xd7,0xe6,0x7b,0x0d,0xda,0xe1,0xa0,0xa1,0xa1,0x87,0x34,0x63,0x0b,0xa7,0x82,0x39,0x83,0xba,0x18 +.byte 0x0b,0x16,0x35,0x11,0x53,0x8d,0xbe,0x7d,0xa8,0x7e,0x3f,0xf4,0x71,0xc9,0x37,0x6f,0x1a,0xd9,0x3f,0x8e,0xc4,0xc1,0xd3,0x80,0xdf,0xee,0x0e,0x6b,0x23,0xf7,0xbc,0x42,0x93,0x7a,0x36,0x6f,0x03,0x24,0xb4,0x9c,0x62,0xa0,0xed,0xed,0x0b,0x66,0xa8,0x25,0xe6,0x1a,0xd4,0x13,0xd1,0x16,0x14,0x2b,0x90,0x7d,0x2e,0xa4,0xda,0xb2,0xf9,0x33 +.byte 0x54,0xf9,0x0a,0x04,0x27,0x03,0x14,0xd2,0xd7,0xe2,0xc1,0xaa,0xb6,0xe8,0xe5,0x4c,0xf2,0xdb,0x4c,0xc8,0xb3,0xa4,0xeb,0xbf,0x12,0x5c,0x9d,0x65,0xaa,0x9a,0x66,0x77,0x42,0xb4,0xd5,0x5b,0x1f,0x3b,0xd7,0x91,0x89,0x57,0x2f,0xd0,0x86,0x99,0xb2,0xc8,0xc1,0x31,0xde,0x33,0x43,0x36,0x81,0xdb,0x97,0x7b,0x17,0x3b,0xa5,0x99,0xdb,0x63 +.byte 0x2b,0x48,0x4c,0xa6,0x5c,0x6c,0xd8,0xc9,0x6e,0x72,0x39,0xbe,0x6e,0x55,0x7e,0x9d,0xb7,0x20,0x8d,0x8f,0x81,0x20,0x78,0xae,0xc6,0x1d,0xe0,0x2d,0xb1,0xe7,0x64,0xbb,0xd4,0xc8,0x08,0x61,0x14,0x29,0x08,0xbc,0x1a,0xeb,0xfa,0x64,0x33,0x91,0x7d,0x91,0x41,0x65,0x8e,0x4c,0x0c,0xb2,0x79,0xc3,0x01,0x68,0xfc,0xd6,0xbb,0x50,0xcc,0x07 +.byte 0xa5,0xf6,0x2c,0x5e,0x10,0xd6,0xa3,0x62,0x18,0xec,0xa2,0xf2,0x6b,0xad,0xcd,0x02,0x01,0x75,0xbb,0x36,0x27,0x56,0x0f,0x55,0x03,0xe0,0x57,0xe1,0x72,0xeb,0x66,0x00,0x21,0xff,0x9a,0xbc,0xc1,0x1e,0x2c,0x93,0xe6,0x4d,0x93,0x28,0x10,0x7d,0x67,0x6c,0xf1,0xa4,0xe6,0x3a,0xa6,0x30,0xc8,0x50,0x1d,0x8b,0x6e,0x7b,0x76,0x98,0x14,0x4e +.byte 
0xed,0x84,0x67,0x2a,0x5f,0xac,0x0b,0x7b,0x47,0x40,0xb3,0x2d,0x7a,0xc1,0x23,0xdc,0x62,0xf8,0x8e,0x90,0x77,0xd4,0xf9,0x00,0x4b,0x67,0x04,0x72,0xf8,0xc9,0x2c,0x2d,0x0e,0x3c,0x3c,0xf3,0xfc,0xa8,0xe2,0x49,0xa4,0x00,0x82,0x98,0x72,0xa9,0xec,0xea,0xbd,0x3a,0x4e,0xd7,0x32,0xf1,0x11,0xf0,0x0d,0x9e,0xa2,0xe8,0xfe,0xcc,0x67,0xec +.byte 0xfc,0xd6,0xfe,0x83,0x5e,0x7c,0x2b,0xb3,0x42,0xf4,0x2d,0x9a,0xbe,0x20,0xd1,0x81,0x62,0xe9,0x59,0x19,0x28,0xdf,0x97,0x10,0x54,0xf7,0xde,0x60,0x51,0x6a,0xce,0x32,0x03,0x75,0x5c,0x25,0x25,0x82,0x9c,0x07,0xf7,0x2d,0xa8,0x1b,0x9f,0xd3,0x32,0x46,0x25,0x1f,0xb1,0xc5,0xbb,0x28,0x14,0x3e,0xed,0xa8,0x83,0x20,0xf4,0x9c,0x75,0xf4 +.byte 0xe6,0xc4,0x2d,0x05,0x88,0x31,0xfd,0x48,0xca,0x6c,0x7f,0xab,0xb4,0x77,0x93,0x1d,0x87,0xc3,0x4e,0xb8,0xad,0xb4,0x3d,0x37,0x7a,0xd2,0x77,0xff,0xc2,0xcb,0x9c,0xc7,0xbf,0x02,0x02,0x70,0xc9,0x9f,0x77,0x8a,0x7d,0xa7,0x9a,0x10,0xd1,0x0e,0xb7,0xec,0x61,0xee,0x77,0x24,0xe9,0x3d,0xcd,0x12,0xca,0xee,0x50,0xb0,0x27,0x5d,0xe5,0xac +.byte 0xa3,0x92,0xc7,0xd0,0x23,0x54,0xb1,0xe5,0x50,0xc3,0x15,0xd7,0x66,0x32,0x38,0x34,0xb1,0x59,0x1b,0xc3,0x59,0xe8,0xad,0x59,0x90,0x58,0x6e,0x02,0x40,0xb1,0x51,0x65,0x78,0x25,0x26,0x01,0xdd,0xcf,0x04,0xa2,0xfe,0xc3,0xbb,0x80,0x1c,0xb0,0x4e,0x9c,0x49,0x48,0xa3,0xe2,0xcc,0x81,0xc5,0xa8,0xd4,0xd5,0xe4,0xab,0x39,0xe7,0xe8,0x97 +.byte 0xc7,0x51,0xb4,0x5e,0x3f,0xe6,0xa7,0xcc,0x45,0x18,0xa2,0x6a,0xb3,0xa8,0x0b,0x7d,0xce,0x1a,0x97,0x4a,0x67,0xe1,0x3c,0x7c,0x4e,0xad,0x90,0xcf,0x2a,0x8f,0xb8,0xb6,0x96,0xaa,0x9a,0xc3,0x73,0xe6,0x71,0xdb,0x11,0x9b,0xd9,0xd9,0xfe,0xba,0x4a,0xf0,0x77,0xa4,0x15,0xb5,0xca,0xe1,0xb4,0x16,0x06,0x46,0xdf,0xc5,0x49,0x07,0x66,0xb3 +.byte 0xf5,0x30,0xe3,0xfb,0x44,0xac,0x80,0x3a,0x21,0xd9,0x5b,0x22,0x54,0x3a,0xae,0xbe,0xbd,0xf0,0x99,0x8d,0xb5,0x2a,0xf7,0xc9,0xf2,0xd3,0xfb,0x07,0x7c,0xd7,0x75,0x30,0x2a,0xcd,0x80,0xa8,0x2a,0x6a,0xb9,0x47,0xe2,0xa1,0xb0,0x76,0x6a,0x0f,0x9f,0x4a,0x56,0x3e,0xde,0xb3,0x89,0x12,0x25,0x63,0x1a,0x9d,0xea,0x64,0x08,0xc5,0x78,0xa7 +.byte 0x53,0xce,0xf8,0xb2,0xe5,0x97,0x3a,0xeb,0xd1,0x92,0xe1,0x4d,0xe0,0xf5,0x93,0x39,0x73,0xad,0x67,0xc9,0x0e,0x6b,0x16,0x4a,0x00,0xaa,0xb4,0xe6,0xa6,0xa5,0x67,0x95,0x90,0x04,0x5e,0x4d,0xc3,0x7f,0x6b,0xa1,0x50,0xb0,0x3b,0x72,0x0d,0xb3,0xec,0x9a,0x18,0x92,0x65,0x0c,0x2d,0x0f,0x94,0xd6,0x0f,0x95,0xba,0x4b,0xe6,0xc3,0x07,0x22 +.byte 0x0d,0x40,0xd4,0x0d,0x97,0x44,0xba,0x54,0x8c,0xf8,0x97,0x52,0x1f,0xa7,0xb2,0xe8,0x1b,0x0a,0xd5,0xde,0xff,0x1b,0x33,0x60,0x6a,0x28,0x68,0x36,0xb9,0x5a,0x3e,0x43,0x84,0x9a,0xb1,0x3d,0x3d,0xdb,0x1b,0xa2,0xc5,0x0e,0x2d,0xb5,0x5a,0xa5,0x36,0xe7,0xbf,0x7e,0xc3,0x76,0xad,0x1e,0xb5,0x49,0xc2,0xd5,0xa2,0x69,0x97,0x45,0x43,0x3e +.byte 0xeb,0xcd,0xdf,0x4f,0xab,0xb3,0xe8,0x49,0xaa,0x9c,0x9c,0x58,0x1e,0xc8,0x1c,0x79,0xe9,0x16,0x1d,0xfe,0x54,0xac,0x55,0x18,0x10,0x73,0x97,0xdc,0xbe,0x45,0x63,0xfb,0x48,0x41,0x88,0xb4,0x0b,0x3a,0x1d,0x65,0x40,0x1b,0x10,0x66,0xeb,0xbe,0xed,0xc7,0x6c,0xd5,0x0c,0x19,0x85,0x23,0xb1,0x38,0xb3,0x4b,0xcd,0xc7,0xc5,0x06,0x18,0x40 +.byte 0xbd,0xef,0x9f,0x2e,0x3a,0x71,0x33,0x05,0x30,0x71,0xca,0xe9,0x7a,0x2c,0xe7,0x83,0x4e,0x3d,0x4b,0xc8,0xc7,0xcb,0x74,0x9c,0xa2,0xc7,0xbb,0x8c,0x44,0x0d,0xd8,0xb3,0x01,0x7c,0xdf,0x79,0xee,0x47,0xcb,0x91,0x6f,0xc3,0xfd,0x0f,0xfb,0xf8,0x6b,0x9b,0x00,0xaf,0xf6,0x69,0x82,0xa5,0x58,0x54,0x22,0x7f,0x4b,0xee,0xa7,0x03,0xdb,0xb6 +.byte 
0x5f,0x12,0xe1,0x04,0x43,0x17,0xec,0xd4,0xdd,0x39,0x28,0xfa,0xa3,0x09,0x5e,0x14,0xaf,0x6b,0xfe,0x0c,0x65,0x01,0x13,0x75,0x3d,0xe7,0x6d,0xd9,0xda,0x1d,0x13,0xc1,0x56,0x40,0x50,0x95,0x65,0x8f,0xad,0x51,0x3f,0x13,0x05,0x2f,0x83,0xcd,0xca,0x8b,0x75,0xa2,0x39,0x61,0xde,0xd7,0x36,0xf9,0x1d,0x43,0x5b,0xc4,0x9a,0xc9,0xfc,0xa8 +.byte 0xf4,0x76,0x90,0x91,0xe8,0x52,0x5b,0x84,0xe7,0xc9,0x8e,0x7d,0x84,0xba,0xb1,0x32,0x12,0xce,0x06,0x9e,0x98,0x83,0x1f,0x7f,0x31,0xd7,0xf0,0x8a,0xa2,0xca,0xae,0xb3,0x50,0x51,0x93,0xfb,0x2f,0x43,0x0a,0xee,0x06,0x85,0xec,0xb8,0xf1,0x73,0xb1,0x65,0x37,0x05,0x8e,0x68,0xf7,0x7a,0xff,0xe7,0x17,0x08,0x5e,0x19,0x75,0x3d,0xf9,0x5e +.byte 0xd5,0x25,0xf6,0x3b,0x99,0xb9,0x96,0x42,0x7a,0x37,0x8f,0x0d,0xde,0x22,0x83,0x89,0xf0,0x77,0x1f,0x22,0x42,0xc7,0xb5,0x70,0xcb,0xfd,0xf0,0xa9,0x87,0x8e,0x1f,0x01,0x9a,0x26,0xa6,0x8c,0x41,0xb9,0x12,0xd6,0xf2,0x5b,0xe5,0xfd,0xdc,0x74,0xbd,0xa1,0xc8,0xf7,0x3b,0x8c,0xe1,0x1d,0x42,0xb4,0x07,0x24,0x18,0x84,0x94,0x8a,0xce,0x00 +.byte 0xbd,0xd7,0xb0,0xfd,0x8f,0x0a,0xd3,0x75,0xa4,0xe8,0xfc,0x09,0xa9,0xa3,0x57,0x68,0x79,0x0e,0xef,0x37,0x46,0xd5,0x3b,0x8c,0x0d,0x67,0xbc,0x2c,0x5d,0x3e,0xf7,0xcc,0x9c,0x9e,0x81,0x62,0xc8,0xec,0x38,0x20,0x07,0x66,0xe4,0x83,0x15,0x13,0x3b,0x47,0x23,0xd9,0x46,0xaf,0x65,0xe1,0x40,0x2d,0x14,0x84,0x72,0xc1,0xbf,0xbe,0x81,0xc4 +.byte 0xcb,0x04,0x16,0x5e,0x2f,0x60,0x3a,0x8e,0x1a,0xd3,0xa2,0x00,0x25,0x6c,0xb7,0xdb,0x0d,0x20,0x99,0xb8,0x45,0x54,0xbf,0xc4,0x52,0x52,0x92,0x7d,0xcd,0xa1,0x9a,0x12,0x5e,0x27,0xe9,0xcf,0x79,0x9d,0xa8,0x6c,0xcd,0x37,0x20,0x08,0x09,0xc6,0x94,0x53,0x00,0x04,0xf5,0x3b,0xea,0x00,0x1b,0xc3,0x02,0xff,0xbc,0x18,0x1f,0xb7,0xf7,0x26 +.byte 0xe8,0x8b,0xc4,0x5f,0xf7,0xbe,0x9b,0xb3,0xba,0xae,0xbd,0x9c,0x3f,0x95,0xf7,0xcd,0x2b,0x40,0xf4,0x1c,0x6f,0xd7,0x52,0xe1,0xa7,0xdc,0x79,0xa4,0x88,0xff,0xfc,0xcf,0xfb,0xbb,0xe6,0xef,0xb6,0x31,0xac,0x24,0xa7,0x40,0xea,0x76,0xa2,0x34,0x6c,0xb1,0xfb,0x96,0x6b,0xfa,0xdd,0x60,0x70,0x73,0xb8,0xfd,0x66,0x3d,0xf9,0x63,0xc9,0x04 +.byte 0x70,0x20,0x35,0xca,0x04,0xb8,0xb3,0x4f,0x24,0x64,0x54,0xc2,0xd9,0x4d,0x8b,0xad,0x07,0xad,0xc5,0xb9,0x84,0xac,0x7c,0x65,0x4b,0x98,0x1d,0x09,0x23,0x95,0x5c,0x85,0x26,0xe5,0x8e,0xec,0xeb,0xc3,0xd5,0x15,0x9c,0x37,0x4e,0xf3,0x3c,0x97,0x92,0x75,0x99,0x48,0x48,0x52,0x4b,0x7b,0x93,0x54,0xd7,0x4f,0x7f,0xe5,0x51,0xdc,0x74,0x85 +.byte 0x9a,0xae,0xbd,0xf8,0xe6,0xe8,0x3f,0x1b,0xee,0x8b,0xf4,0xd8,0x5c,0x6c,0x46,0x6e,0x1d,0xaf,0x67,0x27,0x9a,0x39,0x4e,0x6b,0x99,0xcc,0xc0,0x66,0x54,0xbf,0x60,0xf6,0x24,0x64,0xfd,0x16,0xbf,0x56,0xb2,0x07,0x87,0x46,0xa6,0xef,0x40,0x67,0x78,0x2f,0x78,0x49,0x81,0x25,0xbd,0xa1,0xcf,0x78,0x68,0x25,0x8e,0x93,0x0a,0x4b,0xe1,0x92 +.byte 0x33,0x9c,0x13,0x70,0xd4,0xdf,0x74,0x34,0x8f,0x21,0xb9,0x51,0xd7,0x74,0xa9,0x02,0x6e,0xdd,0xb2,0xb4,0x6e,0x2a,0x95,0xdb,0xe4,0xaf,0x17,0xf5,0x9b,0xa5,0xc1,0x72,0x36,0x35,0x02,0x37,0x1c,0x38,0xaa,0x81,0x76,0xc6,0x1c,0xc3,0x2c,0xc5,0x45,0xaf,0x03,0xea,0xe6,0x14,0x51,0x44,0x84,0x9e,0x32,0xfe,0x4b,0x47,0xe9,0xb4,0x12,0x96 +.byte 0x13,0x6f,0x4c,0xed,0xe4,0xb0,0x79,0x7b,0xe5,0xc0,0x37,0x87,0x78,0x28,0x42,0xf7,0xd4,0xde,0xfc,0xd2,0x23,0x11,0x09,0xa5,0x11,0xc3,0xc4,0xf5,0xe0,0x2b,0x47,0x01,0x63,0xf2,0x85,0x1f,0x45,0x28,0xae,0xd3,0x29,0x04,0x1a,0x4b,0x83,0xab,0xf2,0x35,0x3a,0x40,0x2c,0x8d,0xb3,0xc7,0x47,0x0d,0xd1,0x3c,0xd0,0x1c,0x6b,0x5d,0x9b,0x4e +.byte 
0xdf,0x36,0x8d,0xc6,0x54,0x9e,0x61,0x51,0xf1,0xd2,0xa4,0x39,0xad,0x4a,0x14,0xa1,0x0b,0xd3,0xae,0x91,0x1a,0x29,0xeb,0xc5,0x75,0x88,0x13,0x1e,0x96,0xdd,0x6f,0x86,0x92,0xaa,0x37,0x16,0x95,0x86,0xbc,0xb1,0x35,0xbf,0x5f,0x75,0x40,0x46,0xe1,0x6f,0x2f,0x33,0x2d,0x13,0x35,0xef,0xca,0x09,0x04,0xe4,0x42,0xef,0x69,0x66,0xda,0xa6 +.byte 0x01,0xda,0x09,0xfd,0xb1,0x40,0x8d,0xaa,0xdd,0x08,0x0d,0xf5,0xf1,0xd6,0xc6,0x11,0x3b,0xbd,0xd3,0x04,0x70,0x76,0xaf,0xec,0x9b,0xcc,0x6a,0x1d,0xeb,0x95,0x4a,0x01,0x0a,0x03,0x62,0x00,0x32,0xb3,0xe0,0xd1,0x36,0xb6,0xeb,0xde,0x4b,0x5f,0x35,0x79,0x07,0x4a,0x0d,0xa1,0x8c,0xde,0x6b,0xd2,0xca,0x71,0x64,0x73,0xf7,0x9c,0x1d,0x95 +.byte 0x5c,0xdc,0xb9,0x4f,0x00,0x2e,0x86,0x3d,0x81,0x7b,0x05,0xa5,0x9e,0x03,0xa3,0x62,0xcf,0x22,0x78,0x0b,0xfe,0x09,0x3e,0x62,0x93,0x19,0x6e,0x47,0x7d,0x92,0x4a,0x0b,0xae,0xcb,0x37,0x4d,0x5a,0x3a,0x7a,0x68,0xde,0xb2,0x7e,0xd7,0xda,0x5c,0x45,0xd2,0x0f,0x1d,0x03,0xbc,0xed,0xd8,0xe5,0x2e,0x26,0x10,0x82,0x46,0x5a,0xe0,0x13,0x32 +.byte 0xf8,0xb9,0x18,0x8c,0xbd,0xb4,0xb3,0x8c,0x2f,0xb0,0x5d,0x0b,0xf3,0x8f,0x5a,0xda,0x8b,0xda,0x39,0xfe,0xe6,0x66,0x95,0x3f,0xfe,0x49,0x89,0xbf,0x43,0x36,0x77,0xc7,0x6d,0xea,0x92,0x5c,0x71,0xa6,0x29,0x50,0xb0,0x2f,0xed,0x89,0x9f,0x2c,0xd6,0x6b,0xfa,0xbe,0x62,0x9f,0x62,0xc7,0xe3,0x2e,0xd4,0xf2,0x2c,0x9c,0x98,0x37,0x38,0x5e +.byte 0x81,0x6c,0x9e,0xcc,0xff,0x0f,0xfa,0xfa,0xe8,0xdd,0x2e,0x2d,0xb5,0x92,0x44,0x5e,0x2f,0xe1,0xd0,0x6c,0xc3,0xb9,0x11,0x95,0x70,0x4b,0x01,0xa0,0xc1,0x5e,0xe8,0x1d,0x40,0x16,0x9b,0x6e,0x29,0x1b,0x13,0xb9,0xda,0x39,0xbd,0x40,0x42,0xe2,0x06,0x35,0x57,0x2f,0xa8,0xf5,0xa7,0x00,0x60,0x07,0x26,0x21,0x6b,0xe6,0x23,0xa2,0x2a,0x70 +.byte 0xeb,0x85,0xcb,0xa9,0x73,0x31,0x62,0xf7,0xb0,0x90,0xd7,0x26,0xc1,0xd3,0xd7,0xcc,0x15,0x72,0x86,0xa6,0x0f,0x4a,0x24,0x14,0x5d,0xcd,0xbe,0xad,0x7d,0xf0,0x05,0x39,0x0c,0x10,0xbe,0x11,0x9a,0x36,0x9f,0x60,0x41,0xc6,0x7c,0xab,0x54,0x8a,0xac,0xc4,0xea,0xbd,0x43,0xeb,0x19,0x5a,0x8d,0x05,0xd1,0x83,0x58,0x92,0xb8,0xc6,0x75,0x56 +.byte 0x2c,0x58,0xb8,0x2d,0xe1,0x42,0xb4,0x0b,0xc9,0x97,0x79,0xb8,0x62,0xd0,0x15,0xd1,0x5d,0x0d,0x57,0x83,0xe4,0xba,0x73,0xa2,0x27,0xb8,0x56,0x64,0x28,0xaf,0xd2,0x58,0xe3,0xe6,0x12,0x01,0x6e,0x6a,0xfb,0x81,0x57,0xcd,0x32,0xc2,0x42,0x2a,0xe2,0x51,0x4a,0x4c,0xf8,0x69,0x0e,0xc0,0xe6,0x9f,0xf4,0x46,0x4b,0x60,0xcc,0x41,0x03,0xa4 +.byte 0x14,0xf0,0x15,0xb5,0xe5,0x39,0xfd,0x69,0xee,0xce,0x23,0x3a,0x50,0x66,0xdb,0xf4,0xe4,0x31,0x23,0xe9,0x06,0x93,0xdd,0x38,0xbc,0x2d,0xb9,0xf2,0x64,0x39,0x2f,0x1b,0xa9,0x71,0x0c,0x68,0xf7,0xb0,0x5b,0x74,0xe5,0x08,0xc6,0x5d,0xbe,0xb8,0xf7,0x40,0x0e,0xb4,0xe6,0x76,0x0c,0x14,0x8f,0x9d,0x25,0x95,0x6c,0x05,0x78,0x68,0x8a,0xa6 +.byte 0x80,0x24,0x8a,0x0b,0x6a,0xd7,0xfc,0xec,0x36,0xba,0x57,0xdd,0x49,0x82,0x3c,0x5f,0x9d,0xf4,0x57,0xac,0x16,0x99,0xed,0x73,0xa6,0xb0,0x2c,0x23,0xdb,0xf8,0x45,0x22,0xf4,0x82,0x16,0xc4,0x68,0x2f,0xe7,0x8c,0x85,0x6e,0x3c,0x43,0xdd,0x3d,0xea,0x90,0xeb,0xf4,0xef,0xf1,0x36,0x48,0x15,0x29,0x07,0x96,0x51,0xb5,0x78,0xa1,0xa3,0x59 +.byte 0x18,0x4d,0x11,0x5d,0x5e,0x67,0x69,0x28,0x29,0xcb,0xeb,0xbc,0x8f,0x17,0x12,0x57,0xaf,0xda,0xb5,0x86,0xef,0x59,0xdf,0xb1,0x6b,0x6a,0x33,0x66,0x67,0xd1,0x42,0xee,0xec,0x65,0xf2,0xeb,0x97,0x17,0x4e,0x01,0x3f,0x4d,0xb4,0x06,0x8e,0xf9,0xa8,0x79,0xb6,0xf1,0x67,0x8b,0xff,0x0b,0x5f,0x93,0x70,0x76,0x54,0xae,0x7b,0x0d,0x4a,0xbc +.byte 
0xf7,0xdc,0x11,0x64,0xb3,0x6a,0xd1,0x69,0x45,0x1b,0x57,0xfc,0xb5,0xfe,0x86,0xb2,0xd6,0xde,0x82,0x23,0x86,0x6b,0x21,0x78,0x8b,0x2e,0x96,0xf8,0x04,0x8b,0xba,0x15,0xae,0x33,0x91,0x27,0x88,0xe3,0xc1,0xe7,0xf8,0xc3,0xa6,0xb6,0x73,0xec,0x84,0x95,0x22,0x45,0x58,0xb1,0x50,0x99,0xde,0x8a,0x37,0x41,0x9f,0xb8,0x27,0xd6,0xd8,0xaa +.byte 0x0f,0x0e,0xac,0xe4,0xd0,0x38,0xcf,0x2f,0x03,0x6f,0x3d,0x8a,0xd7,0x51,0xd6,0xf3,0x17,0x76,0xb5,0x0f,0xc5,0xf8,0xa7,0x0a,0x91,0xaa,0x8d,0xbc,0x15,0xd6,0x46,0xb9,0xdc,0x18,0x47,0x9c,0xd9,0x13,0xa5,0xb1,0xb5,0x45,0x2f,0x03,0x32,0x5c,0x8b,0xac,0x42,0x5b,0xd9,0x1a,0x41,0x1e,0x27,0xf9,0x92,0x72,0xc1,0xc7,0xc1,0x50,0x25,0x22 +.byte 0x7a,0x00,0x41,0x1f,0x2d,0x28,0xaf,0x41,0x96,0x8e,0x97,0x3b,0x36,0x80,0x16,0xe6,0x51,0x8f,0x07,0x13,0xd9,0x81,0x79,0x94,0x92,0xaa,0xb9,0xb6,0x39,0xf2,0x4d,0x24,0x6b,0x77,0x25,0x7e,0x47,0x6c,0xc7,0x62,0x3d,0x96,0x21,0xac,0x1a,0xf0,0x5f,0x5d,0x5a,0x7e,0x17,0xdd,0x47,0xd5,0x19,0x0a,0x85,0x3e,0xd5,0x6b,0x52,0x12,0xe2,0xbc +.byte 0x43,0x79,0x28,0x1d,0x72,0xcc,0xa6,0x6c,0xea,0x9b,0xe9,0x04,0x34,0x2c,0x41,0x3a,0x64,0xe8,0xcb,0x12,0xfa,0xd5,0x45,0xad,0xe8,0x3e,0xa2,0x5c,0xb8,0x83,0x52,0xdb,0x0c,0x98,0x24,0x76,0xd2,0x00,0x62,0xff,0xac,0xd7,0x11,0xee,0xcf,0xfb,0xdd,0x65,0xd2,0x75,0xb0,0x25,0x4e,0x76,0x3f,0xa2,0x1a,0xae,0xee,0xc1,0x59,0x1b,0x0c,0x42 +.byte 0x70,0x42,0x06,0x00,0x64,0x31,0xe0,0xce,0x3a,0x91,0x5e,0x9d,0x56,0x83,0xab,0xa7,0x73,0xc2,0x15,0x29,0xba,0xf9,0x1d,0xc8,0x4b,0xc6,0x3a,0x9e,0xab,0xd7,0xfd,0x17,0x8d,0x80,0xf0,0xa1,0x8a,0x5a,0x7a,0x80,0xd8,0x1f,0xa9,0x5b,0xec,0x68,0x99,0x3a,0x66,0xcc,0x5a,0xdf,0x5f,0xe9,0xd5,0x6a,0xf2,0x2c,0x7e,0xf8,0xa7,0xdf,0x0c,0x59 +.byte 0xbd,0x85,0xf0,0xc9,0x91,0x44,0x9c,0x86,0x24,0x60,0xfb,0xe9,0xff,0x3c,0xa7,0xa7,0x6d,0x4b,0x17,0xb3,0x24,0x99,0x14,0xbc,0x64,0xd0,0x41,0xaa,0xcd,0x26,0xd3,0xa3,0x51,0xeb,0x25,0x1d,0xb2,0x7d,0xf1,0xf3,0xf3,0xf0,0x3a,0xe0,0xb5,0xa9,0x24,0xc3,0x78,0x4a,0xef,0x9b,0x34,0x93,0xf8,0x0c,0x71,0x10,0x5b,0xf0,0xe7,0x08,0x4d,0x5f +.byte 0x74,0xbf,0x18,0x8b,0x48,0x8d,0xd7,0x23,0x81,0xed,0xa2,0x29,0xa9,0xdb,0x91,0xf6,0x61,0x7c,0xca,0x1e,0xe0,0xa7,0x21,0x9d,0xfc,0x04,0x3a,0x87,0xbb,0xf9,0xa4,0x3b,0xbb,0xc4,0x89,0xa1,0x7f,0xdc,0x83,0xfa,0x5e,0x0f,0xcf,0xdf,0xf6,0x41,0xd3,0xa3,0x76,0x76,0x44,0x3e,0x01,0xee,0xce,0xf6,0xc3,0xb9,0x49,0x43,0x6e,0xee,0x09,0x4c +.byte 0x87,0xe6,0xa3,0xf5,0xa0,0x8d,0x99,0xb3,0x3b,0xd6,0xeb,0x27,0xf9,0x34,0x68,0xc8,0x04,0x80,0xb2,0x4d,0xb6,0xde,0x98,0x81,0xe0,0xec,0xc9,0x06,0xde,0x86,0xee,0xf0,0x87,0xb8,0x67,0x0e,0xce,0xf8,0xc5,0xb1,0xd2,0xe1,0xe3,0x53,0x1d,0xbe,0x6c,0xdd,0x5e,0x83,0x02,0xf5,0xc8,0xda,0xcf,0x3c,0xcb,0x88,0x2c,0xca,0x65,0x65,0x9e,0x71 +.byte 0x4e,0xf2,0x98,0x96,0xb2,0x54,0xb4,0x96,0xdc,0x84,0xb5,0x39,0x74,0x9b,0x61,0xcf,0x52,0xef,0xb3,0x0c,0x62,0xc9,0x92,0xe1,0xe5,0x6f,0x2f,0x0c,0x61,0x0d,0x6f,0xfd,0xd8,0x84,0x25,0xba,0x20,0x59,0x00,0xf5,0xa9,0xf1,0x77,0x6e,0x9a,0x3d,0x93,0x69,0xde,0xaf,0x9a,0xe6,0xe3,0xfd,0xb9,0xd3,0x04,0x82,0x18,0xa1,0x5b,0x9b,0xe0,0x29 +.byte 0x4c,0x64,0xf5,0x95,0x57,0x25,0xd3,0x04,0x8b,0x4a,0xe9,0x57,0x6f,0xd1,0x8c,0x40,0x73,0x49,0x32,0x93,0x3f,0x26,0xb4,0x6b,0xd3,0xd4,0x90,0xb7,0xe1,0xaf,0xa0,0x9a,0xc0,0x86,0xb7,0x5e,0xec,0x29,0xaa,0x03,0x4e,0x56,0xb5,0xcd,0x46,0x7d,0xe0,0x26,0x3d,0x5f,0xd3,0x55,0x86,0x68,0x4a,0xc5,0x42,0x5d,0x60,0x3a,0x39,0x6f,0x45,0xb9 +.byte 
0x6a,0xea,0xf4,0x05,0xc8,0x24,0xf8,0xcd,0xe5,0xeb,0xca,0x3a,0xe7,0xb4,0x59,0x83,0x5a,0xa5,0x1d,0xe4,0x6a,0xaa,0x35,0x00,0x42,0x32,0xa5,0x6c,0x3e,0xc1,0xc2,0xc4,0x9d,0x2e,0x43,0x57,0x79,0x52,0xf6,0x1e,0x02,0xb8,0x9b,0xcd,0xf0,0x3d,0x57,0xa3,0x6f,0xf7,0x12,0x54,0x6c,0x63,0x0d,0xb2,0xba,0xff,0xa1,0xf6,0xf5,0xdf,0xa5,0xed +.byte 0xda,0xdf,0x56,0x72,0x1e,0xc5,0x3f,0xad,0xd0,0xf9,0x38,0x94,0x51,0xe3,0xa4,0xb4,0xbf,0xd5,0x24,0x2a,0x90,0xfe,0xd4,0x34,0x6c,0xa8,0xc8,0x1c,0x9a,0xaf,0xac,0xff,0x5b,0x67,0x44,0x4c,0x4d,0xa7,0x59,0x2c,0x9f,0x67,0x07,0x25,0xe1,0x7f,0x4e,0x4a,0xaa,0x8f,0x5d,0xd1,0x26,0x0d,0x73,0x9b,0x69,0x5d,0xdf,0xb2,0xa5,0x89,0xbb,0x82 +.byte 0x0b,0x09,0xf3,0x11,0x76,0x5d,0x2d,0xad,0xc3,0xc1,0x15,0xbc,0xaf,0xa2,0xe6,0xd5,0xb0,0x6d,0x80,0xa6,0xda,0xfa,0x3b,0x9c,0xaf,0xff,0x98,0x40,0x83,0x3a,0xe1,0xb8,0x98,0x0e,0x97,0x00,0x89,0xfb,0x37,0xcb,0x81,0x36,0x34,0x33,0xbb,0x5c,0xd0,0x51,0x37,0xd6,0xb5,0x6c,0x3a,0x61,0x0a,0x27,0x23,0x96,0xa9,0x79,0x8d,0xf0,0xbe,0x31 +.byte 0xba,0xdc,0x89,0x4e,0x88,0x98,0xe4,0x10,0x15,0x8a,0xe1,0xae,0xe8,0x6d,0xa4,0x61,0x56,0x14,0x84,0x59,0x64,0xc2,0xaa,0xd8,0xfd,0x19,0xfc,0x17,0xf1,0xfc,0x6d,0x17,0xcb,0xea,0x7a,0x47,0x00,0x75,0x17,0xf3,0x62,0xfe,0x3a,0xbc,0x28,0x1a,0x0e,0x88,0x48,0x63,0x4a,0xcb,0x20,0x46,0xa4,0x75,0xf8,0xf1,0x7a,0xd6,0x92,0x7f,0x92,0xfa +.byte 0x91,0x95,0x2f,0xbc,0x5b,0x42,0xf1,0x55,0xaf,0x91,0xa2,0x3b,0x29,0x5c,0xc8,0x5e,0x97,0x91,0xa2,0x2e,0xd2,0xa8,0x1c,0xf6,0x16,0xc5,0x15,0xf2,0x42,0xb3,0x41,0x59,0x52,0x8d,0x94,0x52,0xc4,0xc6,0x2c,0xdd,0x6f,0x01,0xea,0x62,0x42,0x83,0x7e,0x2e,0xf8,0xb8,0xc1,0xf3,0x71,0xd1,0x11,0x14,0x7a,0x3d,0xcd,0xec,0xe0,0x79,0x8b,0xbd +.byte 0x28,0x12,0x60,0xf0,0x66,0xf1,0x1c,0x1c,0x19,0x07,0x8c,0x26,0xff,0xcc,0x72,0x9a,0xbd,0x12,0xe6,0x2b,0x2b,0xb1,0x32,0x04,0x98,0x92,0xd9,0x24,0x97,0x59,0x46,0xc6,0x11,0xe1,0x31,0x14,0x46,0x27,0x96,0xb1,0x06,0x81,0xd5,0xe8,0xff,0x45,0x3d,0x3c,0x04,0x9a,0xd8,0x0b,0x1f,0x41,0x03,0xba,0x1b,0x3e,0x4e,0xd5,0x7d,0x48,0x00,0x68 +.byte 0xb3,0xe8,0xe0,0xc8,0x3c,0xcf,0xdc,0xbe,0x29,0x90,0x64,0x51,0x18,0xdc,0xcd,0x87,0xcb,0xa8,0x3d,0xf8,0xb4,0x73,0x11,0xdc,0x7a,0xcb,0xa4,0x81,0x9e,0x3a,0x72,0xde,0x18,0x36,0x86,0x15,0x91,0xbc,0xeb,0x7f,0xe2,0xfb,0x6b,0xf1,0x5a,0x3d,0x05,0x50,0xeb,0xcf,0xd2,0xcc,0xf2,0x62,0xb1,0x32,0x46,0x14,0x95,0x4e,0xdf,0x73,0x64,0x61 +.byte 0x5f,0x3d,0xbf,0x52,0x3e,0xa7,0x55,0x01,0x9a,0xd8,0x01,0xef,0xf7,0x60,0x6f,0x83,0x43,0x6b,0x4c,0xa2,0xc8,0x04,0x34,0x70,0x70,0xa1,0x99,0xc9,0xa7,0x54,0x1e,0x87,0x99,0xb3,0xec,0xfe,0xe9,0x2d,0x39,0xef,0x6f,0x4d,0x8c,0xf2,0x4b,0xd2,0x12,0x5d,0xb6,0xa7,0x0b,0x04,0x3b,0x69,0xdd,0x9a,0x18,0x2d,0xd9,0x22,0x00,0x38,0x15,0x9a +.byte 0x6e,0x6c,0x0c,0x84,0x32,0x32,0xb2,0xf9,0x61,0xef,0x74,0x35,0xec,0xcc,0xd7,0xbc,0x9d,0xe9,0xcd,0xe3,0xa0,0xa5,0x15,0x0a,0xfe,0x1f,0x37,0x35,0x2b,0x7c,0x42,0x50,0x81,0x67,0x52,0xb7,0xa7,0x9e,0x8f,0xda,0x64,0xc0,0xc0,0xc3,0x93,0xc7,0x9d,0x41,0xb8,0x4b,0x69,0x80,0x13,0x88,0x8a,0x07,0xf9,0x47,0xad,0xc9,0x4f,0x3d,0xc7,0xba +.byte 0xd2,0xf2,0x7a,0xa0,0x38,0xbe,0xe1,0xfa,0x83,0xda,0x79,0x29,0x7f,0x4c,0xfa,0x0e,0x9b,0x59,0x1e,0x89,0x76,0x05,0x60,0x84,0x13,0x63,0x11,0x14,0x20,0xa9,0x2b,0xd0,0xc3,0x58,0xcc,0x73,0x3e,0x2c,0xa8,0xa7,0xa5,0xd0,0x2f,0x03,0xfc,0xa9,0x5d,0xdd,0xcd,0x40,0x91,0x90,0x1f,0xda,0x0a,0x73,0x58,0xd8,0x84,0x05,0x45,0x01,0x84,0x52 +.byte 
0x8b,0x9b,0x17,0x98,0xa8,0xc4,0xc3,0xb5,0x94,0xd5,0x32,0x86,0xe9,0x10,0xe5,0xa5,0x99,0x8d,0x57,0x3e,0x32,0x25,0xfa,0xb4,0x5c,0x3a,0x5f,0xa6,0x2d,0x7d,0x4e,0xd3,0x7b,0xee,0x41,0x23,0x5e,0xc2,0xc9,0x91,0xf4,0x21,0xe0,0x4f,0x0d,0x87,0x30,0x53,0xf1,0x0e,0x63,0xe8,0x5b,0x3d,0xee,0x4a,0xc8,0x78,0x38,0xa2,0xa4,0xe8,0x72,0x41 +.byte 0xf1,0x37,0x30,0xe3,0x3d,0x93,0xc6,0x4b,0x10,0x0d,0xf6,0x20,0x15,0x0a,0x77,0x41,0xd5,0x7d,0xcb,0xf9,0xda,0x3b,0x17,0xa6,0xf1,0xe4,0x56,0xd4,0x65,0x7b,0x33,0xe4,0xef,0x34,0xfb,0x8c,0x9f,0x87,0x86,0xfc,0xce,0x90,0x60,0x77,0x57,0xc0,0xe4,0x37,0x2c,0xdf,0x41,0x95,0x85,0x89,0x4e,0x77,0x3f,0xa0,0xc7,0x55,0x4c,0x3f,0xa8,0x10 +.byte 0xd2,0x87,0x7e,0xd2,0x97,0xa1,0x6c,0xe7,0xec,0xaa,0xf6,0x93,0x13,0x2e,0x10,0xed,0x5b,0x7a,0xed,0x53,0xb4,0x55,0xaa,0xb4,0x67,0x78,0x07,0x5f,0xc2,0xd2,0xf1,0x7b,0x98,0xf0,0x82,0xf6,0x7c,0xb2,0xd4,0xa8,0xc2,0x53,0x39,0x21,0x7f,0xa0,0x76,0x37,0x1a,0x69,0xb3,0x49,0xd4,0xc3,0xd1,0xcb,0x31,0x76,0xec,0xaf,0x75,0x66,0x31,0x65 +.byte 0xeb,0x44,0x63,0xa0,0x13,0xf5,0x9e,0x67,0x40,0x41,0x76,0xce,0xd3,0xd6,0x91,0xb1,0x3a,0x07,0xff,0x38,0x1e,0xaf,0x55,0x57,0x55,0xd1,0x94,0x63,0xd3,0x81,0x16,0x59,0x68,0x01,0xe8,0x6d,0x7d,0x7a,0xa1,0x39,0xb9,0xa2,0xba,0x79,0x9d,0x69,0x00,0x13,0x59,0x2f,0x3d,0xef,0x10,0xe7,0x3c,0x02,0x7d,0xa3,0xa8,0xee,0x31,0x1a,0xad,0xa6 +.byte 0xdb,0x1b,0xe3,0x4a,0xdd,0x60,0xfb,0x4e,0xa6,0x49,0xbb,0xea,0x34,0x5d,0x21,0xac,0x83,0xa4,0xb5,0x23,0x8e,0x69,0xb3,0x25,0x14,0x8d,0xc2,0x89,0x8d,0xcf,0x38,0x46,0x18,0xb6,0x0c,0xce,0x45,0x22,0xeb,0xb5,0xb2,0xed,0xe5,0x0f,0x35,0x8f,0xdd,0xa1,0x15,0xd6,0x50,0x5b,0xe1,0x04,0xa7,0x32,0xc0,0xc9,0x03,0x56,0xc2,0x33,0xe8,0x16 +.byte 0x1c,0xd4,0x7a,0xfd,0x6b,0x4d,0x04,0xc0,0x9e,0xf8,0x32,0x9f,0x52,0x24,0xac,0xc5,0xb0,0xa1,0x63,0x77,0xc9,0x14,0xaf,0x46,0x60,0x67,0x52,0x81,0xbb,0x3f,0xf5,0x7f,0xad,0xef,0x7c,0x3a,0x71,0xc1,0x1e,0xea,0x4a,0xe0,0xd7,0xdd,0x31,0xf2,0x4b,0xdf,0x53,0x8a,0xc9,0x59,0x7a,0xb2,0x6f,0x7e,0xc0,0x00,0xa4,0x0d,0x09,0x9c,0xf7,0x22 +.byte 0x22,0xa9,0x37,0xde,0x3b,0xe1,0x74,0x85,0xcf,0xc5,0xb7,0x7b,0x0a,0xfd,0x6b,0xfa,0x98,0x49,0xa9,0x7f,0x52,0x23,0x0e,0xc0,0x4a,0xb3,0x81,0xa6,0x96,0x46,0x24,0xe7,0x01,0xd1,0xf2,0xac,0x31,0xb2,0x5e,0x61,0xe3,0xab,0xf8,0x1b,0x28,0xca,0xa2,0x78,0x3c,0xdf,0x8a,0xc1,0x17,0x46,0x9d,0xbd,0x69,0x31,0x41,0x8b,0xc1,0xc8,0xaa,0x68 +.byte 0xd5,0x35,0x65,0x49,0xfe,0xc6,0xa4,0x99,0xcc,0x62,0x4b,0x81,0x1c,0x21,0xa4,0xd8,0xe3,0xb3,0xe9,0x7c,0xf8,0x33,0x2f,0x21,0xa5,0x88,0xf2,0x8e,0x7d,0xee,0x00,0x00,0x62,0xcf,0x07,0x37,0x00,0x68,0x6c,0xb5,0x2d,0xc6,0x1b,0xcc,0x86,0x71,0xf0,0x4f,0x68,0xaf,0x0c,0x9a,0x25,0x69,0x71,0x2d,0xb5,0x87,0x90,0x02,0xd3,0xfc,0xbb,0x63 +.byte 0xa9,0xf1,0x13,0x4f,0xda,0x71,0x69,0x5c,0x0b,0xfd,0x3f,0x6c,0x2f,0x0b,0x4f,0x07,0x72,0x2d,0x2f,0x77,0xcb,0xa4,0xe4,0xbd,0x30,0xc7,0xe4,0xd9,0xf9,0x5d,0x2f,0x65,0xe4,0x41,0x5c,0xbc,0x03,0xa2,0x01,0xf9,0xfa,0x06,0x14,0x52,0x08,0x44,0x67,0x75,0x4e,0xbd,0x66,0x4a,0x26,0x3a,0x49,0xc4,0xba,0x02,0xb3,0x8e,0xa2,0x42,0xe7,0x92 +.byte 0x03,0x6d,0x61,0x10,0x73,0xd0,0x6f,0xe1,0x6e,0x67,0xff,0xb0,0x29,0x62,0x70,0x3c,0xeb,0x80,0xed,0x11,0x06,0xd6,0x18,0x60,0xe1,0x3d,0x21,0xa9,0xe9,0xd2,0x92,0x00,0x9e,0x13,0xf2,0x5d,0x38,0x71,0xdf,0xf3,0x5f,0x8a,0x90,0x45,0xf0,0x47,0x1f,0x0b,0x2d,0x12,0xf7,0x10,0x07,0x6a,0x52,0xe8,0xe2,0x26,0x9b,0x4b,0x7a,0x5f,0x97,0xb6 +.byte 
0xf1,0x6d,0x47,0x3a,0x1e,0xc8,0x1d,0x78,0x5b,0x0a,0xb8,0x03,0xb1,0xe1,0xe7,0xc8,0xf0,0xe7,0x00,0xac,0xfc,0xd7,0x4a,0xde,0xaa,0xcd,0x0f,0xaf,0xf7,0x56,0x8e,0xed,0xfb,0xbe,0x7e,0xfe,0x62,0x75,0x7a,0x07,0x96,0xff,0xc3,0x21,0x35,0x71,0xb9,0x73,0x41,0xc2,0xb0,0xa8,0x6a,0x65,0x48,0xc4,0x50,0x31,0xe2,0xba,0xf4,0xe9,0x6c,0x03 +.byte 0x26,0x2c,0x77,0xfe,0x1a,0xd5,0x96,0xf6,0x6d,0xe4,0x14,0xfc,0xe2,0x1d,0x20,0x0c,0x14,0xa2,0x39,0x63,0xe5,0x16,0xef,0x6a,0xeb,0xe1,0x69,0xb8,0x67,0xa0,0x91,0xc1,0x8f,0xed,0xff,0xdf,0x26,0x1f,0xc3,0xb7,0x5d,0xe9,0xd2,0x72,0xe2,0x54,0x27,0x46,0x4f,0x33,0x25,0x59,0xaf,0xfa,0x87,0x4b,0x5a,0xda,0x7d,0x15,0x71,0x5d,0xb4,0x8d +.byte 0x95,0xb6,0x09,0x5b,0x8b,0xeb,0xe6,0xba,0xc8,0x2f,0x8f,0x9e,0xa8,0xab,0x6a,0xa6,0x26,0xb6,0xf5,0x80,0xd0,0x7d,0xe7,0x4c,0x18,0x5a,0x72,0x8f,0x3e,0x90,0xe5,0xa1,0x16,0x33,0x66,0xc3,0x7b,0xf6,0xb6,0xdd,0x15,0x94,0x6d,0xca,0x8b,0xd7,0xa5,0x05,0xfb,0x5f,0x4e,0x94,0x6a,0xcc,0x54,0xed,0xeb,0xc0,0xb1,0xe1,0xc9,0x7f,0xc4,0x90 +.byte 0x2f,0x50,0x34,0x81,0x3c,0x83,0x47,0x3c,0x5a,0xb2,0x33,0x63,0xb6,0xa7,0xfb,0x59,0x70,0x87,0xea,0x7f,0x30,0x22,0xb4,0x54,0x48,0xfb,0x40,0xd2,0x7b,0xc9,0x49,0x80,0x18,0x27,0xc2,0x75,0x09,0x06,0x0a,0x83,0x1e,0x7a,0xf1,0x97,0xa1,0xc2,0x34,0x3f,0x6d,0xd6,0x2d,0xfe,0x5d,0x8b,0xfd,0x64,0x5d,0x6f,0x7f,0xbf,0x4e,0x01,0xb7,0x46 +.byte 0xfb,0xf7,0xd5,0x6f,0x5f,0x74,0xc8,0xca,0x9a,0x2e,0x74,0x08,0xe9,0x3d,0x8b,0xfd,0x97,0x38,0x72,0x67,0xbb,0x8a,0x34,0xee,0xf5,0x3a,0x2b,0x5e,0x64,0x64,0x06,0x7c,0x60,0x0f,0x7a,0x88,0x45,0x1b,0x69,0x90,0xb8,0xb0,0x4d,0x71,0x80,0x77,0xa8,0xaa,0x9f,0xd3,0xc6,0xfb,0xb8,0x12,0x1e,0x0c,0xf4,0x94,0x67,0x44,0xdc,0xb1,0x95,0x0e +.byte 0x51,0xd1,0x06,0x69,0x92,0xbf,0xe6,0x67,0xe3,0xcd,0x0b,0x87,0x03,0x12,0x2e,0xa7,0x23,0x72,0x13,0xe9,0x89,0xcf,0x15,0x43,0xc0,0xa7,0x68,0xbd,0xce,0xec,0x28,0xb6,0x85,0x36,0xbe,0x52,0x5d,0x57,0xfa,0x7d,0x72,0xd1,0x4b,0x88,0xc9,0x64,0xbc,0x7a,0x18,0xe5,0x0e,0xab,0x19,0x81,0xee,0x11,0xbe,0xe0,0x68,0x44,0x81,0x49,0x3f,0xd8 +.byte 0x12,0xd1,0x8b,0xc1,0xe0,0x51,0xf7,0xc3,0x64,0xa7,0xc5,0x61,0x9b,0x32,0x6d,0xf0,0x6c,0xa6,0xaf,0xf9,0x4a,0xdf,0x94,0xaf,0xc8,0xf2,0x86,0xb1,0x4e,0x2e,0xa9,0xb4,0x35,0x82,0x15,0x8a,0x58,0xf3,0x03,0x2f,0x78,0x07,0x8f,0xb9,0x16,0x7c,0x42,0xfa,0x36,0xaa,0xa5,0x66,0x62,0x44,0xca,0xa6,0x55,0x95,0x27,0xdb,0x48,0xea,0x0a,0x1d +.byte 0x5a,0xae,0x5c,0xad,0x99,0xfe,0x00,0xf1,0xb9,0x94,0xda,0x09,0x48,0x52,0x9d,0xfc,0xb4,0xb2,0x80,0x19,0x16,0xf8,0xcd,0x68,0x10,0xec,0x1c,0x16,0x3f,0xbb,0x42,0xb4,0x10,0xe3,0xdb,0xaa,0xe4,0x3f,0x2e,0x8e,0xb5,0xce,0xba,0x8f,0xf2,0xb5,0x76,0x98,0x15,0xa7,0x77,0x4b,0x1c,0x30,0xb7,0x6f,0xc9,0xa9,0xa4,0x64,0x59,0xab,0x3a,0x43 +.byte 0x74,0x33,0xab,0xe1,0x3e,0x5e,0x79,0x1c,0xa5,0xb4,0x87,0xe1,0xcb,0xea,0x0e,0x02,0x4b,0x01,0x84,0xbc,0xdc,0x75,0xf4,0x2c,0x2b,0x8d,0xc8,0x5f,0xb5,0xba,0x6b,0xb2,0x4a,0x7c,0xe7,0xaa,0x61,0xa5,0x0c,0xf8,0x02,0x73,0xec,0x11,0x13,0x6b,0x31,0x07,0xaa,0x79,0x78,0x86,0x01,0x77,0x5e,0xa3,0x09,0xd1,0xec,0xaf,0x7d,0xb7,0x65,0xa9 +.byte 0xd8,0x99,0xd2,0xd7,0x6d,0x32,0x97,0x0f,0x0e,0x51,0x0d,0x69,0x81,0x7a,0x94,0x48,0x31,0xe1,0xff,0x26,0x4d,0x30,0x49,0x93,0xfb,0x6e,0xdb,0xea,0xaf,0xcb,0xb4,0xa9,0xc9,0x9f,0xeb,0xca,0x52,0x36,0x26,0xac,0x47,0xda,0x02,0x3d,0xd0,0x93,0x8b,0x61,0x78,0x26,0x54,0x32,0xe8,0x14,0xac,0xf3,0xd2,0x46,0x04,0x12,0x89,0x9f,0xf6,0x11 +.byte 
0xf5,0x64,0x83,0x66,0x00,0x50,0x55,0x05,0xb5,0xf6,0x58,0x9f,0xbf,0x4b,0x95,0xf1,0x7f,0x0b,0xb4,0xf7,0x63,0xea,0x6f,0xf7,0xb0,0x20,0x53,0xfe,0x95,0xbc,0xc4,0xe2,0xff,0x75,0xbd,0xab,0x73,0x68,0x44,0x18,0xf7,0x6b,0x04,0x46,0xde,0x6c,0x65,0xb2,0x22,0x4e,0x25,0x8e,0xba,0x7c,0x3a,0x6f,0x80,0x99,0xb4,0xe7,0xf9,0x97,0x68,0x40 +.byte 0xa9,0x96,0xfc,0x6b,0xcf,0x08,0x75,0xe4,0xda,0x6f,0xaf,0x71,0x4f,0x31,0x62,0x31,0x18,0xbf,0xb9,0xa0,0xcc,0x9e,0xa7,0xa2,0x27,0x2a,0xb8,0x6b,0xc0,0x93,0xf5,0x1f,0x41,0x25,0xa7,0x4d,0x9f,0xb4,0x12,0x5c,0x27,0x38,0x5d,0x80,0x88,0xa3,0xb8,0xb2,0xc3,0xd2,0xfb,0x1d,0xba,0x7b,0xac,0x51,0x0b,0x71,0x58,0x3f,0xe5,0xfa,0x36,0xb8 +.byte 0xc7,0x90,0x46,0xd0,0x5a,0x94,0xf0,0x7d,0x6e,0x6c,0x4c,0xb1,0xfa,0xdb,0x97,0x1e,0x19,0xf2,0x1f,0x4e,0x05,0x25,0x0e,0xbd,0x47,0x94,0x2a,0xd3,0x1a,0xbe,0x4a,0x04,0xaa,0x57,0x02,0xc9,0x42,0xc1,0x74,0xcd,0xe1,0x78,0x8b,0xff,0xc1,0xc6,0x17,0x4e,0x71,0xc4,0x2c,0x00,0x23,0x56,0x57,0x1f,0x47,0xd8,0x93,0x80,0xc1,0xc5,0x7b,0xd9 +.byte 0x25,0x30,0xac,0x72,0x37,0x00,0xd2,0xbc,0xc7,0x33,0x73,0xf9,0x14,0x86,0x7c,0xb0,0x28,0x14,0x5d,0xbf,0xbd,0x98,0x1c,0x00,0x05,0x19,0x2b,0x0a,0x55,0xad,0xb4,0x06,0x28,0x58,0x03,0xa1,0xe6,0x27,0xa3,0x32,0x5f,0x41,0xd5,0x6a,0x0b,0xbc,0x0f,0xaa,0xf5,0xc1,0xa7,0x09,0x2f,0x86,0xda,0x56,0xb0,0x04,0x49,0xd4,0x20,0xc6,0xa2,0x6c +.byte 0x27,0x56,0x4e,0xcd,0x22,0x46,0xac,0x0f,0xd3,0x99,0x69,0x83,0xc4,0xae,0x9f,0x88,0xed,0x9c,0xba,0xfb,0xf3,0x66,0xc7,0x3d,0x65,0x55,0xd0,0xe3,0x04,0x03,0x6a,0x02,0x5c,0xbf,0x9f,0x23,0x34,0x79,0xe1,0xbe,0x7d,0xad,0xb4,0xc7,0x9e,0x4d,0x80,0x73,0x6d,0xe5,0x37,0x03,0xac,0xa3,0xf4,0x93,0xad,0x1e,0xf3,0xcd,0xb8,0xe2,0xeb,0x30 +.byte 0xc7,0x50,0xfe,0x0a,0x63,0x5e,0x0f,0xc9,0xd0,0x06,0x58,0xc1,0x6e,0x65,0x54,0x54,0x5d,0xaf,0xf1,0xe8,0x3e,0x95,0xe3,0x70,0x40,0x8e,0xb8,0x4d,0x76,0xda,0xa8,0xe8,0x9e,0x88,0xd8,0xaf,0x67,0x83,0x3b,0x77,0x65,0x58,0x00,0xbb,0xf7,0xe9,0x52,0xf0,0xba,0x0d,0x0a,0x59,0x28,0xe4,0xa7,0xfb,0x06,0xe5,0x34,0xbe,0xcf,0x10,0x7c,0x73 +.byte 0xa8,0xf3,0xa2,0x93,0x96,0x9e,0x4f,0x9b,0x3c,0xd1,0x9f,0x64,0x5b,0x8c,0xc1,0x89,0x66,0x67,0x13,0x52,0xb2,0xaa,0x6b,0x8e,0xea,0x97,0x27,0x20,0x2e,0x64,0xec,0xf0,0x72,0xc9,0x54,0x8a,0xed,0x78,0x3a,0xd7,0x4f,0xc2,0xba,0xc3,0xb8,0x64,0x7f,0xe4,0x5f,0x3d,0xf7,0xe5,0xd9,0xf1,0x8d,0xb1,0xd2,0xf6,0xcc,0x34,0xd8,0x7d,0x16,0xca +.byte 0x47,0xaf,0x85,0xe5,0x4a,0x57,0xb9,0x5a,0x9e,0xff,0xb8,0x83,0xec,0x7c,0xb8,0x07,0xf5,0xd3,0x31,0x31,0x2b,0xf0,0x40,0x46,0xc3,0x63,0x27,0xe4,0xb0,0x3b,0x84,0x0d,0x50,0x05,0x80,0x0c,0xfa,0x8b,0x0e,0x33,0x6b,0x10,0xd4,0xf5,0x4f,0x8b,0x2d,0x9e,0xc5,0x01,0x92,0x52,0x62,0x1a,0x89,0x1e,0xca,0x48,0xc3,0xd6,0xfa,0xd2,0x94,0x7c +.byte 0x77,0x6e,0xa7,0xeb,0xd7,0x4f,0xe8,0xc8,0xc2,0x71,0xb2,0x9e,0x86,0x30,0x18,0xfd,0x4c,0x56,0x4c,0xd0,0xa4,0x84,0x37,0x02,0x02,0x6a,0x8d,0x57,0x6b,0xc2,0x06,0xd1,0x8a,0xdb,0xa0,0xcc,0x31,0xf9,0xcf,0xbf,0xf2,0x29,0x7c,0x26,0xac,0x1f,0x03,0x20,0x26,0x76,0x03,0x6f,0xa5,0xb5,0x33,0xfb,0x02,0xe8,0xf6,0xe9,0x5e,0xb1,0x36,0x7c +.byte 0x96,0x56,0xb1,0x98,0x2d,0x9c,0x38,0x9b,0xd4,0x56,0x28,0xcc,0xdb,0x08,0xd3,0x42,0x00,0x35,0x24,0xd9,0x74,0xa2,0x0d,0x55,0x21,0x06,0xb7,0xf9,0x6a,0xa0,0x81,0xc1,0x2d,0xb6,0x67,0x91,0x92,0x24,0x36,0xfd,0x2e,0xd8,0xc0,0xcb,0xc8,0x87,0x1a,0x41,0x11,0x70,0xbf,0xd2,0xe7,0x82,0x10,0x74,0xdf,0x65,0x46,0x19,0x6b,0xb4,0x89,0xeb +.byte 
0x9e,0xcf,0x79,0x35,0xba,0x25,0x75,0x32,0x64,0x6a,0xfb,0xaf,0xe5,0xed,0x85,0x98,0x34,0x75,0x31,0x40,0xbb,0xd8,0xe3,0xf5,0xa7,0xa2,0x9a,0x9e,0xcd,0xc4,0xf8,0xd8,0x15,0x6c,0x64,0x0c,0x6c,0x16,0x60,0xe9,0x40,0xf4,0x7a,0x14,0x37,0x7b,0x45,0x9b,0x0e,0x29,0x7a,0x1a,0x88,0x10,0xb9,0x2b,0xee,0x13,0xbd,0x8a,0xde,0x7a,0xe9,0x30 +.byte 0xe8,0x39,0x77,0x74,0xf5,0x2f,0xe3,0x10,0x19,0x89,0x28,0x21,0x3a,0x68,0x38,0xb4,0x4d,0x20,0x8d,0x7d,0xec,0x3f,0xf7,0x61,0xbf,0x53,0x32,0x3b,0xb8,0x6a,0xc9,0x58,0xeb,0xd4,0x33,0x0e,0xee,0xc7,0xb9,0x5e,0x3d,0x17,0x7e,0x36,0xa2,0xa6,0x94,0xb1,0x56,0xb6,0x8e,0x94,0x05,0x50,0x69,0x52,0x4f,0x31,0xe5,0x97,0x18,0xde,0x8f,0xb7 +.byte 0xff,0x2e,0x6f,0x1b,0x6a,0xda,0xfd,0xa1,0xd1,0x9a,0x4e,0x6a,0x1b,0x46,0x71,0x52,0x76,0x66,0xf9,0x70,0x8d,0x7d,0x97,0xb0,0xc3,0x8d,0xbc,0x35,0x26,0xe8,0x0b,0x80,0xc7,0x58,0x19,0x22,0x70,0x33,0x06,0xeb,0xcf,0x26,0x22,0xe0,0x97,0x91,0xbf,0xd6,0x94,0x05,0xe1,0x84,0xe2,0x31,0x66,0x57,0xc7,0x1e,0x36,0x30,0x50,0xaf,0x72,0xb3 +.byte 0x31,0xad,0x84,0xcc,0xb5,0x76,0x03,0xe1,0x56,0x97,0x87,0x36,0xf5,0xaa,0x97,0x99,0x38,0xa5,0xf5,0xb7,0x42,0x86,0x3b,0x2f,0x8a,0xb9,0x8e,0x6a,0x0b,0xe0,0xca,0xbc,0x4c,0x6c,0xc1,0x3f,0xbe,0x45,0xef,0xd2,0x57,0xcd,0x29,0xfb,0xfb,0xa5,0x79,0xf2,0xb1,0xbb,0x4b,0x55,0x26,0x2f,0x5c,0x84,0x5e,0x6a,0xc6,0xa9,0xd5,0x23,0xe4,0xd1 +.byte 0xe5,0xf0,0xbc,0x50,0x6a,0x2a,0xaf,0xa2,0x7c,0xcc,0x36,0x95,0xf9,0x5c,0x04,0x6d,0x04,0x31,0xbe,0x1d,0xb2,0x50,0x97,0x8f,0xdf,0x8a,0xed,0x4e,0x4e,0x0a,0x0b,0xfc,0xfc,0x1d,0xa9,0x6a,0x76,0x6a,0x33,0xd7,0x0a,0xcf,0xd5,0xdd,0xc6,0x62,0xe5,0x59,0x02,0xba,0x9c,0x43,0x32,0x8a,0x0e,0x47,0x91,0x00,0x07,0x47,0x93,0xc4,0xad,0x29 +.byte 0x33,0x57,0x15,0x45,0x44,0xb9,0xf3,0xc4,0xe6,0xd2,0xb9,0x3a,0x44,0x16,0x32,0x8d,0x57,0x78,0xac,0xf5,0xdb,0xa2,0x93,0x97,0x64,0x08,0x9b,0x66,0x4b,0xa0,0x64,0xab,0xa0,0xd6,0x0e,0x2c,0xa1,0x25,0x16,0x5c,0x6f,0x82,0xff,0x8e,0x89,0xfb,0xca,0x03,0xa6,0xf8,0xa1,0xf6,0x87,0x02,0x5c,0x90,0xcb,0x33,0xa0,0xc0,0x90,0xc2,0x1f,0xdd +.byte 0x5c,0x50,0x93,0xf2,0x8b,0x87,0xa1,0x73,0xda,0x5f,0xa3,0x20,0xd4,0xe7,0x45,0xd7,0xea,0x4b,0x5d,0xd6,0x80,0xfc,0x2d,0xdc,0x45,0x6a,0xf6,0xaf,0xd4,0x7a,0x91,0x64,0x15,0x17,0xbf,0xc7,0x58,0x54,0x7c,0x08,0x42,0x4f,0x8d,0xab,0x9b,0xd0,0x1d,0x57,0x71,0x50,0xa7,0xe3,0xb4,0xf2,0x14,0x0c,0xd7,0x2f,0x7c,0x8b,0x17,0x61,0x98,0xfa +.byte 0x19,0x34,0xb9,0x65,0xc5,0x5c,0xfe,0xa3,0x80,0x6f,0x99,0xec,0xfa,0x06,0x22,0x71,0xa9,0x10,0x2a,0xcf,0x12,0xb3,0x17,0xe5,0x59,0x3a,0xaa,0xcb,0x55,0x5f,0x45,0x9d,0xe9,0x29,0x56,0x34,0x11,0x62,0x6e,0x0a,0x95,0x12,0x5d,0xd4,0xa2,0x28,0x05,0xf1,0x0f,0x2d,0xa0,0x1e,0xe1,0x2b,0x42,0x6c,0xf0,0xe6,0x47,0xe0,0xb2,0xbd,0x89,0x20 +.byte 0x5e,0x24,0x05,0xec,0xf1,0x33,0xfc,0xa9,0x2f,0xef,0x3a,0x1f,0xfe,0x39,0xfe,0x01,0x09,0x0a,0x2a,0xe0,0x96,0x1e,0xde,0xad,0x96,0xaa,0x48,0xeb,0x8a,0xe6,0x54,0xbb,0x5d,0x7a,0xbe,0x4a,0xbf,0x96,0xf6,0x15,0x7a,0x70,0x6f,0xee,0xe7,0xf5,0x53,0xaf,0xe1,0xbb,0xaf,0x58,0x51,0xd4,0xa0,0xc6,0x44,0x03,0x47,0x33,0xce,0x58,0x62,0xd3 +.byte 0x93,0x21,0xa5,0xa5,0xb4,0xef,0x1d,0x93,0xcc,0x8c,0xf7,0x14,0xe3,0xec,0x40,0x52,0x47,0xe6,0xbc,0xe6,0x85,0x69,0xd0,0x15,0xad,0x24,0x21,0x4f,0x26,0x01,0x60,0x0f,0x0f,0xcb,0x7e,0x14,0x01,0xe1,0x90,0x11,0x06,0x17,0x38,0x2d,0xd8,0x26,0xe2,0x7c,0xd6,0xef,0xe0,0x59,0xf0,0x8c,0x2a,0xbd,0xba,0xe5,0x8b,0x07,0x56,0xd3,0x35,0xb3 +.byte 
0x64,0x83,0x9e,0xb9,0xb9,0xeb,0x88,0x03,0xff,0x14,0xf3,0x8b,0x14,0xd3,0xa4,0xac,0x08,0xd9,0x75,0xf6,0x2c,0x9d,0x7f,0xc8,0x9d,0x11,0x3b,0xd1,0x71,0x14,0x4b,0x2a,0x6d,0x20,0x83,0x32,0x35,0x7e,0x1f,0x20,0xa6,0x69,0xbf,0xcf,0x22,0xd9,0xa2,0x57,0x4b,0x66,0xb1,0x9f,0x5a,0xa8,0xaa,0xb8,0x11,0x1d,0x45,0x28,0xac,0x86,0x09,0x37 +.byte 0xe9,0x1f,0xef,0xb4,0xe0,0x6f,0x75,0xad,0xe5,0xd8,0x25,0x06,0x19,0xb4,0xa8,0x07,0x78,0x79,0x43,0x63,0x40,0x26,0xbd,0x28,0x50,0x2d,0x29,0x26,0xf9,0xfc,0x5c,0x71,0x8f,0xfd,0x62,0x12,0x7c,0xd0,0x67,0xb3,0x65,0xef,0x31,0xc0,0x99,0xc1,0x54,0xfc,0x32,0x6e,0x25,0x56,0x77,0x6e,0xc1,0x6b,0x11,0x50,0x7c,0xa1,0x0b,0x97,0x8a,0xfe +.byte 0x0f,0x5b,0x16,0x93,0x83,0xe0,0xd8,0xb7,0xbf,0xa8,0x90,0x6d,0xd6,0x8b,0x4b,0xd9,0x17,0xbb,0xe8,0xd9,0xbb,0x5f,0x39,0x4a,0x33,0x7c,0xb3,0x12,0x99,0x1e,0xfc,0xb2,0x05,0x91,0x67,0xdf,0x8d,0x0b,0x55,0xfb,0xd1,0x8d,0x0c,0x9b,0x80,0x81,0xee,0x8c,0x05,0xe2,0x16,0x30,0xad,0x1f,0x88,0x04,0x75,0xc1,0xe5,0xec,0x32,0xf8,0xa0,0x5b +.byte 0x21,0xf6,0xd8,0x13,0x26,0xe4,0xa1,0x32,0xa8,0x93,0x91,0x5d,0x33,0x45,0x83,0x72,0x52,0x59,0x23,0x84,0xf6,0x7b,0xe2,0x90,0x20,0xc6,0x40,0x33,0xa9,0x94,0xcd,0xb9,0xab,0xe4,0x44,0x0b,0x06,0xbb,0x4c,0x2c,0x2a,0x5e,0x4d,0x57,0xb7,0xe0,0xb8,0x86,0x74,0xab,0xea,0x37,0x1c,0xa0,0xa6,0x21,0x33,0xc7,0xf5,0x24,0x7d,0x14,0xc8,0x8b +.byte 0x9d,0x8f,0x31,0x23,0x29,0x9d,0x11,0x42,0x07,0xe8,0x2c,0xec,0x7d,0x70,0x8d,0xb5,0xa4,0xca,0x33,0x30,0x03,0x75,0x17,0xa1,0x10,0xe7,0x6b,0x87,0xf9,0x0b,0xef,0x43,0xef,0xf8,0x24,0xc2,0xf1,0x7a,0x1a,0x70,0x7e,0x2f,0xd4,0xeb,0x97,0x40,0xa6,0xe6,0x2d,0xc1,0xd8,0x3b,0xee,0xa4,0xda,0xd3,0x50,0x41,0x18,0xbf,0xad,0x66,0x02,0x85 +.byte 0x60,0x14,0xcf,0xce,0x50,0x88,0x5e,0xb6,0x73,0x11,0xbb,0x6a,0xca,0xb1,0x46,0x8e,0xbb,0x58,0x2c,0x63,0x61,0x20,0xec,0xc9,0x98,0x0c,0xdb,0x5c,0xe5,0x47,0xb5,0x89,0xe9,0x14,0xc8,0xbc,0x35,0xf2,0xa7,0x2d,0x84,0xcc,0x61,0xc8,0xb6,0x9d,0xeb,0xcb,0x8b,0x73,0x90,0x6d,0x06,0xc9,0x42,0xcf,0xd2,0x15,0x80,0x2d,0x39,0xeb,0x71,0x83 +.byte 0x27,0x0d,0x85,0xf9,0xa3,0xce,0xef,0x29,0x3b,0x10,0xb7,0xe9,0xd0,0x86,0x6e,0x88,0x1e,0x3b,0xdd,0xaf,0x52,0xde,0xa2,0xa4,0x13,0x3c,0x1f,0xcb,0x84,0x74,0x12,0x04,0x91,0x40,0xb8,0x1b,0x15,0xfd,0xdb,0xe8,0x74,0xcc,0x4d,0x41,0xb5,0x5a,0x92,0xd3,0x71,0xf7,0x57,0xa5,0xf7,0x18,0x5a,0x57,0x36,0xde,0x8f,0xb2,0x81,0x59,0xc8,0x5c +.byte 0x22,0xcf,0xdc,0x7d,0xff,0x83,0xf2,0xad,0x8c,0x7b,0xd5,0x04,0xc4,0xb9,0x79,0x4a,0x12,0xa7,0xb1,0x7e,0x57,0xa5,0x6b,0x56,0x8a,0x11,0x96,0x57,0xde,0x35,0xdd,0xef,0x9b,0x03,0x41,0xde,0x61,0x5b,0x73,0x8c,0x6a,0x0c,0x6f,0xae,0x45,0x4b,0x56,0x4d,0xbe,0x8a,0x3f,0xdb,0x79,0x58,0x88,0xad,0xcb,0xfa,0x66,0x06,0x0e,0x74,0x21,0x1d +.byte 0xe1,0x94,0xd7,0x06,0xea,0x60,0xe2,0x7d,0x70,0xcf,0xa9,0x4f,0xe6,0x9b,0xba,0x19,0x71,0x69,0x94,0x66,0x5a,0xb8,0x49,0x0c,0xd1,0x9a,0xc4,0x5f,0xa7,0xf4,0x9e,0x3d,0x9e,0xc2,0xd8,0x0e,0xd2,0x6d,0xc6,0xc8,0x99,0xc3,0x5e,0x3b,0xb9,0xd8,0x48,0xc0,0x38,0x48,0x95,0x89,0xff,0x7e,0x1d,0x80,0x53,0xac,0x7b,0xd7,0xfc,0x6f,0x5d,0x25 +.byte 0x2f,0xcf,0x15,0xdb,0x1a,0x64,0xc1,0x16,0x91,0x65,0x84,0x99,0x0a,0xc1,0xbf,0x4d,0x11,0xa5,0x55,0x55,0x35,0x93,0x6f,0x47,0xf1,0x75,0xb8,0xb6,0x11,0x9d,0x6e,0x3b,0xd1,0x11,0x20,0xa2,0xa2,0x5c,0x33,0x85,0x09,0xb8,0x13,0xc9,0xdd,0xf2,0xd4,0x32,0x37,0xf2,0xef,0x47,0xfa,0x25,0x1a,0xcc,0xdf,0xf4,0xe4,0x2c,0x2c,0x7f,0x23,0xb6 +.byte 
0xa8,0xd4,0x6a,0xd4,0xb4,0x06,0x2e,0xb0,0xaa,0xa1,0x18,0x8a,0x5c,0xc6,0xb2,0x4c,0x71,0x92,0x4a,0xdc,0x81,0x20,0x51,0x8d,0x3f,0x71,0x7d,0x8c,0x25,0x79,0x07,0x14,0xa9,0x7a,0x8b,0xda,0x00,0xfc,0x51,0xdb,0xa0,0x50,0x2b,0x15,0x39,0xf6,0xad,0xdc,0x9e,0x22,0x93,0x2f,0x43,0xd8,0x5c,0xa2,0x5e,0xfa,0x70,0x8c,0xe0,0x6b,0x0e,0x93 +.byte 0x6c,0x89,0xfe,0x22,0x4c,0xec,0xb0,0x7e,0xc1,0x06,0x69,0xf7,0x2f,0x3e,0xe5,0xa4,0x45,0x53,0xab,0x9c,0xf5,0x40,0x05,0x53,0x64,0xc6,0xa7,0xf9,0xc4,0xd6,0x89,0xd9,0x47,0x72,0x8e,0x42,0xf9,0x64,0x12,0xeb,0xd9,0x25,0xdc,0x4c,0xc6,0xea,0x9c,0x4b,0x93,0xb4,0xa2,0xa6,0xae,0x95,0xc1,0x84,0x75,0xc9,0x22,0xe3,0x22,0x81,0x31,0xd1 +.byte 0xfd,0x2e,0x91,0x4a,0xc3,0x00,0xa6,0x57,0xbb,0x89,0x9f,0x2d,0xc3,0x2e,0x1f,0xa2,0x47,0xc4,0xa3,0xcd,0x2b,0xc2,0x29,0xaf,0x89,0xce,0x2e,0x87,0x8e,0xd8,0xfc,0xee,0xab,0x8a,0xbd,0x2f,0xee,0xcf,0x94,0xe0,0x74,0x70,0x86,0x00,0x42,0x11,0x8b,0x6c,0x81,0xd4,0x82,0xf2,0x29,0x3e,0x9c,0x68,0x71,0xaa,0x20,0x0a,0x51,0x5d,0x80,0x4c +.byte 0xca,0x04,0x23,0x23,0xe2,0x69,0xb3,0xf5,0x65,0x98,0x19,0xee,0xa9,0x4d,0xd8,0xe0,0x06,0x4b,0x17,0xed,0xfa,0xf2,0xe3,0xd3,0x69,0x48,0xe4,0x4e,0xc0,0x5a,0x16,0x90,0xdb,0xb6,0x32,0x6e,0x6b,0xd7,0x7a,0xb6,0xd4,0x82,0xe4,0xcc,0x31,0x31,0x5c,0x18,0x84,0xef,0x75,0x9f,0xda,0xf6,0x62,0x2d,0x96,0x4d,0xa1,0x3c,0xb5,0x4a,0xbb,0xbf +.byte 0x9d,0xb3,0x33,0x00,0xc1,0x73,0xc5,0xb2,0xeb,0x85,0x74,0xb0,0x68,0xed,0x16,0x66,0x71,0xc9,0x7e,0x6f,0x74,0xa6,0xe7,0xed,0xf0,0xfa,0xab,0x41,0xdd,0x10,0xf9,0xff,0x4c,0xb6,0x4f,0x15,0xe3,0x77,0x31,0x17,0x5c,0x5a,0xef,0xb2,0xa9,0x44,0xbe,0x97,0xa9,0x75,0x5a,0xb7,0xe0,0x16,0x17,0x37,0x1b,0x71,0x03,0xb9,0xaa,0x7b,0x7b,0x52 +.byte 0x46,0x58,0x6b,0x9b,0x87,0x27,0xa6,0x8a,0x0e,0x84,0x03,0x45,0x95,0x04,0xf1,0x7e,0xb6,0xf6,0x79,0xd5,0x66,0x6d,0x50,0x8c,0x5a,0x67,0xe0,0xdd,0x69,0xd8,0x92,0x75,0x15,0xcb,0xa5,0x05,0xfe,0x7a,0xc1,0xd6,0x11,0x57,0x10,0xa3,0xc3,0xb6,0xe9,0xe3,0x97,0xa5,0x46,0xc9,0xe9,0x9b,0x68,0xb6,0x55,0x0b,0xf2,0x17,0x9d,0x0e,0x7f,0xd9 +.byte 0x26,0x0c,0x01,0xff,0x95,0xe1,0x05,0xb7,0xbf,0x0d,0x77,0x12,0x96,0x03,0x71,0x01,0xc9,0x98,0xb4,0x44,0x94,0xc0,0xad,0x3d,0xfc,0x6f,0xe5,0x0c,0xa4,0x65,0xd7,0xe7,0x76,0x7c,0xb8,0xa0,0x0a,0xcd,0xe8,0x01,0x26,0x8e,0x94,0xec,0x94,0x65,0x86,0xee,0x4d,0x3b,0xc5,0xb5,0x2e,0x51,0xb7,0xa9,0x68,0xcd,0x14,0x90,0xd8,0x36,0xfb,0x52 +.byte 0x04,0x52,0xb4,0xca,0x9b,0xbf,0xc6,0x94,0x28,0xc5,0x7e,0x27,0x73,0xae,0x6d,0xba,0xe7,0x56,0xce,0x2e,0x00,0xeb,0x36,0x19,0xd7,0x4f,0x20,0x5e,0xfd,0x0f,0xd4,0x4c,0x02,0xaf,0xdb,0x74,0xef,0xf0,0x73,0x1e,0x2a,0x1a,0xe7,0x3a,0xe0,0xa5,0x89,0xcf,0x1a,0x66,0xbd,0x72,0x65,0xb4,0xf4,0x86,0x33,0x44,0xee,0x35,0xf6,0x09,0xbe,0x13 +.byte 0x96,0x84,0x04,0x95,0x3f,0x35,0xbb,0x01,0x2c,0x78,0x25,0xe8,0x1e,0x46,0xdb,0xd9,0xb1,0xe8,0xfb,0x2b,0xa8,0x59,0x72,0x5f,0x91,0xd3,0x7c,0x21,0x95,0xa9,0x50,0xa2,0x45,0x6f,0x48,0x0c,0xf2,0x51,0x10,0x3c,0xcd,0xea,0xeb,0x5d,0xc7,0xf9,0x0e,0xae,0x1a,0x02,0x05,0x15,0x12,0x10,0xc0,0x35,0x12,0x97,0xcd,0x5b,0x61,0x4f,0xd1,0xd3 +.byte 0x5b,0xec,0x2b,0xa0,0x20,0x03,0x2b,0xf3,0xe6,0x71,0x23,0xca,0x1d,0x48,0x64,0x3f,0x7e,0x52,0x8b,0xf9,0x96,0x33,0x31,0xbc,0xbd,0x73,0x2f,0xa6,0x80,0xb8,0x0b,0x3a,0xd7,0xf8,0x05,0xf0,0x06,0xc7,0xa5,0xce,0x6a,0x6a,0x62,0xae,0x06,0x93,0xa4,0x5f,0x0b,0x5d,0x4d,0xb8,0xa4,0xfa,0x2e,0xfc,0xb6,0x58,0x8c,0x2a,0x46,0xa4,0x55,0x1f +.byte 
0x9b,0x9b,0x13,0xdd,0x17,0x2a,0x3d,0x04,0x51,0xb6,0xbe,0x9c,0xca,0xf3,0x23,0xb6,0x7b,0x7a,0x92,0xb7,0x2f,0xf9,0x69,0x9a,0xee,0xb3,0xa1,0x60,0x56,0xcf,0x9d,0xab,0xfe,0x86,0x7a,0x41,0x94,0x15,0xbe,0xa3,0xa5,0x85,0x09,0xfb,0x7b,0x89,0xbd,0xc3,0x09,0x10,0xa6,0xfc,0x41,0x8e,0x57,0x27,0xdc,0x58,0xf4,0x01,0x7c,0x31,0x5e,0xca +.byte 0xaf,0x31,0x2f,0x98,0x8b,0xbe,0x19,0x16,0xa1,0x81,0x7e,0xb3,0xa9,0xc5,0x15,0xd2,0xad,0x51,0xa1,0x73,0x56,0xd3,0x6a,0x15,0x35,0xe3,0xb1,0xdb,0x83,0x4c,0xe2,0x85,0x8c,0x03,0x12,0xc4,0x64,0x69,0xc0,0x23,0x16,0x7b,0x68,0x46,0x44,0x22,0x84,0xa6,0xb5,0xe4,0x90,0x91,0xc1,0xdd,0x25,0x7c,0x54,0x0e,0xce,0x5b,0x11,0xe4,0x50,0x1c +.byte 0x3c,0x0d,0xc7,0xc1,0x0c,0x10,0x2d,0x8b,0xb7,0xde,0xe2,0x4f,0x7e,0x22,0x53,0xfc,0x07,0x55,0x19,0x14,0x3b,0x33,0xf5,0xf3,0xd8,0x7b,0x5e,0x40,0xa2,0x81,0x6d,0x40,0x0d,0x20,0x36,0x4b,0xa1,0x34,0x34,0xac,0x43,0x59,0xb5,0xb1,0x90,0x8b,0x48,0xcf,0x15,0x57,0x17,0x0e,0xd0,0xbf,0x28,0xcd,0xa4,0x77,0x4d,0xae,0x09,0x4c,0x67,0x51 +.byte 0x18,0xaa,0xb4,0xc9,0x35,0x41,0x0b,0x34,0x4d,0xb3,0xef,0x3f,0x46,0x97,0x6e,0xae,0x75,0xd7,0x6a,0x2b,0x22,0x9c,0xef,0x8e,0xaf,0x72,0xb0,0x14,0x90,0xbd,0x11,0x90,0xde,0x9a,0x02,0x8c,0x20,0xf5,0xc7,0x33,0x4d,0x94,0x88,0x9a,0x6c,0x18,0xb4,0xc0,0xa9,0x94,0x07,0x9a,0x4b,0x10,0x8f,0xe8,0x25,0xcd,0x9b,0xf5,0xfa,0x91,0x8a,0xc0 +.byte 0x93,0x61,0x1c,0x00,0xd1,0x34,0x9a,0x29,0xa3,0x35,0x38,0xe4,0xa7,0x9f,0xb6,0x88,0x0f,0xad,0x88,0x96,0xa0,0x73,0xe7,0x10,0xea,0x36,0xe8,0x88,0x6c,0x7f,0x03,0xbc,0xfe,0xe0,0xb2,0x4b,0x24,0x98,0xf6,0x73,0x6f,0xab,0x00,0x1e,0x26,0x83,0x0d,0x86,0x5b,0xa6,0x51,0x8f,0x5f,0xa9,0x8f,0xf4,0xa0,0x51,0xff,0xe0,0x64,0x09,0x95,0xfb +.byte 0x56,0x53,0x18,0x61,0xea,0xc5,0x33,0xe8,0x6f,0x8a,0x07,0x97,0x1a,0x6c,0xb5,0xf8,0x73,0xae,0xe4,0x4e,0x6d,0xb2,0x83,0x20,0xfa,0xfd,0x79,0xa6,0x6c,0xaa,0x9b,0x7b,0x2c,0xfe,0x63,0x73,0xbc,0x87,0xd4,0x56,0xd1,0xb1,0xf1,0x0f,0x72,0x2c,0x2f,0xf0,0xf0,0x53,0xe2,0x6c,0x19,0x0d,0x9c,0xad,0xc8,0x0a,0x62,0x72,0xcb,0xc3,0x12,0x90 +.byte 0x4c,0x26,0xe3,0xa0,0x07,0x35,0xee,0xaf,0x81,0x35,0x07,0xa9,0x31,0xa0,0x59,0xc8,0x40,0xa5,0x45,0xb6,0x6d,0x3e,0xa2,0x5f,0x6a,0x79,0x74,0x65,0xa1,0xe3,0x1c,0xca,0xae,0xcc,0xa6,0xb6,0x0a,0x12,0x99,0x8e,0xc3,0xef,0x43,0xcf,0x42,0x92,0xa4,0x12,0xa3,0x8b,0x97,0x7d,0x6f,0xe0,0x35,0xed,0xac,0x69,0xae,0x8c,0xe1,0x32,0x11,0xa4 +.byte 0xe0,0x76,0x7f,0x75,0x92,0xda,0xfe,0x94,0x33,0xeb,0xe1,0xa4,0x3c,0x95,0x7c,0xc6,0xbc,0x3d,0xf2,0x39,0xa1,0x29,0x39,0x24,0x09,0xd4,0x52,0x68,0xfb,0x80,0xd0,0xd4,0x57,0xc6,0x4c,0xa5,0xa6,0x90,0xa6,0x61,0x15,0x2f,0xd3,0x35,0x36,0xf5,0x16,0xb3,0x65,0x0a,0xc4,0xcb,0x7f,0x73,0xe4,0xba,0x9a,0xd8,0x8b,0xc3,0x01,0xa0,0x08,0x57 +.byte 0x9e,0x26,0x54,0xbc,0x55,0xd1,0x5f,0xaa,0xb5,0x0d,0x42,0x75,0x04,0x76,0x8c,0xef,0xcf,0x64,0x3a,0x2e,0x4c,0x78,0xe5,0x37,0x8d,0x55,0xec,0xc1,0x7b,0xce,0x5f,0x5f,0x43,0x8b,0xdd,0x46,0x43,0xf5,0xa8,0x41,0xa6,0x82,0x1b,0x12,0xcb,0xcb,0x6d,0xa1,0x6c,0xb6,0x79,0x46,0x12,0x89,0x12,0x61,0xd6,0x4f,0xf9,0x43,0x2d,0x27,0xa9,0x61 +.byte 0x2e,0x2a,0x29,0x1b,0x6d,0xad,0x32,0x0b,0x6c,0x7c,0xf4,0xb8,0x98,0x91,0xbb,0x78,0xda,0x85,0xe8,0xfb,0x4e,0x11,0xc4,0x2a,0x07,0x54,0xa0,0x67,0x73,0x1b,0xa4,0x60,0x15,0x5c,0x83,0xbf,0x3f,0xd9,0x61,0x30,0x02,0xbb,0xa6,0x67,0xcd,0x0c,0xd1,0xb4,0x11,0x7e,0xca,0xf4,0x1e,0xed,0x83,0x34,0x66,0x54,0x23,0x39,0x36,0x8c,0xa0,0xc6 +.byte 
0xef,0xad,0xa1,0x95,0x04,0x20,0x46,0x42,0xa8,0x99,0xd2,0x98,0xc6,0x0a,0x92,0x11,0xd1,0x84,0x4a,0xbf,0x25,0xe5,0xcf,0x78,0x98,0x81,0x80,0xaa,0x31,0x0a,0xa4,0xfb,0xef,0x35,0xfa,0xa4,0xac,0x5f,0x01,0x6b,0xb7,0x8e,0x86,0xc1,0x46,0x97,0x88,0xe2,0xaa,0x3b,0x1f,0xb5,0xf8,0xa9,0x90,0xf0,0x45,0x6d,0xdd,0xa3,0xdd,0xd8,0xef,0x36 +.byte 0x6f,0x87,0x55,0xf6,0x96,0xcd,0x88,0x43,0x03,0x97,0x82,0xea,0x5a,0x1c,0xa1,0x1a,0x7b,0x1b,0xa7,0xfc,0xaa,0x86,0xb4,0x71,0xde,0x0d,0x0a,0x52,0x98,0xd2,0x65,0x5d,0xa4,0xea,0x91,0xc9,0xe4,0x8b,0xd0,0xdb,0x85,0xe3,0x86,0x85,0x50,0xe1,0x41,0x1f,0x48,0x97,0x64,0xec,0x34,0xe4,0x54,0x42,0xf4,0x01,0xed,0x6f,0x4d,0xe3,0x1f,0x86 +.byte 0x14,0xbc,0x01,0x9c,0x7f,0x02,0x0c,0x65,0x94,0xd2,0x90,0x2c,0x1b,0xab,0x41,0x88,0xad,0x58,0xb5,0x71,0xd3,0xd6,0xe1,0x3f,0xf3,0x3c,0xb6,0xab,0x22,0x08,0x17,0xc7,0xf5,0x7e,0x34,0x56,0xae,0x1d,0x1e,0x7e,0xdb,0x24,0xe2,0xc2,0x38,0xf3,0x4d,0x46,0xe4,0x45,0xcb,0xb7,0x2f,0x0f,0x96,0x72,0x7e,0x31,0x89,0x17,0x9c,0xed,0x85,0xb9 +.byte 0xc8,0x8f,0x65,0x93,0xfb,0xb8,0x9e,0x41,0xa2,0xc1,0xcf,0xdb,0xe2,0x4c,0x26,0x4a,0xc7,0x2a,0x72,0xf6,0x28,0xbc,0x18,0x22,0xde,0xa1,0xfa,0x46,0xbe,0x95,0xc8,0xe2,0x19,0xbb,0x20,0x7b,0xd5,0xf8,0x34,0x15,0xaa,0xec,0xe2,0x9e,0xa9,0x3d,0xa1,0xd9,0xaa,0xc9,0x18,0x39,0x07,0x5c,0x81,0x61,0xe7,0x00,0xc5,0x57,0x3e,0xca,0x4d,0x89 +.byte 0x33,0x02,0xa6,0xc8,0x15,0xb7,0x24,0xdd,0x5c,0x55,0x56,0x11,0x5c,0x17,0x1b,0xda,0xc6,0xd5,0x46,0x6e,0x9f,0x70,0xe7,0x1e,0x41,0xee,0x91,0x1a,0xa0,0xad,0x35,0x64,0xdf,0x4a,0x18,0x03,0xa7,0xa8,0x88,0x8f,0x65,0xbc,0x76,0x34,0x08,0xab,0x50,0xc6,0xd3,0x08,0x7c,0xc1,0x4f,0x77,0xcd,0x1a,0xc6,0xed,0x35,0xea,0x4e,0x8a,0x6a,0x38 +.byte 0xa3,0xa3,0xd8,0xa9,0xa2,0x68,0xa7,0xd8,0xe0,0xc8,0x3f,0xfe,0xe7,0x73,0xc6,0x6b,0xd8,0x0c,0xd5,0x8f,0x81,0xe7,0x37,0x08,0x93,0x28,0x73,0xef,0xc4,0x91,0x52,0xa5,0x30,0xff,0x47,0x95,0x02,0x0d,0x8c,0xfd,0xc9,0x28,0x60,0xa9,0xad,0x30,0x00,0xcc,0x3a,0x00,0xbb,0x25,0xab,0xd0,0xf8,0x25,0x46,0x20,0xc0,0x67,0x9b,0xd6,0x10,0xa6 +.byte 0x84,0x6f,0x66,0x60,0x66,0x75,0xb6,0xfb,0x39,0x3a,0x9f,0x7d,0x32,0x7f,0x12,0x6f,0x8c,0xed,0x79,0x40,0x47,0xa3,0x27,0x17,0xa8,0xa4,0x02,0x93,0xb9,0x32,0x03,0x34,0x06,0x76,0x71,0x40,0x90,0x2b,0xe7,0xd0,0x3f,0x59,0xa7,0xfb,0x3a,0x7b,0xc8,0xa5,0x86,0x21,0x0d,0xf6,0xc6,0x49,0x07,0x56,0xe9,0xfc,0xac,0x61,0x30,0xa5,0x7e,0x90 +.byte 0x10,0xc8,0xdb,0x15,0x2b,0x75,0x27,0x77,0x51,0x42,0xcf,0x50,0xe8,0x6c,0x0b,0xb7,0x17,0x1a,0x89,0x7d,0xfe,0xd2,0x75,0xfa,0xb7,0xe5,0x68,0x10,0x1c,0x27,0x85,0x8b,0x52,0x7d,0x87,0x57,0x50,0x77,0x25,0x9d,0xcc,0x08,0x6a,0xad,0x63,0xf8,0x8e,0xe0,0x21,0x62,0x56,0x48,0x29,0xed,0x81,0x1d,0x6b,0x60,0x55,0x78,0x6a,0xce,0xd6,0x79 +.byte 0xe1,0x66,0x18,0x9f,0x71,0xf7,0x0c,0xec,0x35,0x53,0xef,0x39,0xfe,0x57,0x71,0xc0,0x49,0x4b,0x55,0xe8,0x3d,0x9b,0xe3,0x9a,0xbb,0xf8,0x61,0x31,0xa1,0x94,0x94,0x8a,0xb1,0xd2,0x0f,0x01,0xe0,0xd4,0x26,0xa0,0x59,0x70,0xd0,0x5e,0xb8,0x6f,0x63,0x7b,0x71,0x49,0xe1,0x98,0xfb,0xdb,0x22,0x26,0x18,0x16,0x31,0x08,0x90,0x32,0xd5,0x7a +.byte 0xc0,0xd8,0xeb,0xae,0x93,0x3d,0x46,0xeb,0x0e,0xdd,0x08,0xa2,0xde,0x4e,0xc1,0x88,0x26,0xc2,0xf8,0xc6,0x5e,0x8a,0x9b,0x0d,0x9f,0x2b,0xcf,0x4e,0x13,0x43,0x4a,0x65,0xf6,0x47,0x1a,0x0a,0xae,0xf9,0x9f,0x7c,0xc5,0x18,0x65,0x09,0xcb,0x85,0x7d,0x33,0x36,0x43,0x19,0x99,0x20,0xa2,0x64,0xb2,0xf5,0x20,0xd2,0x74,0xc6,0x2c,0x29,0x46 +.byte 
0xde,0xa7,0x4a,0x7f,0x3b,0x05,0x3e,0x11,0xb6,0xc1,0x98,0xfb,0xf5,0x9d,0x93,0x95,0x76,0x11,0x80,0x41,0x44,0xd3,0x2f,0xf4,0xfd,0x92,0x1e,0xd7,0xa7,0x5f,0x02,0x4a,0xbc,0xb7,0x96,0x33,0xc0,0x0d,0x2d,0x97,0xb8,0xd4,0x67,0x7a,0x4c,0x74,0x93,0xa7,0x8d,0x68,0x78,0xed,0xc8,0xc9,0x02,0x6e,0xae,0x10,0x97,0x7c,0x56,0x11,0x2a,0x29 +.byte 0x87,0x5c,0x21,0xec,0x75,0x9c,0x17,0x17,0x8d,0x45,0x08,0x31,0x36,0x64,0xc0,0xf7,0x95,0xb6,0x72,0xcf,0xac,0xd8,0x52,0x02,0x6f,0x3b,0x14,0x34,0x30,0xcc,0x39,0x7c,0xe4,0x1f,0x38,0x23,0xcf,0x1f,0xb7,0x7e,0x92,0x66,0xf7,0xda,0x9f,0x27,0xbb,0x83,0x45,0x71,0x67,0x63,0x6c,0x85,0x64,0x34,0xa8,0x93,0x5a,0x13,0x0c,0xff,0x8b,0x3a +.byte 0x2a,0x10,0x1d,0xb6,0x43,0xef,0x57,0xf3,0xf0,0x29,0x2e,0x59,0x72,0x2e,0xc3,0xb6,0xd3,0xd0,0xdd,0x17,0x19,0x82,0x49,0x05,0xd4,0xfc,0xd6,0x2e,0x5d,0xd7,0x0c,0xb6,0x18,0xd5,0x08,0xbb,0xe5,0x3b,0x2e,0x85,0x62,0xc0,0x1e,0xa3,0xb8,0x92,0x21,0x06,0xfa,0xf1,0x2d,0xab,0x62,0x67,0x62,0xee,0x13,0x7f,0x07,0xb6,0x24,0x64,0x94,0x4f +.byte 0x69,0xb9,0x7a,0xdc,0x23,0x5e,0x19,0x96,0xc5,0x4d,0xcb,0xee,0x2d,0x4a,0x7d,0x1d,0xd2,0x72,0x18,0x8f,0x43,0x8f,0x76,0xbf,0x30,0xd8,0xf1,0xfe,0x9c,0xe7,0x63,0x38,0xff,0x1a,0x3f,0x40,0xbd,0x73,0x66,0xf7,0xa9,0xd9,0x17,0x4a,0x8a,0x79,0x04,0x0e,0x20,0xe1,0x39,0x49,0xd9,0x30,0x9c,0x52,0xf9,0x14,0x8f,0xdc,0x9d,0x52,0xd5,0x34 +.byte 0xaa,0x58,0xfe,0x5d,0x68,0xcb,0xab,0x3b,0x3c,0x9e,0x25,0xde,0x6d,0xdd,0x58,0x0d,0x1b,0x99,0xa9,0xcc,0x26,0x4e,0xc0,0x3c,0x8b,0x1e,0xaa,0x52,0x3d,0x4d,0xb8,0x27,0xc1,0xd1,0xa2,0xaa,0x78,0xb9,0xee,0x5f,0x26,0x46,0x5f,0x41,0x0d,0xe1,0x70,0x7d,0xcd,0x3f,0x4a,0xca,0xb2,0xca,0x2f,0x36,0x1f,0x68,0xe6,0x66,0x8a,0xf6,0xe3,0x94 +.byte 0xe5,0xab,0x90,0xeb,0x2f,0xe8,0xb2,0x6c,0xa9,0x69,0xd2,0xe0,0x5f,0x4a,0x65,0xa8,0x6b,0xc1,0xfb,0x03,0x51,0x17,0x3b,0xf8,0xe0,0x67,0xc3,0x5a,0xe8,0x18,0xdf,0xc1,0xf8,0x7f,0x44,0x68,0x4a,0x01,0xbe,0xf8,0xa5,0x7a,0xb9,0x3b,0x0f,0x05,0x8e,0x4b,0x28,0x14,0x61,0x2f,0x2e,0xc7,0xf2,0x96,0xc7,0x60,0x99,0xc4,0xbf,0xe8,0x37,0x98 +.byte 0x00,0x34,0xf7,0x5a,0xd7,0x6f,0x90,0xc4,0x19,0xb5,0x07,0xd1,0x76,0x6e,0x65,0xcc,0xf6,0x51,0x88,0x5c,0x81,0x91,0xa8,0x4d,0xb7,0x33,0x53,0xb6,0x93,0x42,0x52,0x82,0xfa,0x2b,0xca,0xa0,0xbd,0xf3,0x09,0x2b,0x0f,0x09,0x02,0xdd,0x29,0x5f,0xa6,0x49,0x7b,0x97,0xe8,0x96,0xbf,0x6f,0x76,0xb7,0xa2,0x76,0x58,0xda,0x1d,0xb2,0xdb,0x6d +.byte 0x9d,0x3b,0x32,0x6e,0x9c,0xea,0x45,0xfd,0x33,0xeb,0x41,0x91,0x91,0x52,0x2b,0x68,0xa3,0xf3,0xc6,0x92,0x43,0x13,0x49,0x8a,0x10,0xb1,0x2f,0x9a,0x0f,0xe1,0x94,0x21,0x18,0x76,0x87,0xaf,0x50,0xe4,0x71,0x5d,0x0a,0xba,0x75,0xaa,0x17,0xf5,0x37,0xf2,0x84,0x9b,0x29,0xdf,0x44,0x60,0xd0,0xac,0xcf,0x25,0x87,0x66,0x64,0x1f,0x0d,0xba +.byte 0xb3,0xdb,0x14,0xb6,0x1f,0x00,0x70,0x98,0x83,0x1d,0x9e,0xbd,0xf9,0x17,0xf4,0x57,0xae,0xa8,0xae,0x7b,0xa7,0xde,0x1f,0x31,0xc6,0x29,0xb2,0xf7,0xef,0x36,0x31,0xe7,0x50,0x33,0x69,0x4e,0x8c,0xb5,0xe4,0xdd,0x74,0x87,0xc8,0xf5,0x22,0x1b,0x4b,0xec,0xc4,0xe1,0x5a,0x7d,0x5a,0xe8,0xb9,0x2f,0xf4,0xd1,0x83,0xa2,0xb7,0x97,0xe0,0x1e +.byte 0xf7,0x3a,0x74,0xef,0x5f,0xb3,0x30,0xce,0xfa,0x23,0xd5,0x98,0x56,0x19,0x24,0xb5,0xc7,0x60,0x8b,0x03,0x8e,0xe7,0xdf,0x2c,0x36,0x4c,0x3b,0x3b,0x84,0x45,0x97,0x40,0x29,0x30,0x98,0xc3,0xc0,0xa2,0xf0,0xdf,0x69,0x47,0x95,0x26,0xdb,0x6c,0xcc,0xff,0x2d,0x32,0xaa,0xa7,0xb8,0x6b,0x24,0xec,0xff,0x94,0x4d,0x36,0xdd,0x7b,0x4d,0xc5 +.byte 
0x8d,0xe2,0x3c,0x14,0x5a,0x37,0x75,0x1f,0xd6,0x98,0x7d,0xd3,0xdc,0xb0,0x24,0x69,0xe7,0x65,0x60,0x2a,0xe7,0x00,0x5b,0x68,0x99,0xa0,0x9e,0x10,0xf0,0x5c,0xa8,0x39,0x85,0x59,0xde,0xe4,0x46,0xf3,0xde,0xda,0xc0,0xb1,0xd2,0xf1,0xd2,0x05,0xd5,0xd4,0x2c,0x2e,0x7e,0x44,0x5c,0x52,0x80,0x85,0xbb,0x54,0x97,0xb6,0xad,0x6d,0x57,0x49 +.byte 0xed,0x67,0xaf,0x27,0xb4,0x5b,0xce,0x0f,0x3c,0x58,0xa2,0x24,0x22,0xa2,0xcb,0xfc,0x4e,0x8e,0xc2,0x3c,0x32,0xc6,0x07,0xc4,0xc6,0xc0,0x50,0xc3,0xe3,0x1b,0x96,0x76,0x62,0xf9,0xea,0x5e,0xdc,0xc5,0x96,0xe8,0xaa,0x20,0x26,0xac,0x44,0xfb,0xf2,0x16,0x72,0x72,0x4c,0x5c,0xee,0x51,0x07,0xb0,0x74,0xf6,0xde,0xd7,0x5d,0x73,0xf4,0xe9 +.byte 0x0d,0x29,0x06,0x5f,0xca,0xe2,0xbb,0xa4,0x3e,0xdc,0xf7,0x74,0x99,0x53,0x7a,0x52,0x60,0x46,0xaa,0xf0,0x34,0x97,0x0c,0x81,0x5b,0xd8,0x95,0x52,0x76,0x55,0xcb,0xc4,0x6d,0x50,0x26,0x3f,0x7e,0xc2,0x93,0x6e,0x14,0x0c,0xd7,0x49,0x5f,0x52,0x8f,0x34,0x49,0xb4,0xe7,0x12,0xfe,0xae,0xd1,0xfa,0xfc,0xc5,0x80,0x38,0x26,0x9c,0xf1,0x81 +.byte 0x01,0x58,0x15,0x99,0x29,0x8d,0x1b,0x2d,0x74,0xca,0xf1,0xf4,0xfa,0xcd,0xae,0xfa,0xa9,0x1d,0xbb,0xf1,0x55,0x2e,0x69,0x46,0x6e,0xe4,0x91,0xa3,0x48,0xb5,0xaa,0xb3,0x85,0xab,0x14,0xd2,0x84,0x8c,0xb1,0xb6,0x0c,0xa5,0x4a,0x90,0xed,0x6e,0xdf,0x1e,0x15,0x36,0x7b,0xa3,0x59,0xd6,0x8d,0x7d,0x7b,0x12,0x7c,0x9a,0x40,0x8a,0x28,0xde +.byte 0xb5,0xbc,0xc4,0x52,0x96,0xfb,0x62,0x1f,0xc9,0xe0,0xc9,0x1d,0xc7,0xc4,0xcb,0x8a,0x96,0x21,0x42,0x7c,0x0a,0xdd,0x42,0x74,0xcf,0xc4,0x57,0x8f,0x28,0x0a,0x7c,0x4f,0x49,0x5a,0xc6,0x21,0xb2,0xd4,0xd0,0x61,0xa5,0x35,0xbd,0x4a,0x0c,0x16,0x68,0x1f,0xe3,0xff,0x3f,0x72,0xf0,0x1d,0x50,0x26,0x48,0x91,0x27,0x1b,0x2b,0x0d,0x8b,0xf2 +.byte 0xa0,0xc0,0xa0,0x5d,0xdb,0xcf,0x71,0x41,0x83,0x00,0xb9,0x3c,0xe0,0x4a,0x96,0x43,0xf8,0x64,0x0f,0x42,0xc5,0x75,0xec,0x26,0x62,0x99,0x13,0xeb,0xf9,0xa6,0x86,0xe4,0xc9,0xaf,0x3c,0x2c,0xc9,0x4f,0x89,0xf4,0xc0,0x46,0x99,0xb8,0xd1,0x9e,0x7b,0xb7,0x41,0x0a,0x5f,0x40,0x98,0x65,0x29,0xdd,0x60,0x6b,0x27,0xbf,0x66,0x08,0x32,0xc2 +.byte 0xcf,0xea,0x91,0x44,0x45,0x49,0x1c,0xb4,0x16,0x7f,0x11,0x1a,0x8c,0xb4,0x59,0x54,0xc6,0xcf,0x40,0xd2,0xe9,0xc1,0x54,0x9c,0xe2,0x6e,0xd5,0xfe,0xfb,0x4a,0xa3,0x98,0x63,0xef,0x86,0xe0,0x63,0x30,0x32,0x5a,0xbd,0xd4,0x7c,0xe8,0xbe,0xf1,0xed,0xa2,0x19,0x98,0xc8,0x34,0x65,0x4c,0xef,0x1a,0xb3,0xbc,0x87,0xbe,0x6b,0x75,0x2c,0xe5 +.byte 0x54,0xcc,0xe5,0x69,0xb2,0xc8,0xdb,0x57,0xf8,0xa7,0x82,0x07,0xf7,0x20,0x95,0x7f,0x6d,0x7b,0x33,0x66,0x67,0xa1,0x38,0x0e,0x9c,0x3b,0x22,0xab,0xc1,0xd3,0xed,0x87,0x32,0xfb,0x4a,0x5d,0xad,0x3a,0xe1,0x90,0xa6,0xe3,0x4d,0x6b,0x00,0xe4,0x5c,0x66,0x59,0x90,0x63,0x24,0x5b,0xe1,0x3b,0x69,0xb6,0xc9,0x05,0x83,0x3a,0x7b,0xf4,0xa5 +.byte 0xc8,0x47,0xf9,0x8e,0xab,0x92,0xbd,0xd3,0x41,0xc7,0x61,0xf4,0xce,0x30,0xdb,0xae,0x27,0x69,0x0f,0xcc,0x69,0x50,0xe8,0x18,0xf2,0x39,0x04,0x5a,0x29,0x12,0x61,0x46,0x5c,0x1b,0x2e,0x15,0x9c,0xfa,0x73,0x50,0xe3,0x51,0xda,0x4d,0x88,0x25,0xb2,0xff,0x55,0x27,0xce,0x86,0xca,0xe6,0x2a,0xb8,0x0c,0xa7,0xd0,0x06,0xbf,0x70,0xb5,0x6b +.byte 0x80,0x44,0x65,0x5d,0x23,0xfa,0x0d,0x74,0x5c,0xfc,0xc7,0x86,0x5e,0x23,0x8a,0xf1,0xff,0x80,0xf0,0x19,0xaa,0x98,0xae,0x56,0xcf,0x12,0x74,0x6c,0x70,0xb2,0x39,0xbe,0x66,0x71,0xee,0xe3,0x43,0x3b,0xfa,0x79,0xa9,0x7e,0x69,0x6a,0x19,0x42,0xd5,0x0e,0x1e,0x92,0xfe,0x8a,0x0f,0xca,0x74,0xf2,0x68,0x71,0xf5,0xcb,0x05,0x94,0xc1,0x06 +.byte 
0x1b,0xae,0x55,0xe9,0x16,0x03,0xa9,0x97,0xad,0x49,0xaf,0x88,0x8c,0x26,0x33,0x4d,0x46,0x75,0xb3,0x9c,0xee,0x70,0xe1,0x57,0x43,0xeb,0x59,0xff,0x77,0x89,0x8a,0x77,0x3f,0x7e,0xe6,0xbe,0xa2,0x05,0xb1,0xe3,0x41,0x5e,0xc7,0xd4,0x14,0xda,0xc0,0x84,0xd0,0x05,0x50,0xdd,0x62,0xdb,0x4c,0x3b,0x16,0xb0,0xe0,0xf5,0x2b,0xf1,0x83,0xea +.byte 0x7b,0x89,0xbb,0xde,0x57,0xdb,0xc0,0xb9,0x7d,0xdf,0x53,0x0f,0x6c,0xc5,0x5a,0x0b,0x36,0xeb,0xa3,0xc3,0xe6,0xc5,0x80,0x98,0xf3,0x87,0x29,0x97,0xc9,0x2e,0xd6,0x3b,0x43,0x2a,0x36,0x3b,0xba,0x43,0x85,0xf5,0x0d,0x18,0x2e,0x78,0x43,0xae,0xa4,0x24,0x6d,0xdc,0xab,0x05,0x94,0x09,0x94,0x27,0x17,0xef,0xbc,0x7e,0x52,0xa4,0x80,0xda +.byte 0x28,0xf5,0xc3,0x20,0x99,0xbb,0x5d,0xb6,0x7e,0x0e,0x59,0x3b,0x5e,0x1d,0x1b,0x4f,0xd1,0x91,0xe4,0xe4,0xc7,0x35,0xc7,0x2e,0xc1,0xba,0x60,0x05,0xa4,0xd5,0xca,0x5f,0x09,0xbf,0x79,0x06,0xcb,0xa7,0x32,0x7c,0xf4,0xdc,0xa8,0xb3,0x8b,0x26,0x59,0x6d,0xcb,0x74,0x37,0x56,0x51,0x96,0x0b,0x44,0xf1,0x95,0x16,0xe3,0x9b,0x9b,0x3b,0xb3 +.byte 0xea,0x6a,0x1b,0x76,0x99,0x69,0xd6,0x5b,0x10,0x5a,0x91,0x23,0xb5,0xc3,0xf9,0x6a,0xba,0xc4,0xe6,0x18,0x28,0x50,0x9d,0x09,0x14,0xbe,0xed,0x73,0xd2,0x51,0xff,0xf8,0x14,0x2b,0x8b,0xdd,0x2a,0x1a,0x8e,0x48,0xae,0xd8,0xdf,0xb9,0x5b,0xcb,0x8f,0xc2,0x8c,0xd6,0xb3,0xfb,0x40,0x2f,0xb0,0x6c,0x9a,0xea,0xd0,0x14,0x8c,0xc5,0xc7,0xc7 +.byte 0xf8,0xf5,0x4f,0xe2,0xd7,0x41,0xcd,0xb6,0x34,0x3e,0x81,0x19,0x09,0xa2,0x51,0xb4,0x60,0xfb,0xf2,0x6c,0xe6,0xae,0x68,0x47,0xb9,0x93,0x7b,0xc9,0xe7,0x00,0xc4,0xa7,0xf2,0xef,0x8b,0xd8,0xfc,0x9f,0xe5,0x6d,0x48,0xe2,0x6c,0x32,0x73,0x5c,0x30,0x7c,0x12,0x13,0xca,0xc3,0x31,0xc3,0xa2,0xb4,0xf7,0x23,0xc4,0xd0,0x47,0x39,0x93,0xc8 +.byte 0xa0,0x7b,0xb4,0x09,0x3f,0xe8,0x15,0x15,0x9c,0xa7,0xe6,0xa8,0xbe,0xba,0x60,0xf9,0x28,0x88,0x66,0x7b,0x62,0x32,0x17,0x18,0x68,0x87,0x53,0xf5,0xbc,0xf5,0x77,0x17,0xa1,0x3f,0x62,0xd1,0x10,0x0a,0x54,0x96,0x9c,0x31,0xc3,0xb7,0x1d,0xaf,0xc7,0xb3,0x27,0x9e,0x46,0xfe,0x7e,0x9b,0x88,0xf2,0x9e,0x6e,0x19,0x0f,0xb1,0x88,0xe4,0x08 +.byte 0x76,0x7c,0x77,0x46,0x09,0xa7,0x9e,0xf4,0xd9,0xbf,0x67,0xe8,0x9d,0x6a,0x75,0xa7,0xf5,0xee,0x29,0xba,0x84,0xa0,0x44,0x46,0x35,0x4c,0x22,0xef,0xb3,0xea,0xb0,0xf2,0xd6,0x78,0x20,0x97,0x28,0x5c,0x7e,0x90,0x06,0x80,0x19,0x63,0xa4,0x8a,0xef,0x0a,0xea,0x88,0xa9,0xa2,0xae,0x23,0x2e,0x40,0xce,0xc5,0xc2,0xbf,0xfe,0x5a,0x8f,0x14 +.byte 0xb8,0x66,0x1a,0x2d,0xdb,0x43,0x39,0xbd,0xe7,0x7b,0xbc,0x41,0x58,0x74,0x56,0xd1,0xe7,0xd0,0xba,0x24,0xd2,0x41,0xbf,0xd0,0x4e,0x97,0x38,0x8f,0x6b,0x6f,0xe2,0x7d,0x6d,0x32,0x94,0x43,0xa7,0x66,0xf7,0x90,0x21,0xe0,0xdd,0x19,0x48,0x72,0xc1,0xa5,0xbc,0x9c,0xe2,0xdd,0x2c,0x6e,0x50,0x45,0x2c,0xa0,0x95,0xcb,0x1d,0x2c,0x1d,0xa6 +.byte 0xbe,0x9c,0xd4,0x6c,0x07,0x2e,0x5e,0xc8,0xc1,0x05,0x61,0x7d,0x44,0x28,0xe6,0xad,0xf0,0x9d,0x2d,0x3d,0xce,0x90,0x7d,0x79,0x2e,0xf3,0x08,0xbe,0x7a,0xa9,0x58,0x04,0xa7,0x39,0x05,0xdd,0xb4,0x87,0x6c,0x7b,0xd5,0xb3,0x2d,0x6b,0x43,0xf4,0x37,0xd9,0x6f,0x5c,0xa2,0x23,0x92,0x53,0xb9,0xd7,0x1b,0x2d,0x5d,0xcd,0x6d,0x3f,0xef,0xc8 +.byte 0x66,0x91,0x10,0x1b,0xc5,0x24,0x50,0x87,0x70,0x93,0x03,0x3f,0x7b,0x40,0xc8,0x0c,0x9b,0xec,0x3d,0x82,0x27,0x96,0x2a,0xbe,0xca,0xaf,0x1b,0xbf,0xef,0x14,0x0c,0xdc,0xa6,0xc7,0x48,0x18,0xce,0x8e,0x43,0x58,0x97,0xb3,0x5e,0xd6,0xc9,0x70,0x65,0xd0,0x0e,0x17,0xac,0xa0,0x6b,0xc9,0x55,0x30,0x12,0x7c,0xbe,0xe5,0x46,0xfc,0xd8,0x3f +.byte 
0x0e,0xd7,0x96,0x16,0x32,0x8e,0xb7,0x2d,0x07,0xd1,0x26,0x98,0x70,0x4c,0xb1,0x6f,0x92,0x32,0x75,0x4f,0x57,0x6b,0x78,0xe0,0xc5,0x9b,0xf0,0x08,0x59,0x0b,0xfa,0x2d,0x79,0xbe,0xde,0x44,0x3d,0x65,0x77,0x27,0x3b,0xd9,0xea,0x55,0x79,0x22,0xe8,0xf7,0x62,0xb1,0xe3,0x32,0x4e,0x03,0x17,0x65,0xd3,0x5d,0xee,0xa0,0x9b,0xc2,0xbd,0x9f +.byte 0xcd,0xdc,0xde,0xd7,0x6c,0x95,0x7a,0xf1,0x09,0x4c,0x14,0xb9,0x37,0x1d,0xd0,0xdd,0x4b,0x2e,0x93,0x0b,0xfa,0x08,0x40,0x01,0x36,0xdf,0x89,0x46,0xa6,0xbb,0x19,0xd9,0x4f,0xf9,0xe1,0x7b,0x03,0xc9,0xef,0x01,0x25,0xe9,0x6d,0x95,0x84,0x7f,0xf8,0x8e,0x02,0xfd,0x6f,0x30,0xed,0x1b,0x98,0xd0,0xb3,0xdd,0x92,0x65,0x46,0x49,0x61,0xde +.byte 0x76,0xf5,0x4b,0x29,0x03,0x6f,0x79,0xee,0xbe,0x7a,0x07,0x6e,0xa8,0x29,0xb8,0x03,0xb4,0x6c,0x50,0x1f,0x4a,0xa2,0xaf,0xbd,0xde,0x18,0x72,0x90,0xa2,0x12,0xa9,0x59,0x7b,0xf6,0x96,0x2d,0xda,0x3d,0x90,0xba,0x7c,0x79,0x3e,0x6e,0xef,0x94,0x37,0xe2,0xef,0x6b,0x2a,0x74,0x6b,0x52,0xa0,0xc2,0x1e,0xa1,0x24,0x59,0x84,0xeb,0xdc,0xd0 +.byte 0x34,0x60,0xa8,0x81,0xaf,0xdd,0x57,0xc2,0xa6,0x02,0x7f,0xcf,0x9e,0x64,0x28,0x18,0x7c,0x95,0x98,0x90,0x7a,0x76,0x3f,0x78,0x16,0x2c,0xe0,0xa7,0xdf,0x0d,0x4d,0x5e,0xcc,0x0d,0x73,0x12,0x26,0xd7,0xe9,0x32,0x3e,0xa1,0xa9,0xde,0x29,0xb2,0x3b,0x6f,0x3b,0x6e,0x12,0x0c,0x10,0x34,0x86,0xf2,0xa0,0xd4,0x9c,0xf6,0x14,0x5a,0x41,0x06 +.byte 0x31,0xb1,0xe4,0x31,0x52,0xf4,0xcb,0xe3,0x39,0xcd,0x0b,0xc2,0xca,0x90,0xba,0xb3,0x21,0xbf,0x94,0x13,0x75,0x3b,0x0e,0x0a,0xc0,0x05,0x35,0xe6,0x28,0x74,0x63,0xc5,0x34,0x44,0xd8,0x9a,0x0e,0xec,0xb3,0x1b,0x30,0x58,0xfc,0xa0,0xc4,0xd1,0x26,0x50,0x6b,0x22,0x88,0xfc,0xad,0xa9,0xb4,0x3e,0x36,0xb6,0xb1,0x6d,0x62,0x7e,0x60,0x8f +.byte 0xf5,0x17,0x65,0x1c,0xf6,0x51,0x4d,0x89,0x4a,0x7e,0x5d,0x23,0x3b,0x83,0x1f,0xa6,0xc8,0xd2,0x1a,0x90,0xd3,0x53,0xfc,0x48,0x64,0x94,0x6e,0x1c,0x72,0xef,0x5d,0xd4,0x23,0xa2,0x3a,0x93,0xe4,0x29,0x33,0x8a,0xbd,0xe5,0x17,0xc2,0xe9,0x18,0x6a,0x81,0x1e,0x5b,0x03,0x41,0x45,0x35,0x14,0xe7,0xc8,0x45,0x5c,0x37,0x69,0x77,0x62,0xf8 +.byte 0xd7,0xec,0x9d,0x62,0x2e,0xfa,0x43,0x3a,0xdc,0x8b,0x86,0x86,0x1b,0x31,0x71,0x0e,0x92,0x59,0xf7,0xef,0x96,0xfd,0x04,0x1e,0x1d,0x74,0x7d,0x08,0x06,0x21,0x54,0x39,0xd3,0x9f,0x30,0xa1,0x19,0x7f,0xc8,0x19,0x16,0xd1,0x21,0x2a,0xf3,0x21,0xce,0x19,0x1a,0xde,0x70,0x1b,0x87,0x05,0x9e,0xe8,0xf3,0xfd,0x1d,0xaa,0x61,0x6c,0xfb,0xdf +.byte 0x50,0x9a,0xa0,0x32,0x4e,0xe4,0x68,0xda,0x0e,0x2f,0x2a,0x70,0xe1,0x51,0x66,0xb4,0x2d,0x5b,0xb6,0x32,0x3f,0xcb,0xc0,0xaf,0x01,0x03,0xcd,0xd6,0xb8,0x4e,0x3d,0x24,0x17,0xe2,0x30,0x3b,0xa4,0x08,0x0e,0x6a,0xcf,0xbe,0xc2,0x5c,0x79,0x5d,0x25,0xe2,0xae,0xa7,0x7f,0x42,0xff,0xa9,0xa5,0x05,0xbf,0xf4,0x92,0x30,0xaa,0x1d,0x96,0x7a +.byte 0x49,0xbc,0x1c,0xaa,0x5c,0x8d,0xe8,0xf3,0xd3,0x1a,0x67,0x7f,0x47,0x09,0x90,0x35,0x82,0x4e,0xcc,0x2e,0x50,0xfe,0x2c,0xb9,0x29,0x39,0xff,0x49,0x8f,0x7e,0x89,0x8d,0x4a,0x15,0xd1,0xd6,0x83,0xdb,0x25,0xac,0xc1,0x81,0x23,0x70,0x3f,0xb9,0xce,0x7f,0x03,0x46,0xa8,0x39,0xab,0xff,0x71,0xc9,0x7b,0x3c,0xb3,0x5e,0x9f,0xfe,0x8a,0x0a +.byte 0x39,0xad,0x6a,0xc1,0x8e,0x5a,0xa8,0x71,0xb7,0x01,0x25,0x28,0x15,0xd9,0x0a,0xae,0xc1,0xf9,0x23,0x1c,0xc1,0xe8,0x86,0x1d,0xb8,0x71,0x6e,0xa2,0xa4,0x67,0x22,0x4d,0x0e,0xd2,0xaa,0x70,0x26,0x23,0xfc,0x15,0xed,0x67,0x11,0x87,0x69,0x6f,0xc6,0x4c,0xe1,0x4b,0x04,0x86,0xe9,0x56,0x40,0xea,0x07,0xb1,0x6f,0xe9,0x8f,0xdd,0x2f,0xce +.byte 
0x8d,0xca,0x0a,0x58,0x01,0x44,0x2c,0x74,0xd0,0x14,0x07,0x9a,0xb7,0x5a,0xc1,0xea,0xa9,0xdd,0xa4,0x94,0x84,0xc2,0x11,0xa5,0xe2,0x00,0xd8,0xfc,0x77,0xb9,0x5e,0xe6,0x72,0xef,0xc5,0x38,0xe0,0x90,0x11,0x16,0xfd,0xa7,0x77,0xbd,0x4c,0x1d,0xeb,0x32,0x54,0xdb,0x2a,0x43,0xa1,0x87,0xbb,0x2e,0x79,0x22,0x4d,0xb3,0xdf,0x1a,0xee,0x75 +.byte 0xb0,0xdd,0xf2,0x09,0x05,0xf4,0x6a,0x3c,0x86,0xc6,0xe7,0x60,0x2a,0xee,0xb6,0x55,0xae,0xdc,0xce,0xf8,0xe4,0xd7,0xdf,0x72,0x42,0x91,0x6d,0xc4,0xd8,0x60,0xf1,0xe8,0x06,0x71,0x38,0xa3,0x03,0x3e,0x1b,0x14,0x47,0x74,0x93,0xb5,0x61,0x28,0xde,0x23,0x8f,0xbe,0x88,0x5e,0xdf,0x87,0x47,0xd4,0x5f,0x91,0x40,0xeb,0x02,0xda,0x27,0x3b +.byte 0x65,0x9f,0xd8,0xf1,0x78,0x7f,0xba,0x9b,0x35,0xb3,0x10,0xaf,0x7f,0x51,0x37,0xa5,0x63,0x64,0x1f,0xf1,0xc3,0x1b,0x9e,0xe4,0xdd,0x93,0x8c,0x3a,0x98,0x20,0x9a,0x75,0x22,0x7b,0x48,0x0a,0x9d,0x55,0xed,0x07,0x1a,0x79,0x3b,0x98,0xe3,0x16,0x9b,0x16,0x2c,0xb2,0x03,0xc1,0xf5,0x6c,0xac,0x00,0x6a,0xb6,0xc1,0xc2,0x49,0x4d,0x9d,0xf5 +.byte 0x0e,0x7b,0x60,0x09,0xcc,0xa7,0x35,0xbb,0x70,0x34,0x18,0x49,0x2c,0xf1,0x41,0x4f,0xce,0x68,0x03,0x60,0x14,0xa7,0x2e,0x59,0x0f,0xa2,0xc4,0x2f,0x33,0xf0,0xb6,0xa4,0x31,0x75,0xdc,0xb4,0x88,0xe4,0xe3,0x0e,0x4b,0x3f,0x58,0xd0,0xa4,0xea,0x9a,0xef,0x47,0xb7,0xf7,0x20,0x71,0x52,0xd3,0x8a,0x1c,0xd9,0x2d,0x88,0x05,0x03,0x8a,0x1c +.byte 0x3d,0x69,0xf0,0x39,0xf0,0x25,0xad,0x95,0xd4,0x47,0x3c,0xbb,0xfa,0x48,0xd7,0x8e,0xf5,0xdc,0x33,0x43,0x0a,0xbb,0xf0,0xd3,0xb1,0xc3,0x94,0x81,0xcd,0x22,0x79,0xdc,0xd0,0x92,0x8b,0xd3,0xc3,0xac,0x73,0x72,0x83,0xaa,0xa2,0x52,0x13,0x27,0x0e,0xc5,0x8c,0xa5,0x69,0x21,0x6e,0x9c,0x9d,0x9b,0xeb,0x7a,0x19,0xfe,0xb6,0xdb,0x4e,0xc1 +.byte 0xa6,0xec,0x42,0xb0,0x86,0x69,0x60,0xde,0x36,0x11,0x6a,0x86,0xd7,0xbf,0x15,0x48,0xa2,0x73,0x8f,0x68,0xde,0xd6,0xb2,0x6d,0xe0,0xc5,0x1f,0x1f,0xd5,0xc5,0xef,0xce,0xa1,0x90,0x5c,0xe6,0x6c,0x15,0x73,0xa7,0xcc,0x2d,0xe8,0xcf,0x4c,0xc8,0x17,0x3c,0xfa,0x5e,0xdb,0x4f,0x54,0xf3,0xa3,0xff,0x50,0x3e,0x42,0x60,0x0d,0xf3,0xf7,0xbb +.byte 0xc6,0xf5,0xe7,0x63,0x50,0x49,0xc1,0x94,0x60,0x68,0xbd,0x62,0xc0,0x81,0x80,0x16,0xfd,0x65,0xfb,0x2e,0x23,0x67,0xb3,0xb6,0xf8,0x95,0xfa,0x00,0x3f,0x1d,0x10,0x16,0xd5,0xd9,0x66,0xf8,0x25,0xb4,0xce,0xf2,0x2e,0x4f,0xa2,0x21,0x14,0xbd,0x2c,0x63,0xec,0x44,0x57,0x07,0x87,0x3c,0x2f,0x22,0xcf,0x48,0xd3,0x20,0x51,0xfc,0x5d,0xd5 +.byte 0x9f,0x67,0x9c,0xaf,0xe3,0x89,0x36,0xc5,0xfa,0x7c,0xca,0x07,0xdc,0x56,0x2a,0x4e,0xa5,0x76,0xe6,0x09,0x99,0xfb,0xb7,0xba,0xaa,0x0b,0x9c,0xe2,0x0f,0x73,0xab,0x9b,0xbe,0x6f,0x50,0xe3,0xf7,0x28,0x32,0xf2,0xab,0x86,0xa3,0x89,0x3a,0xea,0xd7,0x52,0x52,0x6e,0xed,0x1b,0x94,0xf0,0x59,0x9d,0xbb,0x7a,0x88,0x6f,0xbf,0xaf,0x6a,0x87 +.byte 0x47,0x34,0x7f,0xf4,0x8b,0x0d,0x33,0x12,0x2b,0x67,0x6b,0xc9,0x1d,0x18,0x23,0x2e,0x54,0xee,0x07,0x28,0xbd,0x9d,0xa1,0xaf,0x85,0x7a,0x0f,0xe5,0x5d,0xf7,0x8b,0xca,0xd9,0x3d,0x8f,0x4f,0xcc,0xce,0xc3,0x6e,0x3a,0x40,0x08,0xd2,0x14,0xf0,0x28,0x9b,0xc0,0x4a,0x7a,0x3c,0xc2,0xed,0xe0,0x20,0x04,0xf5,0xf9,0xee,0xb8,0x35,0x94,0xbc +.byte 0x53,0x46,0xf2,0x1a,0xab,0xe9,0xde,0xd8,0x27,0x67,0x0d,0x63,0x2a,0x7b,0x3a,0x38,0x91,0xbc,0x48,0x2c,0x38,0x09,0xa0,0xe3,0x66,0xe3,0xeb,0xb9,0x02,0x2d,0x80,0x87,0x81,0x4f,0x5c,0x1c,0xfd,0x2b,0x0f,0x99,0x37,0x3a,0xfa,0x0f,0x8e,0x8c,0x87,0x76,0x72,0xd3,0xcf,0xc8,0x1e,0x8a,0x3b,0x97,0xa0,0xe6,0x32,0x66,0x3c,0x55,0x2c,0xfb +.byte 
0xa9,0x41,0xfd,0xf9,0xd4,0x50,0xe0,0x5b,0x03,0xb7,0x1e,0x49,0xfa,0x59,0xeb,0x55,0xb1,0x21,0xd0,0x52,0xeb,0xe6,0x0f,0x21,0x81,0x4f,0x82,0x9a,0x8f,0x67,0x3d,0x0d,0x1d,0x11,0x1f,0x70,0x59,0x09,0x87,0x99,0xe5,0xf2,0x89,0xa6,0x56,0x8d,0x52,0x55,0xa8,0x91,0x5d,0x51,0x48,0xec,0x66,0x05,0xd6,0x18,0xd1,0x61,0x02,0x5a,0x80,0xcc +.byte 0xee,0xf3,0x3b,0x8e,0x73,0x2a,0xb1,0x22,0xda,0x1d,0xca,0xb2,0xd6,0x7f,0xd7,0x7d,0xaf,0x23,0x8d,0xff,0x24,0x8e,0x5e,0x38,0x29,0x23,0x1f,0xbc,0xfd,0xe4,0x3d,0xcd,0x66,0xe3,0xe1,0x0f,0x85,0xe3,0xda,0x34,0xc6,0xba,0x60,0x5f,0xaf,0x32,0x79,0x34,0xc0,0x01,0x93,0xae,0x1e,0x72,0x7f,0xd2,0x32,0xa1,0xdc,0x0b,0xca,0xee,0x5a,0x7a +.byte 0x09,0x98,0x2a,0x46,0x0a,0xe7,0xfd,0x0f,0x76,0xa0,0x3b,0x2b,0x3d,0xe5,0xcd,0x04,0xa2,0x5e,0x9b,0xba,0x4a,0xd5,0x0a,0xce,0x94,0x77,0xbb,0x24,0xa4,0x12,0xbc,0x24,0xb6,0x60,0x40,0x62,0xd2,0x70,0x0e,0x3f,0x62,0x72,0x2f,0xa1,0xc9,0x12,0x03,0x0f,0x39,0x57,0x77,0x7c,0x5c,0x31,0x13,0xcb,0x8c,0x2c,0x84,0xfd,0x7b,0x6f,0x60,0xbb +.byte 0x1a,0x0b,0x65,0x8c,0xc1,0xe6,0x4b,0x60,0x8c,0xe7,0x3e,0x94,0x2a,0xcc,0x70,0x9f,0xd0,0xfd,0x00,0x0e,0x36,0xb2,0xf1,0x62,0x78,0x6a,0xc8,0x9b,0xbe,0x8b,0x54,0xa7,0xad,0xee,0x3e,0x8e,0x1c,0x23,0xbe,0xa2,0x73,0x43,0xbe,0x15,0x32,0x84,0xdd,0x22,0x75,0xd5,0x9a,0xfb,0x93,0x38,0x55,0x2f,0xa4,0x34,0x4c,0x33,0xc3,0xd7,0x7c,0x9f +.byte 0x42,0x2f,0x9f,0xf6,0x27,0x90,0x15,0x6b,0x14,0x4f,0xbc,0x4b,0x07,0x42,0x24,0x98,0xa6,0xc4,0x4c,0x2f,0x22,0xd9,0x80,0x99,0x97,0x6b,0x7d,0xe8,0x2b,0x31,0x37,0xfe,0xd1,0x8b,0xbd,0xbf,0x08,0x4a,0x56,0x3d,0xff,0xb5,0x12,0x6d,0xc4,0xcf,0xbc,0x75,0xe9,0xe6,0x6f,0x1a,0x30,0x34,0x5b,0x2c,0x1d,0x8f,0x85,0xa0,0xe8,0xfd,0xfd,0xe2 +.byte 0xe7,0x13,0x73,0xcd,0x63,0x63,0x90,0xa5,0xa4,0x3f,0x91,0x65,0x77,0xd4,0xed,0x0c,0x1d,0x06,0x95,0x93,0x74,0x85,0xec,0x31,0xde,0xc9,0xb9,0x2e,0x7c,0x6d,0x2c,0x0d,0x15,0xb7,0x6b,0x0c,0xd2,0xe8,0xa8,0xcb,0x90,0x5c,0x11,0x53,0xc5,0x9d,0x54,0xf4,0x90,0xf7,0xc8,0x17,0x65,0xc0,0x3f,0xea,0xf6,0x28,0x8e,0xf0,0x1c,0x51,0xcc,0xfd +.byte 0x99,0x67,0x3d,0xa5,0x82,0x1f,0xb3,0x75,0x08,0x27,0x85,0xa9,0x7b,0x54,0x91,0x6e,0x80,0x9a,0xdb,0x6c,0x17,0x4a,0x36,0x73,0x0e,0x61,0x2e,0x01,0xae,0x32,0xf8,0x54,0xdb,0xcf,0x24,0xa5,0x13,0xb1,0x7e,0x0b,0xf5,0xe7,0x0e,0x27,0x9a,0xef,0x01,0x0b,0x34,0x4f,0x91,0xc2,0x93,0xe0,0xe6,0x14,0x64,0xf8,0x7b,0x41,0x37,0x22,0x39,0xad +.byte 0xf4,0xa9,0x3b,0xfb,0x7e,0x2b,0xd8,0x2b,0x0f,0x7e,0x40,0x55,0x5a,0x48,0x61,0x2f,0x95,0x5e,0x5c,0x25,0xe5,0x06,0x89,0x17,0x23,0xb6,0x1b,0x38,0x2e,0x7b,0x45,0xa5,0x11,0x0a,0x8d,0xd3,0x8d,0xb6,0x8d,0x47,0xc5,0x4f,0x8f,0x8b,0xe2,0x03,0x85,0xa1,0x5a,0xa2,0x8d,0xca,0x4d,0xef,0xc9,0xde,0x7d,0x06,0xa1,0x3f,0x21,0xb9,0x38,0x7b +.byte 0x91,0xf7,0x5c,0x9f,0x97,0xe3,0xeb,0x5d,0xea,0x5e,0xc1,0xa5,0x30,0xb0,0x7f,0xe0,0x4c,0xef,0xe5,0xe3,0xa0,0x2d,0x23,0xb6,0x08,0x21,0xe6,0x67,0x35,0x82,0x07,0x59,0x02,0xd4,0x68,0xa5,0xf1,0x42,0x70,0xb4,0x5e,0x54,0xed,0x1e,0x99,0xb2,0x55,0xf1,0x69,0x2e,0x7c,0xaa,0x6c,0x5e,0xd4,0xfa,0x16,0xa7,0x1f,0xdb,0x46,0x70,0x65,0x26 +.byte 0x98,0xf1,0xb6,0x42,0xb3,0x48,0x99,0x7c,0x07,0xbe,0x2b,0xee,0xb4,0xc1,0xf0,0xb7,0x47,0xf8,0xcf,0xe4,0x8d,0x34,0xa6,0xe5,0x17,0x9a,0xb7,0x2c,0x2e,0x03,0x30,0xfd,0xfb,0x42,0xe7,0xa1,0xe0,0x34,0x49,0x64,0xd8,0x0c,0xd5,0xb8,0x77,0x9f,0x0e,0xe2,0x73,0x0d,0x20,0x0c,0x21,0x07,0xaf,0x0f,0x93,0x94,0xd6,0xdc,0xe3,0xac,0x8d,0x8e +.byte 
0xae,0x87,0xbd,0x2c,0x19,0x66,0xef,0x90,0x4a,0xd9,0xb0,0xf6,0xac,0x3a,0xe2,0xb5,0x2e,0xb4,0x63,0x91,0xf1,0x8b,0xac,0xce,0x51,0xc2,0xe0,0x02,0x7d,0xf8,0xab,0xe4,0xd6,0x85,0xd6,0xbb,0xd7,0x72,0xd0,0x5f,0x4e,0x90,0x09,0xcc,0x51,0xee,0x5b,0xad,0xb2,0xf6,0x16,0x37,0x09,0xa8,0xfc,0x74,0xa5,0x2e,0x26,0x27,0xff,0x53,0xd4,0x45 +.byte 0x82,0xb1,0xb6,0x16,0x65,0xc6,0xbb,0x54,0x0b,0x89,0xa1,0x0e,0x09,0x7c,0xc9,0xc9,0x48,0xa7,0x51,0x78,0x1d,0x3a,0x30,0xc5,0xe7,0x02,0x9e,0x91,0xd6,0x39,0xc8,0x35,0xf0,0x33,0xab,0xf6,0x0f,0xf9,0xce,0xef,0x26,0x46,0x48,0x56,0xbc,0x45,0x44,0xe2,0xd7,0xfc,0xdf,0xb2,0x95,0x20,0x07,0xeb,0x47,0x1c,0xde,0x88,0x5e,0x08,0xee,0xa1 +.byte 0x56,0x9a,0x5d,0x8f,0x35,0xc5,0xb3,0xd3,0x7d,0xe3,0x25,0x82,0xcc,0xcb,0xad,0xd8,0xef,0x83,0x76,0x08,0x55,0x9e,0xf4,0x00,0x1f,0x92,0x24,0x0e,0xf6,0x96,0x98,0x34,0x10,0x10,0x93,0x27,0x3b,0x96,0xbd,0x75,0x45,0x9d,0xad,0xc1,0x79,0xa7,0x09,0x68,0x0a,0xbc,0x14,0xe9,0x62,0xf6,0x5e,0x4e,0x6d,0xfb,0xf2,0x25,0x20,0x8b,0x53,0xa6 +.byte 0xc2,0x31,0x71,0xaa,0xfa,0xa2,0x1c,0xa1,0xb3,0xa2,0xd7,0x22,0x5a,0x72,0x61,0x5c,0x30,0x75,0xcc,0x82,0xb0,0xd0,0x07,0x8c,0x95,0x11,0x57,0xa4,0xe2,0x42,0xf3,0x3d,0x87,0x56,0x45,0x38,0xd6,0x1b,0x2b,0x26,0x11,0x99,0xce,0xcc,0x2e,0x96,0x1b,0xa1,0x06,0xa1,0xa9,0x65,0xe1,0x1f,0x53,0xb6,0x1e,0x5c,0x44,0x40,0xa2,0xf2,0x03,0xe7 +.byte 0x39,0x24,0x59,0x5f,0xdd,0x30,0xf0,0x78,0x9f,0x34,0xf1,0xd3,0x5d,0x9a,0xdd,0xf9,0x02,0x16,0x4b,0xfa,0x8d,0xab,0x2f,0x96,0xdb,0x67,0xf6,0x1e,0x7a,0xf8,0xd8,0xe6,0x71,0xdc,0x1a,0xbf,0x44,0xd2,0xbd,0xb3,0x6d,0x47,0x69,0xe0,0x14,0xef,0xe5,0x5e,0x0a,0xe9,0x1a,0x8b,0x3f,0x67,0x1e,0x1c,0x37,0x86,0x25,0x02,0x52,0x3f,0xf5,0xde +.byte 0xe0,0xbe,0x1d,0x61,0x44,0x3d,0xd2,0xe9,0x26,0x3d,0x4b,0xa4,0xb1,0xb9,0x62,0xc5,0x70,0xfb,0x1d,0xaf,0xe6,0x19,0x97,0x0f,0x6e,0x6d,0x4e,0xdf,0x5f,0xc9,0xb2,0xb0,0xb9,0x4b,0x72,0xc7,0x60,0x5d,0xf8,0x7d,0x3b,0xd8,0x74,0x29,0xf2,0x56,0x25,0xd9,0xd9,0x12,0x3a,0x50,0x01,0x54,0xd3,0x0e,0x4c,0xbd,0xc9,0xf5,0x66,0xc4,0x4b,0xa2 +.byte 0x68,0x31,0xb1,0x9d,0x47,0xd8,0x28,0xce,0x6b,0xe4,0x5f,0x78,0x75,0x22,0x7d,0x44,0x08,0x71,0xfb,0xd8,0xa0,0x6e,0xd1,0xbd,0x64,0x4e,0x00,0x99,0xf7,0x85,0xad,0x31,0xde,0x5c,0x4c,0x7c,0xc3,0x89,0x49,0x9f,0xea,0x22,0x86,0xa0,0x48,0x48,0xcf,0x47,0xfb,0x68,0x04,0x4c,0x05,0x62,0x57,0x60,0x9b,0xa0,0x37,0x41,0x77,0xe4,0x7d,0x3e +.byte 0x36,0xda,0xd5,0xfd,0x68,0x47,0x8c,0x68,0x61,0x4c,0xea,0x38,0x20,0xa5,0xe4,0x12,0x6e,0xd5,0x14,0x37,0x01,0xcf,0xbd,0xdd,0x55,0x97,0xb4,0x30,0xf0,0x65,0x15,0xee,0x1f,0xc8,0x5b,0x07,0x82,0xae,0x43,0xad,0x11,0xda,0x0e,0x61,0x23,0x0a,0x5f,0x52,0xf9,0x9d,0xc5,0x98,0x4e,0xaf,0x77,0x21,0xc8,0x9f,0x6d,0x25,0x94,0x4f,0x91,0x1a +.byte 0xb4,0x2d,0xe3,0x15,0xe5,0xe6,0x25,0xb8,0x8e,0xd8,0x33,0xe3,0x05,0x01,0x7b,0x6b,0xa8,0x39,0x44,0x4b,0x58,0x3c,0x17,0x53,0x17,0x5c,0xbc,0xd5,0xcd,0xd4,0x29,0xe7,0x17,0x7a,0x69,0xa6,0x75,0x8e,0x0a,0x00,0x41,0xbe,0xb4,0x8d,0x79,0x1d,0xac,0x2a,0x0f,0x9b,0x7b,0x5a,0xe8,0x17,0xe2,0xb3,0x1d,0x03,0xde,0x5a,0x7c,0x31,0x18,0x8c +.byte 0x1c,0xf9,0x19,0x7b,0x37,0x1f,0x53,0x77,0xce,0x1f,0xad,0xb6,0x0d,0x21,0xe1,0xb0,0xf9,0x42,0x52,0x99,0x02,0xa8,0x58,0xab,0x94,0xf8,0x9f,0x99,0x2d,0x1e,0x68,0x4f,0x5a,0x91,0x2b,0xdf,0xe8,0xe6,0x34,0xb6,0x80,0x9b,0xb1,0x0e,0x87,0xec,0x29,0x17,0x4d,0x98,0x2d,0x40,0xd0,0xf7,0xca,0x55,0x9d,0x56,0x19,0xd5,0x7c,0x4e,0x2e,0x75 +.byte 
0x5d,0xe7,0x3e,0xed,0x47,0xdc,0xb1,0x04,0xe5,0x61,0x0f,0xe7,0xc4,0x16,0x71,0xf4,0xf8,0x8a,0xf1,0xfc,0xd5,0xdb,0xeb,0x0b,0x82,0x0f,0xfe,0x64,0xa2,0xb0,0x53,0xab,0xf5,0x01,0xc2,0x8f,0xa0,0x4d,0x5d,0x1b,0x54,0x32,0x48,0xca,0x8a,0x42,0x59,0x4a,0x85,0x68,0x75,0xd1,0x1b,0x03,0x11,0xfe,0x28,0xd7,0xd5,0x37,0x81,0x7a,0xfb,0x84 +.byte 0xfd,0xa8,0x98,0x54,0xf7,0x81,0xb0,0x2d,0x2d,0x5d,0x95,0x0a,0x5b,0x80,0x13,0x95,0xad,0x8f,0x88,0xaa,0x38,0x7e,0xbc,0x88,0xc2,0xf6,0xa6,0x1e,0x6d,0x78,0xc9,0x4f,0xa9,0xb3,0xaa,0x23,0x0c,0x62,0x19,0x6f,0x26,0x5d,0xca,0x36,0x23,0xf8,0xd1,0x76,0x80,0x32,0x59,0xa0,0x47,0x86,0xee,0xc9,0x0f,0x1d,0x37,0xd9,0xc9,0x4e,0x65,0x22 +.byte 0x17,0x95,0x88,0x85,0xb3,0x8a,0x5d,0xb9,0xe6,0x3b,0x6c,0x02,0x81,0x61,0xe0,0xab,0x19,0x6c,0x9a,0x29,0x33,0xf1,0x7b,0x0c,0x22,0x16,0x0c,0xd6,0xfa,0xc2,0x84,0xe5,0x74,0x9e,0x8e,0xf8,0xdb,0x44,0x68,0xa0,0x58,0x52,0x9f,0xad,0xe6,0x2b,0x23,0x70,0xf3,0x6e,0xdc,0xf1,0x2d,0xa5,0xc2,0x7f,0xef,0x5f,0x58,0xc2,0x96,0x66,0x67,0x4b +.byte 0x7c,0xe0,0xd7,0x96,0xda,0xf7,0xd7,0x7a,0x7d,0xb4,0x4f,0x48,0xbd,0x87,0x6b,0xf4,0xbd,0xd1,0x45,0xdc,0xba,0x4f,0xd2,0x00,0x7f,0xde,0x3c,0x57,0xd7,0x3b,0x5b,0xa9,0xf3,0x17,0x76,0x47,0x0c,0xcf,0x48,0x07,0xa8,0xc3,0x30,0x60,0xc6,0x98,0x20,0x29,0xba,0x5f,0x76,0x6d,0x63,0x5f,0x87,0x7e,0x36,0xbc,0xa3,0xe4,0xd6,0x6a,0x55,0x73 +.byte 0x8b,0x8b,0x62,0x40,0xc5,0x7e,0xa3,0x33,0x04,0xce,0xe2,0x9d,0x9f,0x67,0x1c,0xf0,0xa1,0x78,0xd2,0x0b,0x58,0xc1,0x2e,0xec,0x78,0x0a,0xc9,0x0b,0x1d,0xfb,0xcc,0x72,0xd8,0xe4,0x15,0xcb,0x09,0x8b,0xd9,0x33,0xa9,0xb6,0x24,0x7e,0x59,0x48,0xbf,0xda,0xdb,0x5c,0x99,0xd1,0x92,0x1b,0xb6,0xf6,0x75,0x78,0x53,0x69,0x89,0x27,0x6b,0x3c +.byte 0xfb,0xd2,0xa7,0xeb,0xc5,0xf7,0xea,0x8b,0x38,0x59,0x8e,0x02,0xc7,0x6e,0x96,0x8a,0x85,0x1c,0x91,0x1b,0x97,0x97,0x9e,0xa7,0x9d,0x10,0xa4,0x4a,0x6e,0xa8,0x51,0x05,0xbe,0x5f,0x9a,0x5b,0x94,0xf2,0x2c,0xa1,0x1e,0x33,0xc5,0xe8,0x92,0xb8,0xd2,0xfa,0x27,0x07,0x12,0xa1,0xdc,0x24,0x43,0x28,0x06,0xe5,0x43,0x57,0x8f,0x66,0x72,0x2f +.byte 0x26,0xf7,0xea,0xa1,0xcf,0x57,0xd6,0xa6,0xf7,0x37,0x1d,0x6e,0xd9,0xde,0x1a,0x8c,0xf5,0x01,0x76,0xc3,0x56,0x40,0x57,0x3d,0x4a,0x14,0x04,0xf2,0xfc,0xba,0x3b,0x60,0xf1,0x88,0x1e,0x16,0x08,0x99,0x90,0xfe,0x27,0xaa,0x04,0x53,0xd8,0x7e,0x0c,0x58,0x6a,0xd9,0x5a,0xe4,0x11,0xd4,0xcc,0x48,0xbe,0x03,0x08,0xbc,0x61,0x47,0xdd,0xde +.byte 0x5f,0x03,0xc7,0x8f,0x9c,0x08,0x93,0xe3,0xaa,0xee,0x9c,0xe3,0xc6,0x06,0x78,0xda,0x0a,0xdd,0xb0,0xc3,0xf3,0x0b,0xe5,0xa0,0x5f,0x1e,0x3e,0xb3,0x15,0x7f,0xf1,0xf4,0x38,0xb2,0xed,0xf2,0xa6,0x8b,0x1d,0x78,0xb6,0x03,0x19,0xcd,0x17,0xb4,0x18,0x17,0x49,0x61,0x17,0xbd,0xbe,0x4b,0x04,0x00,0xce,0x4b,0xcc,0x47,0x61,0x76,0x85,0xdc +.byte 0x2b,0x85,0x48,0x82,0xf4,0x9b,0xb4,0x62,0x53,0xc7,0x06,0x50,0xf2,0x3e,0xba,0x6d,0xf2,0x19,0x0f,0x7f,0x84,0xce,0xa6,0x4d,0x96,0x97,0x94,0x12,0xb6,0xd0,0xd6,0xa4,0xc1,0xcc,0x14,0x54,0xf6,0x7a,0xf1,0x94,0x62,0xa1,0xc7,0x22,0x9b,0x0d,0x0e,0x69,0xcf,0x38,0x5c,0xda,0x9f,0xc0,0xfa,0x93,0x81,0x24,0xce,0x9f,0xf3,0xc2,0x66,0xad +.byte 0x06,0x21,0xf2,0x48,0x6c,0x4a,0x0d,0xb8,0x41,0x86,0xaf,0xb7,0x6c,0x65,0xcb,0x83,0xd8,0x75,0x11,0x60,0xfa,0x06,0xe5,0xd2,0x11,0x87,0x29,0xb8,0x41,0xcb,0x17,0xb5,0xbd,0xbd,0xf9,0xd5,0xbc,0x89,0xb6,0x60,0x65,0x59,0xbb,0x38,0x9d,0x70,0xf9,0x81,0x6b,0xe6,0x12,0x80,0x08,0x73,0x9f,0xfb,0x2f,0x72,0x4e,0x18,0xff,0x65,0xab,0xa6 +.byte 
0xaa,0x78,0xf1,0xa4,0xe9,0x1a,0x7d,0xa5,0xdd,0x91,0x77,0xa9,0xa3,0xf3,0xe3,0xe5,0x5a,0xa2,0x0d,0x3a,0x2a,0x4a,0x11,0x9a,0x8d,0xc3,0x00,0x6e,0xd4,0x4f,0xb9,0xe7,0x39,0x78,0x89,0x64,0xb2,0xc8,0xfd,0x1f,0xe6,0xa9,0x54,0x17,0x83,0x3f,0xeb,0x97,0x77,0xac,0xc8,0xba,0x0e,0x77,0x02,0xb0,0x29,0xbe,0x51,0x62,0xef,0xa5,0xd5,0xab +.byte 0x79,0x98,0xab,0x7a,0x1e,0x13,0xe8,0x87,0x4f,0x61,0xa3,0x37,0xdf,0xe6,0xda,0xb9,0xf5,0x69,0xf7,0x7a,0xee,0xd6,0x5f,0x6a,0xb3,0x95,0x55,0x59,0xd1,0x6c,0x5b,0xd5,0xba,0x8b,0x74,0x85,0xbf,0x1e,0xe5,0xb3,0x24,0x28,0x4b,0xc8,0x4a,0xec,0xa1,0x1d,0xda,0x99,0x3f,0xdf,0xfc,0xe6,0x2e,0x1b,0xa4,0xba,0x1a,0x03,0x89,0xb7,0x93,0x4e +.byte 0xaf,0x40,0xb0,0x7e,0x3f,0x34,0x0d,0x94,0x75,0x8c,0x8a,0xfb,0x88,0xcd,0xd3,0xc2,0x61,0x95,0x63,0x51,0xaa,0x78,0x1f,0x24,0x95,0x5a,0xb5,0x98,0x9a,0xd4,0xb8,0x34,0xe1,0x47,0x1c,0x68,0x0f,0x08,0xf1,0x69,0xe6,0xd4,0xaf,0x23,0xf6,0x32,0x71,0x51,0x01,0xa9,0xf2,0xa1,0x45,0x0b,0x75,0x82,0x09,0xe4,0x9c,0x2a,0x1d,0x0b,0xd6,0xd2 +.byte 0x26,0xe8,0x30,0x44,0xdf,0xa3,0x2b,0x97,0x11,0xc7,0xe7,0x47,0xfd,0xc7,0xbf,0x59,0xf3,0x28,0x32,0x46,0xc0,0xc4,0x7a,0x96,0x08,0x0d,0x2c,0xa1,0x82,0x6c,0x0a,0x33,0x82,0x55,0xd7,0xcf,0x3e,0x08,0xbb,0x22,0x15,0x96,0x12,0x66,0xd2,0xae,0x21,0x3a,0x54,0x6a,0xe0,0x33,0x0c,0xa4,0x96,0x4b,0x5d,0xf2,0x86,0xb9,0x70,0xe4,0x65,0x45 +.byte 0xe4,0x2f,0xa7,0xb4,0xc1,0xd5,0x9a,0x02,0xa1,0x5b,0x4e,0x58,0xca,0xf8,0x63,0xae,0x45,0x1c,0xf4,0xa7,0xc8,0xa5,0x84,0x23,0x87,0xcb,0x3e,0x88,0xca,0xe9,0xa9,0x49,0xc5,0xc6,0x63,0x37,0x99,0xe0,0x27,0x03,0x96,0x7b,0x73,0x8c,0x36,0xde,0x89,0x80,0x30,0x2c,0x00,0x94,0x0b,0xfb,0x1f,0x39,0xe0,0xed,0xb6,0x31,0x21,0x90,0xfe,0xa4 +.byte 0xee,0xa5,0xe5,0x7b,0x9a,0x11,0x41,0x51,0xab,0x89,0x54,0xe0,0x8d,0x5f,0x10,0x1b,0x76,0x27,0x77,0x3d,0xb0,0x58,0x86,0x7b,0xb7,0x45,0xfb,0xd0,0x81,0xa8,0xcd,0xc0,0xc8,0x5f,0xfb,0xfe,0x8c,0x0a,0x3d,0x5d,0x61,0x4b,0x9b,0x32,0x75,0x66,0xa9,0xac,0x32,0x35,0xe9,0x1a,0xdf,0x06,0x8d,0x13,0x5d,0x40,0xcb,0x7d,0x50,0x3e,0x54,0xab +.byte 0x04,0xbc,0x83,0x32,0x8f,0xf5,0x93,0x1d,0x9b,0x5a,0xe1,0x19,0x70,0x4a,0xba,0xfc,0x4c,0x6a,0xf3,0xd6,0xd1,0xfd,0x48,0xd0,0x7c,0xa4,0xab,0x0b,0xb6,0x5f,0xe1,0x31,0xce,0x99,0x10,0x98,0xfc,0x6e,0x1c,0xaa,0x9c,0x34,0xa2,0x55,0xdc,0xe0,0x81,0x1b,0x9e,0xff,0x75,0x2e,0x25,0xe9,0x2c,0x20,0x83,0xf6,0x66,0xf9,0x63,0x31,0xfe,0xa7 +.byte 0xbf,0x4d,0xfd,0xff,0x0b,0x93,0x84,0xd4,0xb4,0x72,0x13,0x38,0x90,0x75,0xc9,0xff,0x61,0x4b,0xf9,0x55,0x62,0x58,0xf0,0x60,0xce,0x2d,0xec,0x94,0x06,0x0a,0xde,0x48,0xc0,0x46,0x89,0xfb,0x5c,0xf7,0x9f,0x37,0xad,0xd2,0xff,0xbe,0xfb,0x81,0x21,0xe0,0x20,0x43,0x88,0xad,0x40,0x47,0x7a,0xa9,0x30,0x88,0x10,0x16,0x41,0xf8,0x25,0xe0 +.byte 0x8f,0xc2,0xe3,0x9f,0x48,0xd3,0xfe,0x61,0x70,0xb9,0xa1,0x9e,0xaa,0xa6,0x73,0xcf,0xc3,0xd6,0xab,0x69,0x65,0x4a,0x3c,0xec,0x28,0x02,0x63,0x62,0xa1,0xb6,0xa3,0xd5,0x8c,0x9e,0x11,0x81,0x98,0x12,0x4f,0xec,0xb6,0xe5,0x3a,0x96,0xa1,0x11,0x13,0x77,0x5f,0x0f,0x19,0x40,0x14,0x28,0xcc,0xf1,0x3e,0x19,0x1d,0x78,0x31,0xac,0x5c,0xce +.byte 0xd7,0x29,0xfa,0x02,0x3b,0x29,0xd8,0x3a,0x37,0xcb,0x94,0xb2,0x38,0xc7,0x7f,0x3a,0x46,0xd2,0xb7,0xfe,0xfb,0x54,0x7c,0x01,0xa2,0x9b,0x53,0x57,0x04,0x73,0x4e,0x06,0x90,0xe5,0x78,0x0a,0x45,0x67,0x12,0x83,0xd7,0x31,0x59,0xa4,0x76,0xaa,0x7c,0xde,0x72,0x92,0x11,0x94,0x4c,0x6a,0xe4,0x35,0x35,0x3a,0x2e,0xef,0x7c,0xc1,0x91,0x76 +.byte 
0xd0,0xfe,0x84,0xd1,0xa1,0xf9,0x03,0xc3,0xba,0x09,0xbb,0x2c,0xe2,0xb5,0x06,0x7e,0x23,0xb7,0xe0,0xc1,0xd3,0xfd,0x55,0x01,0xf3,0xba,0xc5,0x1b,0xf8,0x02,0x60,0x92,0x0a,0x93,0x1c,0xc4,0x19,0x03,0x88,0xf5,0x45,0xe5,0x8f,0x7d,0xce,0x2c,0x87,0x2e,0xf6,0x55,0x8c,0xf9,0xb0,0xd2,0x72,0x2d,0x93,0x6d,0x28,0x6e,0x8e,0x3a,0xed,0x68 +.byte 0x02,0xda,0x80,0xd0,0x71,0x4a,0x8f,0x06,0x59,0x38,0x89,0x81,0xcb,0x1a,0x74,0x1e,0x62,0xa3,0xa5,0xb8,0x85,0xc3,0xd2,0x04,0x3d,0x3b,0x93,0x36,0x0c,0x12,0x55,0xfb,0x7b,0xc8,0xa3,0x25,0xa7,0x93,0xb0,0x3e,0x49,0x86,0xbf,0x76,0x8f,0xc4,0x4c,0xfe,0xce,0x4a,0xf6,0x2f,0x15,0x33,0x06,0x3a,0x35,0x49,0xe7,0x08,0xff,0x99,0xac,0xf6 +.byte 0x20,0x6d,0xab,0xb2,0x05,0xa9,0xe4,0x06,0x57,0x9c,0xf4,0x76,0x8c,0x82,0x64,0xd5,0x67,0xe0,0xad,0xe1,0x69,0xdc,0x9e,0x2c,0x59,0x92,0x3a,0xc8,0xc1,0x0a,0x61,0x89,0x45,0x9f,0x8b,0xf8,0x64,0x0a,0x5a,0x75,0x55,0x37,0x24,0xe1,0x42,0x43,0x7c,0x9c,0xcd,0x4e,0x9e,0x19,0xfb,0xd9,0x15,0x29,0x30,0x52,0x33,0xf3,0xc8,0x88,0xdb,0xaa +.byte 0x07,0x27,0xfb,0x2b,0x0c,0xc0,0xa1,0x5f,0x51,0xf1,0x54,0xf8,0x90,0x0a,0x35,0x07,0x6e,0x9c,0x64,0xd8,0x4f,0x2d,0xb3,0x61,0xbc,0x18,0x1f,0x22,0x84,0x94,0x4b,0x85,0xfc,0x4a,0xf9,0xe5,0xfc,0xdd,0x7a,0x07,0xa2,0xbb,0xbe,0x7e,0x1f,0x4e,0xf9,0x29,0xb8,0xde,0x56,0xe9,0x04,0xc1,0xc2,0xb6,0xa8,0xc7,0xb6,0x83,0xf2,0x85,0x3d,0x35 +.byte 0xe3,0xeb,0x2f,0x2f,0x3c,0x1a,0x3a,0xf1,0x61,0x1f,0xe8,0xf0,0xce,0xa2,0x29,0xda,0x3f,0x38,0xf5,0x82,0x7a,0xb8,0x55,0xf1,0x1a,0x6e,0x5b,0x5c,0xd0,0xc8,0xc8,0x3a,0xe2,0xaf,0xb4,0x6f,0xba,0xe4,0x03,0x78,0x5f,0x47,0x4b,0xaf,0xfe,0x2a,0x7e,0x27,0xba,0x17,0xb4,0x92,0x27,0x70,0x13,0xd9,0xbb,0x6b,0x1c,0x9a,0x3e,0x29,0x85,0x9a +.byte 0xb7,0x64,0x5b,0x6d,0x7b,0xec,0xb2,0x26,0x3a,0x4b,0xb7,0x17,0xaf,0xb5,0xa1,0xbc,0x4d,0x67,0x4c,0x86,0xd1,0x53,0x2e,0x5d,0x64,0xe8,0x55,0xd9,0xbb,0xae,0xc1,0x55,0x41,0x99,0x8e,0x4d,0xed,0x3d,0x9e,0xea,0xe3,0xf2,0x76,0x45,0x6d,0xaa,0xbb,0x89,0x0b,0xc0,0x13,0xfe,0x99,0x2c,0xb0,0xd2,0xa9,0xeb,0x58,0x57,0x4d,0x88,0x2e,0x04 +.byte 0x4f,0x7a,0x76,0xaa,0x3a,0xa6,0x08,0x93,0x42,0x74,0x2f,0x3a,0x35,0xb0,0x36,0xcc,0x77,0xec,0x54,0x41,0x2e,0x81,0xf6,0x9f,0xf3,0xe7,0x23,0xc0,0x3f,0xa4,0x52,0x83,0x38,0xe2,0x12,0xed,0xdb,0x23,0xa0,0x0b,0xbf,0x61,0x98,0x89,0xb0,0xa4,0x3d,0xa9,0x6a,0x73,0xa1,0x99,0xc9,0x9e,0x68,0x45,0x37,0x4b,0x6c,0x87,0xfb,0x93,0xf2,0xaa +.byte 0xe8,0x1d,0x53,0x6c,0x4b,0xda,0xc5,0x6f,0xaa,0xde,0x99,0xd2,0xba,0x7c,0x27,0xc2,0x4e,0xd5,0x5b,0xc8,0x13,0x9e,0xa2,0x10,0x6a,0xbb,0x39,0xf9,0xa7,0x55,0x0a,0x65,0x88,0x3c,0x9b,0xff,0x83,0x4e,0xf7,0x9c,0x99,0x69,0xbd,0x64,0x0d,0xd1,0xc0,0xb0,0x43,0xd6,0x63,0x50,0x13,0x68,0x8d,0xd1,0x7e,0x56,0x93,0xb5,0x8e,0x8f,0x12,0xe5 +.byte 0x37,0x96,0x21,0x64,0xd5,0x0b,0xf6,0x27,0xf8,0xaa,0x34,0x8e,0xc4,0x2b,0x7b,0x6a,0x7c,0x89,0x4e,0x15,0x15,0x3d,0x17,0x93,0xd4,0x99,0xfe,0x97,0x95,0x20,0x85,0xcc,0xd4,0xcd,0x73,0x67,0x80,0x22,0x06,0xed,0x5e,0xce,0x90,0x59,0x01,0x31,0x24,0x17,0x37,0x4a,0x63,0x96,0xc2,0xf3,0xe0,0x21,0x0a,0x3b,0x9f,0x94,0xad,0xd6,0xa4,0xa9 +.byte 0xa2,0x54,0x0d,0x2a,0xb3,0x5c,0xfa,0xbe,0xeb,0x21,0xd6,0x13,0x22,0xa5,0x95,0x5e,0x25,0x72,0xf9,0x18,0x1f,0x50,0x64,0x04,0x5b,0xe8,0x0e,0x1f,0x6c,0xe1,0x4e,0xf5,0x7f,0xf0,0x13,0x4f,0xda,0x75,0xab,0x5a,0x98,0xd3,0x07,0x32,0x96,0x2a,0xc7,0x1e,0x0f,0x14,0xdb,0x96,0x5f,0xac,0xc1,0xef,0x5b,0x2d,0xd6,0x6d,0x13,0x01,0xd9,0x04 +.byte 
0x9c,0xcd,0xe5,0x5e,0xbe,0x3a,0x47,0x14,0x09,0xbe,0x11,0xad,0x87,0x3f,0x0e,0xe1,0xcb,0x97,0xd0,0x6e,0x1f,0x49,0x07,0xd1,0x8c,0x2b,0xe0,0xf0,0xb2,0xaa,0x8b,0x70,0x18,0x7f,0x29,0xcc,0xc4,0x23,0x66,0x48,0xc4,0xb5,0x5e,0xf1,0x10,0xd7,0x1d,0x2a,0xba,0xe4,0x12,0x64,0x1d,0xf5,0x03,0x35,0x71,0x57,0x5d,0xf4,0xa4,0xb5,0x99,0x0b +.byte 0x4c,0x80,0x65,0x07,0x2f,0xbc,0xf7,0x28,0x8b,0xc0,0x8f,0x84,0x63,0x7e,0xf5,0x01,0x23,0x8c,0xaf,0x71,0x35,0xd4,0xe1,0x70,0xc7,0xef,0x1f,0x66,0xa9,0x34,0x57,0xaa,0x9a,0xbb,0x80,0x43,0x15,0x96,0xc4,0x03,0xd9,0xae,0xbe,0x89,0x1c,0xa1,0x9f,0x65,0x61,0xe5,0x90,0x9f,0xa6,0xf4,0x3b,0xde,0xa1,0xd1,0xf1,0xf9,0x2d,0xd7,0xa7,0x7e +.byte 0x3d,0x42,0x3d,0x1b,0x99,0xed,0x49,0x2e,0x92,0x6b,0x47,0x0e,0x0b,0x90,0x56,0xe0,0x1b,0x6b,0xfe,0x97,0xfe,0x9b,0xa2,0x50,0xcc,0xbf,0xea,0xae,0xe8,0xf0,0xc4,0xe5,0x81,0x20,0x4a,0xb0,0xf7,0xa5,0x23,0x24,0xf6,0x3f,0x9e,0x9c,0xcc,0xce,0xe4,0x95,0x49,0xea,0x66,0x4a,0x35,0x31,0xf3,0x03,0xc3,0x08,0xf9,0x5f,0x95,0x4c,0xbc,0x84 +.byte 0x13,0xbe,0x7f,0x35,0xbb,0xd7,0x35,0x3c,0xfb,0x05,0x43,0x95,0xbf,0x87,0xf2,0xc3,0x2d,0xef,0x13,0x1d,0x65,0x17,0x82,0x75,0x3d,0x67,0x51,0xcd,0x6e,0x42,0x5f,0x49,0x53,0x8b,0xaf,0x34,0x7d,0xa8,0xc1,0x45,0xcd,0x3d,0x29,0x00,0xa3,0xf3,0xbb,0x44,0x00,0x05,0x57,0xa5,0xeb,0xfd,0x98,0xa6,0xae,0xc6,0xc4,0x6c,0x6d,0x7d,0xf6,0x3e +.byte 0x82,0x1d,0x12,0xe7,0xcd,0xd2,0xd5,0xfe,0x41,0xf8,0xa4,0xb3,0x6a,0x04,0x13,0x28,0x10,0x40,0x27,0xc9,0x43,0x74,0xcf,0xaf,0x9b,0x60,0x17,0x43,0x8f,0xd7,0xb7,0x56,0x72,0xf3,0x48,0x0a,0xe6,0x36,0xf2,0x3f,0x51,0xf9,0x6e,0xc8,0xa3,0x04,0x8c,0x01,0x86,0x6e,0x83,0x27,0xe2,0xba,0xf2,0x8f,0x8f,0xa1,0x39,0xe7,0x17,0xdd,0x06,0x10 +.byte 0x0c,0x7f,0xfa,0x22,0x5d,0x88,0x35,0xc6,0xcd,0x60,0xa2,0xf0,0xfd,0xc9,0xed,0x85,0xac,0x88,0xfd,0x7d,0xc0,0x77,0x1b,0x80,0x3d,0x21,0x1e,0x8e,0x4d,0xdb,0x20,0xe2,0x38,0xad,0xd4,0xb5,0x2b,0x2b,0x31,0xbc,0x7b,0x02,0xa2,0x25,0x50,0xc0,0x01,0x20,0x76,0x6f,0x98,0x0b,0x3d,0x46,0xed,0xbb,0x2b,0x39,0x74,0x30,0xce,0x3e,0x6d,0x91 +.byte 0xa1,0x89,0x83,0xde,0x69,0x93,0x1a,0x14,0xa1,0xb0,0xaa,0x80,0xb0,0x1c,0x02,0x3f,0x13,0x9a,0x15,0x7f,0xb4,0x02,0x8f,0x30,0x0b,0xee,0xd9,0x72,0xcb,0x74,0x95,0x4a,0x39,0xb3,0x4e,0x78,0x12,0xb1,0x77,0x89,0xc0,0xaf,0x17,0xfd,0xc1,0x68,0x65,0xd1,0x08,0xae,0x56,0x5c,0xe0,0xe7,0x6f,0xb3,0x1e,0x10,0xce,0xd8,0xdf,0xee,0x67,0xad +.byte 0xd8,0x08,0xe0,0x79,0x36,0xe4,0x57,0x1c,0x45,0x22,0xa7,0x44,0xa8,0x12,0x37,0x92,0x85,0x9f,0x3a,0x48,0xd0,0xfd,0xb3,0x40,0x20,0x10,0xed,0x11,0xe0,0x9a,0xa6,0x09,0x5b,0xe9,0x21,0x95,0xe1,0x45,0x19,0x39,0xcc,0x85,0x5f,0xa5,0x6b,0x46,0x37,0xe1,0xa1,0x17,0x3f,0xb6,0xe9,0xb0,0x81,0x25,0xf6,0xd1,0xb8,0x22,0x5a,0x27,0x48,0x83 +.byte 0x01,0x36,0xd4,0xb8,0xc0,0x9f,0x37,0x52,0x22,0xd2,0x69,0x7b,0x3d,0xfb,0x31,0xc1,0xa3,0xb4,0xa1,0x1d,0x0e,0x24,0x9a,0xda,0x02,0x15,0x4b,0x46,0x24,0x0e,0xb1,0x79,0xc2,0x5b,0x01,0x60,0x4a,0x24,0x8a,0xbb,0x70,0xaa,0xf4,0x45,0xc1,0x0d,0x04,0x26,0x3f,0x74,0xbd,0xdd,0x33,0xaa,0xd6,0x62,0x56,0xb1,0xe7,0x2d,0x7b,0x66,0xa2,0x40 +.byte 0xb4,0xe4,0xbd,0x8e,0x35,0xba,0xf1,0x2f,0x59,0xa7,0x01,0x6d,0x5a,0xa7,0xa6,0x3b,0x82,0xa3,0xb4,0x54,0x51,0x33,0x6b,0xfb,0x78,0x4a,0x74,0x88,0x7f,0x55,0xea,0x08,0x8e,0x19,0x78,0xbc,0x80,0x19,0x2f,0x41,0x97,0x20,0xa0,0x9e,0xbf,0x44,0xae,0x2e,0x26,0x66,0xe3,0x25,0xa0,0x92,0xa9,0xbe,0x8c,0x0d,0x96,0xec,0x93,0x99,0xe2,0xe7 +.byte 
0x81,0xd5,0x10,0x62,0x3a,0x97,0x38,0x51,0x36,0x11,0x00,0xe0,0xc1,0x3a,0xc5,0xd4,0xa5,0x19,0xf4,0x82,0x66,0x0c,0xf9,0xb3,0x04,0x3e,0x57,0xc3,0x43,0xab,0xc6,0x52,0x95,0x8f,0xd3,0xf1,0xde,0xd9,0x57,0x6d,0x32,0x4f,0xc7,0x8c,0x1b,0x7a,0x53,0x6a,0xcf,0x56,0xea,0x61,0xb4,0xe5,0x64,0x2d,0x02,0x26,0x5b,0xcf,0x1c,0xc7,0x37,0xc3 +.byte 0x41,0xd2,0x1b,0x6c,0x5b,0x47,0xb8,0x73,0x89,0xfe,0x0e,0x7a,0x35,0x05,0xfc,0xea,0x6a,0x34,0x74,0x69,0xf0,0x12,0x29,0xa9,0x33,0xce,0x93,0x15,0xa0,0x68,0xb3,0x46,0x43,0xdb,0x8d,0xfa,0xef,0x93,0x66,0x72,0x18,0xae,0xe4,0xab,0xf4,0x8a,0xd1,0xb5,0x42,0xbd,0x2d,0xda,0xcb,0xf6,0x44,0x25,0xb1,0x01,0x8a,0xff,0xd5,0x34,0x16,0xec +.byte 0x7e,0x38,0x7b,0x50,0x41,0x61,0xf9,0xdf,0x4c,0x3e,0x02,0xd6,0xc3,0xce,0x19,0x9f,0x12,0x45,0x0c,0x99,0xb1,0xd9,0xeb,0xb9,0xe3,0xd5,0xb6,0x2b,0x25,0x8c,0x0b,0x04,0xf8,0x8d,0x41,0x41,0x3d,0x39,0x1b,0x7f,0x88,0xa7,0x8f,0x61,0x30,0xfe,0x67,0x75,0x35,0xd1,0x41,0x90,0xda,0x73,0x80,0xcf,0xc9,0xf6,0x44,0x00,0x67,0xcd,0xca,0xaf +.byte 0x6d,0x84,0x39,0x9a,0xb2,0xbb,0xfc,0xac,0x9b,0xb2,0x95,0x2f,0xc9,0x06,0x3a,0xa4,0x7b,0x9a,0x25,0xc6,0xe5,0xdb,0x7a,0xc6,0x8b,0x84,0x6a,0xb7,0x1e,0x22,0xaa,0x10,0x96,0xd3,0x55,0x50,0xa2,0x02,0x04,0x69,0x92,0xd7,0x6b,0x1f,0x9b,0x45,0x07,0x71,0xda,0xdc,0x76,0xc5,0xb8,0x34,0xa2,0x32,0x33,0x16,0x2e,0xb0,0x2a,0x90,0x43,0x40 +.byte 0x92,0x77,0x74,0x4e,0xdc,0xb4,0xe2,0x7d,0xc1,0x57,0xaf,0xf4,0x2c,0x20,0x65,0x77,0x88,0xc9,0x6e,0x69,0x38,0xc8,0x19,0x95,0x32,0x54,0x59,0x7f,0x37,0xd7,0x3c,0x07,0x05,0x87,0x2b,0xf9,0x58,0x74,0xc7,0x61,0x13,0x3d,0xc2,0xd9,0xec,0x3b,0x36,0x9f,0x8e,0xae,0x52,0xdd,0x5c,0xaa,0x29,0x6b,0x31,0x34,0x48,0x61,0x34,0x62,0x56,0xce +.byte 0x25,0xa8,0xc0,0x62,0xf5,0x35,0x58,0x4d,0x8e,0x61,0xd4,0xae,0x25,0x50,0xee,0x45,0xdd,0x14,0x7d,0x46,0x81,0x47,0xc3,0x3f,0x3f,0x81,0xdb,0x9a,0x59,0x56,0x4f,0x45,0xed,0x9c,0xe2,0xfc,0x96,0xff,0x5d,0x37,0x70,0xad,0xd2,0xeb,0xd9,0x2d,0x2a,0xaf,0xb9,0x16,0x4a,0x79,0x5d,0x76,0xb5,0x8f,0x74,0x19,0x6f,0x74,0x7d,0x4a,0xee,0x83 +.byte 0xa5,0x81,0xf3,0xd5,0xa0,0x43,0x5e,0x46,0xba,0xbe,0x49,0xa8,0xce,0x72,0x36,0x32,0xcd,0x8c,0x9b,0xa0,0xf9,0x5d,0xb7,0xb9,0xc7,0x8c,0xb2,0x59,0xb4,0x44,0xc1,0x90,0x53,0x92,0xd2,0xa8,0x4c,0xf9,0x35,0x40,0x32,0xd1,0xf0,0x2f,0xcb,0x6a,0x0b,0xe0,0xbe,0x34,0xc9,0x82,0x18,0x8d,0xfb,0xfc,0x50,0x8d,0x67,0xd5,0x86,0xd4,0xf1,0xb1 +.byte 0xaa,0x2f,0x9c,0xbc,0x52,0xbb,0x9f,0x17,0x1c,0x74,0x1d,0xdf,0x2d,0x1a,0x94,0x43,0x9b,0x80,0xb9,0x48,0xa3,0xaf,0x4b,0x30,0x0d,0xd9,0x3f,0x11,0x48,0x79,0x60,0xcc,0x25,0x6a,0xdb,0x8a,0xda,0xab,0xda,0x09,0x7c,0x9c,0x4a,0xaf,0xf9,0x0d,0xfb,0x7a,0x92,0x61,0xa5,0x17,0xf8,0x79,0x1b,0x00,0x52,0x56,0x5e,0x27,0x22,0x37,0xf4,0xbe +.byte 0x52,0x36,0xd3,0xdc,0x9a,0x33,0xf5,0x44,0x0e,0x53,0x0b,0xf6,0x9b,0xb0,0xb6,0x11,0xe4,0xd5,0x45,0x2e,0xdc,0xdb,0x46,0x18,0x9a,0x90,0x8b,0xcc,0xfe,0xc6,0x94,0x4f,0x97,0xb9,0x42,0xb6,0xd3,0x8f,0x7c,0x20,0xd1,0xa8,0xe6,0x85,0xce,0x65,0xeb,0x95,0x38,0x11,0x5c,0x1a,0x9d,0x34,0x25,0xc2,0xf0,0x33,0xbb,0x2c,0xc9,0x8d,0x0a,0x7a +.byte 0xb1,0x90,0x9f,0x24,0xed,0x35,0x3c,0x7e,0x71,0x82,0x12,0x3a,0x79,0x29,0xc8,0xa7,0x3e,0xa2,0x4e,0x50,0x03,0x94,0x7a,0x94,0xb7,0x2b,0x61,0x95,0x3d,0x5e,0x60,0x1c,0x68,0x51,0x82,0x73,0xe0,0x4a,0x2a,0x48,0x26,0xda,0xa3,0x53,0x8c,0x83,0xba,0x9f,0x95,0x37,0x5e,0x68,0x54,0x19,0x21,0xf8,0x31,0xaf,0x6b,0xfc,0x3a,0x3e,0xe3,0x3f +.byte 
0xdb,0x16,0xb5,0x7e,0x13,0xf8,0xfd,0x7f,0x36,0xd6,0x8e,0x33,0xaa,0xe9,0xa4,0xa7,0xfd,0xf0,0x32,0xa6,0xdf,0xfa,0x22,0x7d,0xff,0x2a,0xe6,0x0d,0x6f,0xe2,0x21,0x54,0x6c,0x1a,0x99,0x17,0x56,0xad,0xce,0x39,0x6b,0x1a,0xe8,0x27,0x13,0x12,0x9c,0x4b,0x84,0x69,0x73,0xde,0x44,0x14,0xb2,0x7c,0x44,0x54,0x91,0x4f,0xeb,0x83,0xec,0x04 +.byte 0x73,0x85,0xb1,0xa8,0x44,0x72,0xa7,0x77,0xaf,0x0c,0xe0,0x52,0x65,0x04,0xe7,0x2a,0xee,0x0c,0x20,0x83,0x32,0x34,0x17,0x00,0x61,0xf9,0xf5,0x42,0x03,0xa4,0xb8,0x02,0x6f,0xb2,0xd3,0x65,0x51,0x2a,0x8e,0xdf,0x28,0x78,0x8a,0x8a,0x00,0xfb,0x24,0xd6,0xd5,0x86,0xaa,0xfb,0x86,0x93,0x5d,0x11,0xa4,0xf3,0xfd,0x36,0x18,0xf3,0x61,0xea +.byte 0x33,0xa8,0x0c,0xf0,0xb4,0x68,0xee,0xd3,0xe3,0x4f,0x22,0x24,0xde,0x1f,0x29,0x84,0x8b,0x5b,0x73,0x15,0xd6,0x62,0xa3,0x71,0x7d,0xf0,0x65,0x36,0xca,0x68,0x8a,0x6d,0x61,0x9c,0x0d,0x53,0xdd,0xf4,0x12,0xb3,0x5f,0xf0,0xb1,0x86,0xd6,0xe2,0xd6,0x80,0x4a,0x01,0x09,0x99,0x65,0xdb,0xae,0xe6,0xfc,0x68,0x5b,0xf9,0x10,0x99,0x8b,0x9f +.byte 0x08,0x52,0x09,0xae,0x59,0x4d,0x6c,0xf9,0x91,0x2b,0x57,0xea,0xf0,0xa3,0xdb,0xb8,0x99,0x29,0x2f,0xab,0x95,0x01,0x7d,0xec,0xd8,0x77,0x73,0x75,0x4f,0x88,0x44,0x69,0x76,0xc9,0x3c,0xf0,0x2d,0x7b,0x0d,0xbe,0xd4,0x88,0x0d,0xbc,0xa0,0x52,0xf4,0x2a,0xd1,0x62,0x2a,0xa9,0xe2,0x41,0x2f,0x52,0xce,0x96,0x7d,0x65,0x9b,0x74,0x82,0xde +.byte 0x43,0x4d,0xf8,0x8e,0x77,0x1c,0x18,0xf5,0x7e,0xab,0x94,0x3e,0xe7,0x90,0x2b,0xa1,0x16,0x00,0x7f,0x9c,0x9d,0x86,0xd1,0x74,0x7e,0xf7,0xbd,0x5a,0xa7,0x2f,0x0f,0xb0,0x5c,0xfc,0xfb,0x59,0x00,0xf3,0x84,0x09,0x77,0x66,0x17,0xf6,0x5d,0x0e,0xe2,0xe2,0xd4,0xb3,0x9e,0x79,0x88,0x66,0xa5,0x8e,0x30,0xae,0xca,0x7e,0x2b,0x32,0xa2,0x89 +.byte 0xe9,0x7e,0x59,0x21,0xd5,0x99,0xc7,0x10,0xa8,0x6f,0x95,0x8d,0x84,0xb4,0xcf,0x61,0xe7,0x5c,0x09,0xf3,0xbc,0xeb,0xf6,0x0c,0x84,0x1a,0x8d,0x13,0xf8,0x49,0x22,0xeb,0x09,0x55,0xef,0x56,0x12,0x21,0xcb,0x61,0x87,0xbf,0xef,0x43,0x5b,0x82,0xa8,0xc2,0xa2,0x5e,0xad,0x54,0x9a,0xcc,0x95,0xa2,0x01,0x05,0xb2,0xbb,0x26,0xa8,0xfd,0x6b +.byte 0x66,0x95,0x9c,0x0b,0x7b,0x23,0x32,0xff,0xdd,0x6c,0x18,0x1e,0x77,0x01,0x3c,0x82,0xaa,0x97,0x28,0x0f,0x93,0xa5,0x6c,0x85,0xe5,0x94,0x40,0xe0,0xa3,0x01,0x57,0x56,0x43,0x40,0xdd,0xa9,0xaf,0x21,0x79,0x10,0x8b,0xff,0x4b,0x51,0xe4,0xa2,0xe5,0xd7,0x0c,0xe2,0x9e,0x1e,0x38,0xdb,0x64,0xe1,0xb1,0x5b,0xe5,0x40,0xab,0xf6,0x05,0xd2 +.byte 0xba,0x85,0x78,0x61,0x2d,0x2e,0x07,0x06,0x6d,0x86,0x59,0xaa,0xd9,0x2c,0xfb,0x83,0x34,0xd0,0x2d,0x1d,0xad,0x5f,0xe4,0xac,0x05,0x46,0x3a,0x7b,0xd9,0xef,0x9f,0x2b,0x0c,0x18,0x21,0xf1,0x24,0x8a,0xb4,0x6e,0xd2,0x98,0x75,0x08,0x96,0x0c,0x7b,0x41,0xb7,0xf7,0x1f,0xcd,0xa8,0x1f,0x44,0xb1,0xed,0xdc,0x0e,0xcb,0x94,0xa0,0xb8,0x62 +.byte 0x67,0xdc,0x24,0xde,0x9e,0xe9,0x89,0xcd,0x92,0x7c,0x91,0x15,0xff,0xbd,0xfd,0xee,0xf8,0x29,0xd7,0xf9,0xe8,0x51,0xe7,0xc8,0x21,0xc5,0x20,0xe4,0xb8,0xa6,0xdb,0xfb,0x09,0x65,0x1c,0x3b,0x9e,0x39,0x44,0xcf,0xf5,0xc2,0x7b,0xf3,0x14,0x7d,0x69,0xf2,0xd0,0x97,0x63,0xf1,0xa7,0x81,0x56,0xfb,0xdf,0x4d,0x83,0x55,0x4f,0xde,0x50,0x7d +.byte 0xfe,0xb0,0xc0,0xc8,0x3b,0x3d,0x78,0x74,0x58,0x74,0x5e,0xfc,0xb7,0x0d,0x9a,0x26,0x3b,0x39,0xb6,0xf7,0xe0,0xe4,0x12,0x3c,0xd6,0x88,0x1c,0x9b,0x51,0x89,0xe7,0x53,0xcd,0x24,0x2e,0x34,0xa2,0xee,0xfa,0x5a,0x87,0xe5,0x7e,0xd5,0xf2,0x2f,0x15,0x99,0x57,0x5d,0x31,0x02,0xf8,0x08,0x38,0xea,0x8c,0x30,0x21,0xb0,0xff,0x94,0x51,0xcf +.byte 
0x23,0xb7,0x02,0x5d,0xa3,0x75,0x7f,0x9d,0x66,0x49,0xe5,0xbe,0xc7,0x06,0x5e,0x1d,0xc9,0xe2,0x82,0x8a,0xc4,0x17,0x83,0x7e,0x65,0x6d,0x85,0x26,0x66,0xc0,0xf4,0xa5,0x1c,0x6e,0xba,0x32,0xfa,0x41,0x7b,0x2b,0x64,0x98,0x58,0x8c,0xce,0x2f,0xf3,0x56,0xf0,0x67,0xef,0x73,0x79,0xc4,0xc2,0x07,0xd7,0x85,0x1d,0x75,0x38,0x1e,0x15,0x82 +.byte 0x9d,0xf3,0xdd,0x3a,0x72,0xa3,0x23,0x0e,0x4a,0x1a,0x3a,0x97,0xc8,0xf1,0xf1,0x58,0x5d,0x1f,0xae,0x6d,0xc8,0x03,0xe0,0x7b,0x0f,0xf5,0x6f,0x35,0x41,0x8d,0xd5,0x03,0x85,0xdd,0xeb,0x3d,0x73,0xb1,0x93,0x35,0xc0,0x0f,0xfb,0x42,0xd4,0xf1,0x6b,0x35,0xe2,0x96,0xc5,0xd9,0xf2,0x69,0xbb,0x70,0x5e,0xf0,0x0c,0xe6,0xb5,0x81,0x94,0xc9 +.byte 0x29,0xa1,0x34,0x89,0xd9,0x9c,0x49,0x01,0x37,0x56,0x16,0x30,0x47,0x6f,0xe4,0x7c,0x5b,0xdd,0xfb,0x80,0x7f,0x0c,0x38,0x53,0x3d,0x57,0xf7,0xc4,0x80,0xf9,0x12,0x3a,0x9f,0xf9,0xb0,0xb6,0x94,0x6d,0xde,0x41,0x4e,0x30,0xac,0x1f,0x25,0x34,0xa0,0x95,0xe8,0x00,0x86,0x32,0x40,0xbb,0xc1,0x49,0x2d,0x07,0x49,0xb8,0x5f,0xcd,0x1b,0xd3 +.byte 0x0e,0x0c,0x54,0x0f,0xe4,0x20,0xe5,0xa1,0xed,0x98,0x65,0x5a,0xe7,0xce,0x68,0x9c,0x4c,0x48,0x03,0x9c,0x5b,0x68,0x4b,0x75,0x71,0x11,0x40,0x69,0xca,0x9a,0x3a,0xb2,0x3d,0x35,0x2c,0x70,0x35,0x8b,0x80,0x53,0x86,0x30,0x7d,0x4c,0xe9,0xc0,0x30,0x60,0xd0,0x06,0xbe,0xc2,0xad,0x39,0xcc,0xb2,0xec,0x90,0xcc,0xbd,0x7c,0xb5,0x57,0x20 +.byte 0x34,0x2e,0xfc,0xce,0xff,0xe3,0xd9,0xac,0xb8,0x62,0x6b,0x45,0x22,0x34,0xdf,0x8e,0x4b,0xf1,0x80,0x28,0x8d,0x0f,0xd5,0x3b,0x61,0x3e,0x91,0xa1,0xb1,0x85,0x27,0x78,0x88,0xbc,0xc4,0xb1,0xa1,0xbe,0x4f,0xc3,0xfd,0x1f,0xb9,0x30,0x31,0x2f,0xc1,0x9d,0xa3,0xb6,0x29,0xa4,0x60,0x82,0x73,0x93,0x74,0xea,0x97,0x67,0xf2,0xa3,0x97,0x50 +.byte 0x2f,0x9f,0x7b,0x23,0x18,0xb6,0xb4,0xee,0x15,0xa0,0xa4,0x07,0x1a,0xe9,0xb6,0x63,0x7e,0x88,0x40,0x57,0x86,0x79,0x6b,0x75,0xbe,0x57,0x8f,0xfe,0x0d,0xdf,0x4c,0x7f,0x39,0x9a,0x97,0xa6,0x87,0xc5,0xfd,0x52,0x77,0x36,0xc9,0x66,0x63,0xcf,0xc7,0x34,0x3b,0xf4,0x7a,0x12,0x56,0xf0,0xbc,0x7a,0x1a,0xa2,0xa2,0x51,0xb8,0xc1,0x70,0x81 +.byte 0xcf,0x1d,0xb5,0xe2,0x82,0xbb,0xfc,0xa3,0x80,0x18,0xf8,0x4b,0x76,0x9c,0xdf,0x9d,0x6c,0xf1,0xd8,0x2a,0xab,0x0c,0x12,0x02,0x29,0x09,0xfd,0x28,0xfb,0x57,0x38,0x05,0x2c,0xc5,0x67,0xd1,0xaa,0xbc,0x98,0xe6,0x22,0x78,0x06,0x4f,0x69,0x6a,0x63,0x1a,0x13,0x0b,0xa5,0xd2,0x61,0xc7,0x45,0x5b,0x21,0xab,0xbf,0x7b,0x7f,0x8c,0x2c,0xba +.byte 0x93,0x9f,0x41,0x67,0xc4,0x5f,0x53,0xac,0x90,0x05,0x86,0xb5,0x80,0x1f,0x5b,0x35,0x4f,0x92,0xf5,0xa8,0x5f,0xfb,0x56,0xdd,0x2d,0x9b,0xea,0xcb,0x0f,0x98,0x3c,0x4e,0xf1,0xa5,0x2c,0x37,0x70,0xe3,0x5c,0xaf,0x96,0x36,0xa8,0x2a,0xec,0xe0,0x2c,0x00,0xcd,0xaf,0x03,0x1d,0x05,0x2f,0x8c,0xe7,0xfe,0x4d,0xe9,0x97,0x6d,0xe1,0xf9,0x23 +.byte 0x60,0x08,0xea,0xfb,0x27,0xc8,0xf9,0xdf,0x49,0xfe,0xd9,0x48,0x35,0x6b,0x43,0xc5,0x19,0x90,0xb1,0xf1,0xee,0x84,0x7a,0x57,0xfa,0xa5,0xd6,0xd8,0xc9,0xf0,0x8a,0xe7,0x13,0x84,0xfc,0x28,0x54,0xae,0x99,0xfd,0x91,0xbe,0x91,0x27,0x98,0x28,0xdc,0xd7,0x2e,0xc1,0x21,0xcb,0x31,0xf8,0x47,0xe6,0x77,0x6d,0xee,0x7b,0x12,0xe4,0x9e,0x9d +.byte 0x07,0x46,0xa9,0x15,0x0b,0x3c,0xbe,0xc7,0x2d,0xe5,0xd6,0x25,0x4c,0xea,0x61,0xdc,0x18,0xb2,0x9d,0xb0,0x9a,0xff,0xa3,0x5f,0x2b,0xab,0x52,0x7d,0x1b,0xc3,0xa3,0x41,0x8f,0x5a,0x29,0xbd,0xc4,0x56,0x54,0x43,0x2d,0x61,0x07,0xed,0xd1,0x81,0x45,0xdb,0x61,0x0f,0xda,0xea,0xa6,0x1e,0xf9,0x9c,0xc0,0x8c,0xc4,0x8e,0xc7,0xca,0x38,0xe2 +.byte 
0x45,0xde,0xdc,0xc5,0xc6,0xb0,0x43,0x17,0x8b,0xb1,0x58,0xd1,0x10,0x8e,0xa5,0x17,0x37,0x85,0xca,0x61,0x67,0x5c,0xd0,0x72,0x22,0x6b,0xd3,0x3b,0x53,0xbc,0xfb,0xe1,0x1e,0xa4,0x1b,0xd3,0xc3,0x8a,0x50,0x03,0x39,0xf5,0x36,0xdf,0x51,0x2e,0x05,0x4a,0xa8,0xdb,0x91,0x87,0xae,0xfe,0x3f,0x5c,0x35,0x5e,0xf9,0x8f,0x43,0x9e,0x92,0x36 +.byte 0x91,0x27,0x90,0xe8,0x7c,0xcc,0xc4,0x9c,0x13,0xbb,0x61,0x40,0xec,0x4f,0x49,0xcf,0x04,0x38,0x77,0x3b,0xb5,0xf8,0x69,0x8d,0xbb,0xb2,0x30,0x32,0x42,0x4d,0x7d,0x6c,0x56,0xdc,0xf4,0x8f,0xfc,0xb8,0x53,0xc5,0x11,0x17,0x23,0x94,0xf9,0x6d,0x6f,0xee,0xee,0x31,0xbf,0xce,0x11,0x8b,0x9e,0xd7,0xa5,0x09,0x36,0x89,0x72,0x25,0x18,0x1f +.byte 0x13,0xa7,0xdf,0xc5,0x91,0x7e,0xd6,0x2b,0xb8,0x08,0x9c,0x12,0x83,0x21,0x97,0x3d,0xad,0xac,0x1c,0x54,0xf3,0x65,0x04,0x2f,0x09,0xd1,0xd2,0xe5,0xce,0x24,0xb1,0xd9,0xe4,0x38,0x1f,0xb4,0xce,0xea,0x27,0x7f,0x5f,0x16,0x52,0xa4,0x2f,0x2f,0xaf,0x91,0xec,0x7a,0x21,0xf7,0xa1,0x38,0x78,0x78,0xc5,0xa9,0x94,0x63,0x87,0xf8,0x95,0x9e +.byte 0xf9,0x82,0x98,0x6d,0x9d,0x48,0x80,0xaa,0x7a,0x36,0xf9,0x5f,0xfb,0x39,0x3d,0xae,0xbc,0xcd,0xfc,0x67,0x46,0x07,0x7e,0xdf,0xef,0xff,0x8d,0x67,0xe7,0xd9,0x60,0x90,0x7b,0x49,0x10,0x65,0x3a,0x60,0x87,0x7a,0xed,0x9a,0x44,0x48,0x81,0xcc,0xad,0xe4,0x6a,0x62,0xf8,0x02,0x6f,0x41,0x8a,0x8d,0x44,0x28,0x1a,0xb8,0x52,0x60,0x4b,0x3f +.byte 0xfc,0xdd,0x33,0xad,0x14,0xb1,0x34,0x63,0x1f,0xdc,0xeb,0x9a,0x3f,0x99,0x82,0x28,0x36,0x6f,0x8e,0xd7,0x39,0x2e,0xc0,0x37,0xfb,0xad,0x57,0x6c,0x82,0x1a,0xc6,0xe4,0x4b,0xca,0x00,0x68,0x57,0x34,0xf0,0x57,0x6a,0xcb,0x50,0x5d,0x8d,0xfa,0xcd,0x89,0x41,0x91,0x23,0x98,0x1f,0x4f,0x18,0xb6,0xd2,0x9d,0xde,0x2f,0x5c,0xe6,0x08,0x76 +.byte 0x97,0xba,0x24,0x4e,0x84,0xd7,0xeb,0x80,0xde,0xec,0xee,0x51,0x5a,0x0e,0x5f,0xb7,0x37,0xda,0xa5,0x94,0x2b,0x6d,0x73,0xb7,0x6c,0x22,0x95,0x3a,0xaa,0x5c,0x6f,0x89,0x90,0xec,0xb3,0x31,0x00,0x37,0x28,0x18,0xbb,0x98,0x23,0xfc,0x3e,0x21,0x7c,0xaa,0x44,0x54,0x7b,0xe6,0xa0,0x17,0x58,0xef,0x11,0x3f,0x48,0xb8,0xa8,0x15,0x4a,0x92 +.byte 0xa9,0x39,0xe2,0xa6,0x38,0x03,0xa6,0xd3,0x79,0x8b,0x38,0x06,0xaf,0x4b,0xd4,0xab,0x0a,0x13,0xff,0x2d,0xfa,0xab,0x4b,0x64,0x9e,0xb0,0x3d,0xba,0x18,0x01,0xfd,0xc3,0x6a,0x6f,0x21,0x9c,0xf5,0x2f,0xab,0x2d,0x42,0x12,0xc9,0x72,0xde,0x83,0x42,0x6a,0xf0,0xd4,0x96,0x73,0xf1,0x93,0xa3,0x2d,0x9b,0xb4,0x94,0x51,0x0c,0x6e,0x8e,0xf0 +.byte 0x5e,0xbf,0x98,0xbf,0x08,0x0f,0xd8,0x6c,0x65,0x4e,0xb5,0x47,0xeb,0x7c,0x1b,0x73,0xe0,0xe6,0x2c,0x03,0xd2,0x2a,0x32,0xff,0xa7,0x03,0x6d,0x38,0x47,0x56,0x4b,0x25,0x0b,0x39,0x73,0x87,0x4b,0xa5,0x12,0x79,0x79,0xf3,0x88,0x37,0xe2,0x4f,0xb8,0xbf,0x70,0x0e,0xf7,0x8c,0xe6,0xa3,0xbc,0x35,0x10,0xcd,0x72,0x56,0xd6,0x83,0xc1,0x0b +.byte 0x5b,0xf3,0xa8,0x74,0xc7,0xb9,0x84,0xc8,0x6c,0xff,0x66,0xad,0x95,0x6f,0xbc,0x82,0x84,0x2a,0x11,0x40,0xf9,0xa8,0x3f,0x05,0xf9,0xab,0x19,0x55,0xce,0x80,0x90,0x65,0x49,0x3d,0xe1,0x54,0x2c,0x1a,0xdb,0xf3,0xaa,0x2f,0xeb,0xf5,0x10,0x1f,0x8c,0x35,0x46,0x68,0xb1,0x4c,0x52,0xe7,0xe9,0x58,0x78,0x33,0xfd,0xc6,0x13,0x0e,0x69,0xae +.byte 0xf4,0x1a,0x8a,0x77,0x8f,0xcc,0x98,0x74,0x88,0x20,0x84,0x5b,0x83,0x54,0xa9,0xee,0xc2,0x0f,0x8a,0x46,0xb1,0xc7,0xfb,0xfd,0xf2,0x2c,0xaf,0xfa,0x72,0x34,0x7a,0x79,0x50,0x10,0xc6,0x04,0xfd,0x0a,0x1e,0x4a,0xb5,0xf5,0xe7,0x4d,0x98,0x80,0x5d,0x0b,0x81,0x23,0xc3,0x6e,0xbf,0xc8,0xcd,0x35,0x96,0x5a,0x58,0xec,0xef,0x6a,0x8d,0x48 +.byte 
0xda,0x48,0xbb,0x8f,0xcc,0x1f,0x86,0xff,0x7a,0x27,0xef,0xe6,0xb7,0xc7,0x2a,0x47,0x8d,0x6c,0x4a,0xc6,0x0a,0x32,0x67,0x1d,0x2f,0x83,0x3d,0x46,0x41,0x46,0x1c,0x75,0x7b,0x29,0x89,0xa2,0x65,0x9b,0x53,0x3d,0xd9,0x90,0x83,0xce,0xab,0x07,0xbb,0x46,0x61,0xb1,0x54,0xbd,0xc9,0x98,0xf7,0x96,0x76,0x03,0xdc,0x1f,0x1b,0xf2,0x5c,0x07 +.byte 0xdd,0x24,0x94,0x72,0x1e,0x94,0xb1,0x14,0x0b,0x40,0x77,0xde,0x3d,0x3f,0x1c,0xf0,0x8f,0xa4,0xcb,0x34,0xb5,0x2b,0x72,0x53,0x78,0xf3,0x3f,0x8e,0x47,0x30,0xb2,0x7e,0x73,0x3f,0x9a,0xef,0x19,0xb1,0xef,0x82,0x99,0xd4,0x17,0x60,0x94,0xf6,0x15,0x75,0x50,0x1f,0xb3,0xdd,0xae,0x1f,0xf8,0x63,0x9a,0x30,0x2c,0xf0,0xdd,0xbf,0x49,0x70 +.byte 0xd7,0x86,0x4a,0x5c,0x46,0x10,0x48,0x46,0x02,0x18,0xa4,0x39,0xb6,0x75,0x11,0x21,0xae,0x62,0x64,0xd8,0x85,0xc8,0xda,0xd2,0xd6,0x69,0xcc,0x37,0x57,0x49,0x73,0x1a,0x10,0x7b,0xd7,0x58,0xdd,0x0b,0xf3,0x16,0xe7,0x62,0x2c,0x32,0x92,0x0e,0x70,0x6f,0x77,0x74,0x0d,0xff,0xc2,0x8d,0x3b,0x3f,0x29,0x28,0x8f,0x88,0xb8,0x02,0x5b,0x3a +.byte 0x8b,0x65,0x89,0x92,0x2f,0xc7,0x30,0x73,0xc3,0x20,0xbc,0xa4,0xe4,0x5e,0xea,0xf8,0x21,0xb6,0xc5,0x47,0x56,0x35,0x8f,0xf6,0xd5,0xdd,0x77,0x1d,0xdf,0xd0,0x27,0xa3,0x04,0xb9,0xd0,0xc4,0x28,0x16,0xa5,0xaf,0x47,0x55,0x85,0x93,0x38,0xf4,0xac,0x13,0x30,0x7d,0x77,0x1f,0x3d,0xd5,0xd7,0x22,0xbe,0xe2,0x4e,0x6d,0x4b,0x0e,0xbe,0x1d +.byte 0x43,0x79,0x34,0x95,0x6f,0x38,0xa1,0xb3,0xa0,0xed,0xf6,0x17,0xf4,0x24,0x70,0x26,0x18,0x3e,0x1c,0xde,0xdc,0xa9,0x67,0x12,0xd3,0xc8,0xd7,0x70,0x13,0xa5,0xb3,0x25,0xe1,0x0a,0xe9,0xf6,0x4e,0x56,0x82,0x17,0xdc,0xbc,0x96,0x2f,0x59,0x03,0x9b,0xf4,0xc3,0x66,0xd2,0x90,0x95,0x1d,0xe0,0x99,0xfb,0xd8,0xa8,0x14,0xc7,0xa6,0x12,0x6b +.byte 0x08,0x6a,0xc8,0x0f,0x34,0x2a,0xb6,0xc4,0x9a,0xcd,0x61,0xf7,0x61,0xa3,0x59,0x29,0x11,0x30,0x76,0xb5,0x97,0xbc,0x2f,0x87,0xd8,0x12,0xb3,0x1d,0x99,0x8d,0x5d,0x57,0x0c,0xda,0xb0,0x9f,0x51,0x1a,0xb5,0xc6,0x94,0xc3,0xe9,0x5a,0x72,0x0c,0x37,0x76,0xb6,0x3c,0x00,0x02,0x69,0xad,0x8e,0x66,0x8b,0x5c,0x13,0x48,0xb7,0x9e,0xc5,0x7e +.byte 0xe0,0x35,0x07,0xd2,0x04,0x9c,0x35,0x95,0x8b,0x55,0x87,0x03,0x32,0x36,0xeb,0x11,0x88,0x54,0x8d,0x3e,0x88,0x46,0xc2,0xfe,0x24,0xa4,0x4b,0x92,0x19,0x44,0x6c,0xc9,0x69,0x32,0x22,0x95,0x5b,0xda,0x58,0xa4,0x00,0x33,0x83,0x2d,0xa4,0x17,0x2e,0x00,0x4d,0x9a,0x7d,0xef,0x04,0xa8,0x8b,0xf2,0x7c,0xb9,0xdb,0x54,0xcf,0x63,0x14,0x52 +.byte 0x5b,0x79,0xf6,0x89,0x5c,0xfa,0x8a,0x85,0x88,0x7f,0xca,0xed,0xfb,0x62,0xbc,0x1d,0x0d,0x90,0x51,0x27,0x45,0x74,0xa0,0x55,0xfc,0x60,0xea,0xef,0x6e,0x40,0xeb,0x0b,0x61,0x45,0x44,0xee,0xb6,0x20,0x4c,0xe1,0x08,0x62,0x29,0xdd,0xd0,0xa1,0xd5,0x7f,0x42,0xb9,0x0f,0x12,0xef,0xfb,0x13,0xa2,0xf1,0x85,0xaa,0x56,0x18,0x6c,0x70,0x7a +.byte 0x4d,0x52,0x76,0xce,0xa9,0xed,0x0a,0xcc,0x55,0xf0,0x01,0x99,0x44,0xe9,0xc4,0x74,0x33,0x2a,0xce,0x53,0xf3,0x4f,0x8f,0x1c,0x67,0x39,0x2b,0x0e,0x46,0xe2,0x49,0x06,0x52,0xbf,0xc4,0x3f,0x93,0x84,0x46,0x0a,0x9b,0xcb,0x1d,0xa5,0x66,0x9c,0x3e,0x3d,0xd1,0x92,0xda,0xe2,0x11,0x5b,0x89,0x7a,0xc4,0x33,0xba,0xa9,0x19,0xfd,0x3c,0xe3 +.byte 0xf0,0xa0,0x9b,0x83,0x50,0xce,0xa9,0x62,0xe3,0x85,0xc6,0xc4,0xe5,0x22,0xbb,0x1a,0x8e,0x04,0xb5,0x4d,0xca,0x18,0x7d,0xb0,0x99,0x50,0x78,0x88,0x69,0x43,0xe0,0xfd,0x90,0xa6,0xbf,0xdc,0xe3,0x03,0xf2,0x5d,0xa1,0xa2,0x88,0xc7,0xab,0xa9,0xc2,0xda,0x3f,0xff,0x79,0xa6,0x07,0xfd,0xc4,0xb1,0xfb,0x47,0x3d,0x75,0x82,0x26,0x52,0x85 +.byte 
0x3f,0xf9,0xc9,0x85,0x46,0x24,0xe9,0x0f,0x96,0x8c,0xbb,0x02,0x83,0x60,0x69,0x49,0x8c,0x38,0xd1,0x4e,0xd0,0x63,0x2c,0xb6,0x12,0xb2,0x8e,0x4b,0xd3,0xe3,0xdf,0x20,0x00,0x99,0xf1,0x06,0x93,0xbf,0x27,0x42,0x8b,0xe3,0x8d,0x4c,0x3b,0x05,0x62,0x64,0x21,0xb1,0xfe,0xce,0x08,0xd2,0x23,0x69,0x11,0x74,0x31,0x3a,0x90,0x10,0x07,0x1a +.byte 0xd5,0xf5,0xc2,0x09,0x61,0x67,0x65,0x99,0x3a,0xf3,0x9e,0x4a,0xd8,0xa1,0xb2,0x50,0xf4,0x07,0xf0,0x7b,0x89,0x6d,0x4d,0x6a,0xd4,0x54,0xb9,0x3c,0xd5,0x4e,0x1c,0x12,0x0f,0x19,0x92,0x97,0x21,0x65,0x83,0x33,0x20,0x92,0x95,0xd4,0x0e,0x78,0xf4,0x92,0x16,0x36,0xd8,0x1b,0xd8,0xbf,0x41,0xe4,0xfb,0xb9,0x81,0x26,0x72,0x7e,0x1b,0x58 +.byte 0x05,0x45,0x97,0x66,0xf2,0x23,0x16,0xca,0x4e,0x95,0xc2,0x6c,0x60,0x84,0x5f,0x77,0x82,0x44,0x0e,0xf7,0x30,0xaa,0x51,0xa9,0x85,0x8b,0x03,0xfc,0x3d,0x6d,0x66,0x91,0x37,0xa5,0x1c,0xf8,0xcf,0x9d,0xd8,0xcd,0x8c,0xa1,0x29,0xbd,0xb5,0x4f,0x47,0xba,0xd1,0x55,0x3b,0x4e,0xc9,0xce,0x4c,0xcf,0x2e,0x19,0xa0,0x95,0xe6,0xcb,0x36,0x97 +.byte 0x3e,0x23,0xbe,0x09,0xfd,0x38,0x47,0x00,0x03,0xec,0x49,0xbb,0x49,0x1f,0x45,0x84,0x0f,0x1e,0x74,0xab,0xc9,0x07,0x00,0x04,0x70,0xe9,0xbd,0x61,0xb1,0x92,0xee,0x67,0x9a,0x5e,0x90,0xdc,0xe7,0x99,0x36,0xd0,0x58,0x15,0xe5,0x15,0xa2,0x1d,0x61,0x18,0x39,0x5f,0x6c,0xc7,0xbe,0xd0,0x23,0x1e,0x41,0xc8,0xaa,0x8e,0xbf,0xb8,0xdb,0x90 +.byte 0x8c,0x60,0x07,0x1e,0xe9,0x6c,0xe4,0xde,0xec,0x73,0x34,0x94,0x54,0xa4,0x6b,0x49,0xcf,0x87,0xb5,0x88,0x98,0xe6,0x2c,0xce,0xb7,0x76,0xa5,0x29,0xf1,0x29,0x50,0xc5,0x9e,0x13,0xe4,0x61,0x6a,0x54,0xb2,0x26,0xfa,0xfa,0x4a,0x41,0x3b,0x0a,0xf5,0x9a,0x60,0xbb,0xfc,0x1e,0x5d,0x21,0x7e,0x91,0x51,0xd6,0x5e,0x92,0xf9,0x21,0x80,0xa8 +.byte 0x35,0xc0,0xbb,0x7a,0xeb,0x75,0xb4,0xa3,0xd3,0x8d,0xaf,0x07,0x53,0x65,0x36,0x11,0xf9,0xb6,0x69,0x29,0x1e,0x5d,0x8f,0x57,0x5d,0xed,0x42,0xf9,0xd5,0xf6,0xc3,0x1e,0x29,0xc4,0x49,0x04,0xe4,0xfb,0xbf,0x9b,0x4a,0x7b,0xdd,0x57,0x51,0xfe,0xc4,0xd1,0xd9,0xe9,0x8f,0x94,0x78,0xbc,0x5c,0xeb,0xb6,0xbc,0x51,0xb0,0x82,0x87,0x47,0xb4 +.byte 0xf7,0xf9,0x02,0xd7,0xac,0x23,0xc0,0xe5,0x9a,0xc3,0x2f,0xd2,0xb8,0xb2,0x62,0xb9,0xdb,0x49,0x85,0x77,0x92,0xa6,0xe5,0x24,0x43,0x4d,0x0d,0x67,0x94,0x01,0x29,0xd6,0x2e,0xee,0xd9,0x2e,0x97,0x0e,0x20,0x7f,0x84,0x19,0x3c,0x3a,0x6f,0xa5,0xb0,0x8b,0x8f,0x8d,0x96,0xbb,0x76,0x61,0x97,0xc2,0x65,0x83,0xd8,0xda,0xab,0x42,0xfa,0xe5 +.byte 0x1e,0x42,0x93,0xa7,0x66,0x03,0x06,0x3b,0xbe,0xb8,0xae,0x71,0xee,0xdb,0x5d,0xdf,0x40,0x64,0x17,0x17,0x2e,0x03,0xca,0x37,0x2a,0x71,0x92,0x0a,0x01,0xa3,0x0f,0x0b,0x09,0xf2,0x0e,0x4b,0x4d,0x18,0xf3,0xc4,0xf2,0x51,0x7b,0x53,0x30,0xab,0x24,0xa2,0x47,0x38,0xc9,0x2c,0xdf,0x0d,0x32,0x3e,0x3f,0x57,0x2d,0xfc,0x44,0x19,0x64,0x8b +.byte 0xe9,0x9a,0xc2,0xf2,0xf6,0x2d,0x30,0x0c,0x0f,0xc3,0xc3,0xfe,0xc2,0xd1,0xbc,0xe0,0xbf,0xaf,0xeb,0x40,0x64,0x28,0xe2,0xd9,0x3c,0x7e,0x24,0x94,0x8f,0xe8,0x54,0x8b,0x26,0x6b,0xe1,0x4e,0x44,0x5a,0x7d,0x7b,0x12,0x36,0x2c,0x12,0xad,0x26,0xbc,0xa7,0xa3,0x2b,0x25,0xb9,0xde,0xe6,0x64,0x2d,0xab,0x7f,0x15,0x22,0x51,0x26,0x1c,0x15 +.byte 0x5d,0x13,0x18,0x93,0xc1,0x19,0x65,0xca,0xf3,0x8b,0xe0,0xcf,0x8c,0x43,0xe9,0xfd,0xa1,0xbd,0xe9,0xde,0x78,0x26,0xcb,0x7c,0xdc,0x68,0x06,0x98,0xf6,0x90,0x44,0x40,0xf0,0x5e,0xe1,0x16,0xf5,0x5d,0x4d,0x9b,0x85,0xe6,0x26,0xbd,0xab,0xcc,0x46,0x62,0x18,0x51,0xd5,0x3c,0x9f,0x6e,0xfa,0xe7,0x94,0xfc,0xc2,0x1a,0x9d,0x63,0x2c,0xdc +.byte 
0xc3,0x89,0x67,0x94,0x37,0x58,0x0d,0x13,0xb8,0xdf,0x41,0x3d,0x70,0x78,0x1e,0x61,0x75,0x77,0xcc,0xbf,0x5f,0xa8,0xd3,0x89,0xcc,0xd3,0x40,0x4e,0x65,0xbd,0xce,0x3c,0xf0,0x5a,0x8f,0xe2,0xe1,0x24,0xaa,0xed,0x0f,0xd1,0x03,0x0d,0xf5,0x36,0x98,0xcd,0xa5,0x77,0x40,0x24,0x0a,0x82,0x68,0x79,0x82,0x38,0x68,0x6f,0x2b,0x0b,0xce,0x0f +.byte 0xcd,0x0f,0xba,0xdb,0xb5,0x22,0x38,0xd2,0xb0,0x9f,0x0f,0x08,0x0d,0xd8,0x5e,0xa7,0xd0,0xa9,0x39,0x66,0x4c,0x46,0xce,0x2a,0xc3,0x67,0x8c,0x91,0xdc,0xf1,0xc0,0x3a,0x58,0x50,0x1f,0xb0,0xa4,0x4d,0xbf,0x99,0x57,0xcf,0xae,0xb2,0xaf,0x6a,0x42,0xd2,0x7f,0x85,0x8c,0x40,0xc6,0x9a,0x93,0x57,0x54,0xf5,0xb4,0x83,0x59,0xb5,0x19,0x52 +.byte 0x7c,0x8b,0x76,0xee,0x35,0x90,0xbf,0xbe,0x65,0x58,0x3b,0x25,0x52,0x18,0xd8,0x7f,0x1f,0xe6,0x70,0xce,0x56,0x1a,0x45,0xa0,0x81,0xee,0x95,0x6f,0x55,0x43,0xaa,0x6e,0x87,0xa9,0xab,0x7d,0xe9,0xa1,0xa3,0x63,0xe7,0x1b,0x6b,0xa6,0x2c,0xe5,0x4a,0xb2,0x1e,0x73,0x5e,0xb5,0xae,0x83,0xe6,0x54,0x0b,0xc5,0x6b,0xb6,0xc4,0x73,0x62,0x1a +.byte 0xbf,0x1a,0x65,0xa2,0x5e,0x3a,0x45,0xd9,0xba,0x5b,0xef,0xf7,0x13,0x0c,0x7c,0x68,0xa1,0x98,0x71,0xb7,0x39,0x7c,0xbc,0x69,0xdb,0xd4,0xac,0x3f,0x82,0x63,0x9b,0x71,0x25,0x3a,0x06,0x73,0x60,0x71,0xc3,0x30,0xd3,0x96,0x02,0x4b,0x46,0xbd,0xd4,0x6e,0xc6,0x29,0xcc,0xd0,0xe1,0x0b,0x66,0x62,0xea,0x29,0xc7,0xcf,0x35,0x9e,0x2f,0x1f +.byte 0xa0,0xfc,0x8c,0x4a,0x83,0x8e,0x3b,0xf5,0x7a,0x6f,0x52,0xaf,0x99,0x9c,0x86,0xab,0xe5,0x1b,0x82,0xb3,0x18,0x35,0x77,0x9b,0xa3,0x94,0xc8,0x39,0x30,0x3f,0xad,0xa9,0x0f,0x93,0xb8,0xc8,0xed,0x04,0xf2,0x0b,0x9a,0xb1,0xd1,0xc9,0x9e,0x40,0x4f,0x71,0x21,0x63,0x2a,0x05,0x26,0x53,0xa3,0x3f,0x43,0xe4,0xf8,0x7c,0x2f,0xa3,0x5a,0x6e +.byte 0xc1,0x40,0xa8,0x4d,0xbc,0x03,0xae,0xe9,0x36,0xb6,0x37,0xdc,0x5f,0xef,0xb0,0x35,0x33,0xdf,0x33,0x71,0xaf,0x80,0xf2,0x69,0xd9,0xb5,0xfc,0xff,0xd2,0x5b,0x6a,0xeb,0xdc,0xe0,0x26,0x43,0x38,0x7b,0x24,0xb2,0x79,0x53,0x52,0x57,0xc4,0x1f,0x6d,0xc9,0x50,0xf2,0x63,0x9d,0xc1,0x22,0x5f,0x11,0x82,0x38,0xdb,0xd3,0xb4,0x1d,0x10,0x72 +.byte 0x9e,0x4d,0x03,0x30,0xba,0x5e,0xe9,0x8c,0x21,0x12,0xe6,0x3a,0xd6,0x4c,0x18,0xa4,0x27,0xc9,0xf5,0x50,0xbd,0xbe,0xf0,0x86,0xd8,0x00,0x56,0xf0,0x10,0x81,0xec,0xeb,0xfc,0x5b,0x29,0x88,0xff,0x73,0x60,0x6b,0xf5,0x8c,0x0b,0x30,0x04,0x53,0x85,0x61,0x0c,0xfc,0xff,0x8f,0x21,0xd2,0xa1,0xcb,0xf7,0x90,0x53,0x3b,0xf4,0xf0,0x2c,0x7d +.byte 0xb6,0x84,0xe7,0x4c,0x88,0xea,0x4f,0xdf,0xff,0x0f,0x5d,0x0f,0xd3,0x2d,0x4f,0x7e,0xdc,0xd1,0x22,0x71,0x0d,0xae,0xa8,0xcf,0x05,0x7b,0xfc,0xfe,0x87,0x40,0xa5,0xe8,0xfd,0x3f,0xdb,0x2f,0x00,0x21,0xb9,0x70,0x02,0x2c,0x96,0x24,0xaf,0x35,0xe2,0x87,0xcb,0x50,0xcf,0x7e,0xfa,0xaf,0x39,0x82,0x0c,0xd5,0xa6,0x3f,0x9c,0x77,0x60,0x16 +.byte 0xbf,0x42,0xcc,0x97,0xd1,0x19,0x0d,0x8a,0x50,0x98,0x7d,0x19,0x7b,0x40,0x1c,0x22,0xde,0x50,0x90,0x32,0x9a,0x3d,0x07,0x35,0xc0,0x48,0x4c,0x0a,0xcd,0x91,0xab,0xf7,0xf3,0x06,0x77,0x80,0x96,0x7b,0x59,0x33,0xe6,0xbf,0x93,0xb8,0x59,0xd0,0x3a,0x1f,0xcc,0xe7,0x1d,0xd4,0xb5,0x58,0xee,0xe7,0x95,0xfa,0x75,0xdb,0x37,0x74,0xb0,0x7d +.byte 0x4d,0xee,0xef,0x20,0x13,0xe5,0x82,0x07,0x8e,0xdd,0x57,0x75,0x33,0x56,0xc4,0x80,0xb0,0x06,0x9f,0x6b,0x72,0x31,0xcf,0xac,0x5f,0x96,0x13,0xeb,0xf4,0x34,0xb6,0x6b,0x55,0xef,0x55,0x26,0x4e,0xdb,0x6c,0x2f,0x64,0x29,0x91,0x3c,0x6d,0x29,0xd2,0x94,0xbd,0x2c,0x99,0xb9,0x97,0x76,0xee,0x7d,0xfd,0xb2,0x8d,0x14,0x4f,0x09,0x81,0xb3 +.byte 
0x68,0x3e,0x79,0x28,0x56,0x50,0x3f,0x86,0x4c,0x95,0x6c,0xad,0xf6,0xc5,0x43,0x25,0xea,0xbc,0xe2,0xba,0x77,0x18,0xc6,0x82,0x65,0x73,0x38,0x90,0x9d,0xc9,0x57,0xcd,0xa2,0x7c,0xd3,0x26,0x59,0x44,0xd9,0x79,0xae,0xdd,0x6f,0xe9,0xdc,0x16,0x73,0xba,0x05,0x8a,0x40,0x9f,0xe7,0xcf,0x29,0xa4,0xdf,0x49,0x7f,0x1d,0x73,0xc7,0x8b,0x8d +.byte 0xad,0xb5,0x3d,0x1b,0x64,0xb1,0x8f,0x78,0x06,0xbe,0xaa,0x2c,0x08,0x73,0xc7,0x2c,0xdc,0xd8,0x3f,0x9f,0x1b,0xd2,0xe1,0x4f,0x9d,0x87,0xb8,0xa9,0xdc,0xef,0xbc,0x31,0x9f,0xf7,0x84,0x09,0xe7,0xbc,0xec,0x2a,0xcb,0x3b,0x3a,0x30,0xe2,0x5b,0xbc,0xcd,0xa8,0xdb,0x46,0x80,0xec,0xaa,0x06,0x8e,0xd8,0x6c,0x35,0x65,0x52,0xb8,0xc3,0xf9 +.byte 0x97,0x68,0x06,0x2d,0x3e,0x91,0x71,0x44,0x6e,0x01,0x51,0x10,0x5b,0x74,0xb9,0x3f,0xd7,0xf9,0x5c,0x98,0xe6,0xf8,0x98,0x32,0x26,0x9b,0x5e,0x9c,0x88,0xfb,0xaa,0x70,0xd2,0x2e,0xc2,0xf6,0x02,0x92,0x33,0x55,0x92,0xba,0xfb,0x0e,0x0b,0x08,0xdf,0x5d,0xdd,0x47,0x28,0xae,0x32,0xb3,0x27,0x8d,0xd4,0x18,0x43,0x64,0xc4,0x7f,0x60,0x62 +.byte 0xd9,0x63,0xd1,0x28,0xc9,0x75,0x3b,0x44,0xb4,0x8e,0x2a,0x93,0xf9,0x4c,0x4f,0x7e,0x6b,0x98,0xc9,0x1a,0x82,0x51,0x9a,0xb2,0x80,0x70,0x2e,0xff,0x19,0x66,0x1b,0xb6,0xbc,0x15,0x8e,0xe6,0x0f,0x8e,0x04,0x10,0x94,0x44,0x6c,0x32,0x4b,0x61,0xbc,0x4a,0x16,0x7b,0x25,0x2a,0x27,0x96,0xa9,0xa9,0x61,0x10,0xc1,0x46,0xdd,0xf5,0xe3,0xe8 +.byte 0x1f,0x5b,0xa0,0x77,0xe1,0x42,0x9a,0xd4,0x04,0x33,0x68,0x72,0x1c,0x44,0x29,0xce,0x98,0xe0,0xc7,0x3a,0x9e,0x3c,0xb9,0xb4,0x29,0xef,0x57,0xee,0x8c,0x8f,0x7c,0xe6,0xe1,0x43,0x6e,0x45,0x0e,0xdd,0x4e,0x11,0x4b,0x28,0x69,0xde,0xb8,0xfa,0x32,0xbe,0xc6,0x4f,0x11,0x99,0xe5,0xe3,0xe2,0x1f,0x03,0xbe,0x4a,0xad,0x60,0x68,0xc8,0x13 +.byte 0x80,0x4e,0xb6,0xc0,0xc5,0xc7,0x97,0x5c,0x0b,0x0e,0x64,0x43,0x78,0x70,0x95,0x91,0x8e,0x36,0x6b,0xad,0x57,0xc7,0x1e,0x9c,0x54,0xc9,0x89,0xf0,0x13,0xde,0x0a,0xbe,0xc0,0xa9,0x35,0x77,0x0a,0x01,0x7f,0x98,0x51,0x82,0x92,0x14,0xe0,0x9a,0x08,0xa3,0x0c,0x6c,0x67,0xf2,0x05,0xaa,0xa9,0x4e,0xce,0x3b,0xb1,0xb6,0x8c,0x82,0x5d,0x11 +.byte 0xf2,0xe5,0xd7,0xda,0x3a,0x65,0xa0,0xe3,0xa4,0x09,0x01,0x1c,0xb2,0x08,0x90,0x94,0xb5,0x51,0x56,0x24,0x22,0xfd,0x12,0xad,0x7a,0x75,0xcf,0x0f,0x0f,0x23,0xc3,0xa6,0x1f,0xf8,0x39,0xbc,0x2f,0x18,0x53,0x14,0xef,0xdf,0x90,0x6a,0x50,0x2b,0x8c,0x8b,0xa8,0xd4,0x8c,0x59,0x8f,0xd8,0x81,0x86,0x57,0xc1,0xd1,0xfb,0xe7,0xa6,0x20,0x6e +.byte 0x7c,0xbf,0xce,0xe3,0xce,0x28,0x35,0x7c,0x8e,0x1a,0x66,0xea,0x7d,0x81,0x09,0xdb,0xa8,0x64,0xba,0x3c,0x07,0x3f,0x23,0xd3,0x05,0x97,0x4c,0x92,0xc2,0xa4,0xe8,0x6c,0xfb,0xa0,0x9d,0x8b,0x4d,0xcb,0x3a,0x96,0xe7,0x04,0x0f,0x48,0x87,0x2c,0xdd,0x51,0xf3,0x46,0x7e,0x61,0x89,0xbe,0xb8,0xb0,0x9e,0x9c,0xc4,0x37,0x55,0xe6,0x4f,0x78 +.byte 0x7e,0xb0,0x59,0x42,0xca,0xba,0x4a,0xb2,0x50,0xbd,0x16,0x68,0x99,0x42,0xb4,0x8b,0x60,0x3d,0x54,0x41,0x17,0x11,0x39,0x42,0x5d,0x41,0xec,0xc2,0x53,0x82,0x7c,0x32,0xc9,0xd1,0x34,0x49,0xd8,0x4f,0x29,0x21,0xeb,0x97,0x98,0x4c,0xeb,0x21,0xce,0x50,0xd6,0x53,0xd9,0xf1,0x6e,0x26,0xfa,0xe4,0x71,0x34,0xd8,0x38,0xac,0x39,0x4f,0x02 +.byte 0x36,0x93,0xf2,0x08,0x88,0xdc,0x24,0xdd,0x1f,0xf5,0xe9,0x7f,0x83,0xa0,0xa4,0x6b,0xc5,0xef,0x8e,0x82,0xf9,0x92,0xbc,0x82,0x3f,0xce,0x86,0xa6,0x34,0xf8,0x16,0xa7,0xdb,0x97,0xca,0x54,0x43,0xd8,0xfc,0x31,0xde,0x73,0xd0,0x79,0x1a,0xac,0x61,0x15,0xbd,0x38,0x64,0x3b,0xc6,0xb5,0x95,0xeb,0x2e,0x68,0xe4,0x1d,0x6b,0x18,0xab,0x88 +.byte 
0xb0,0x96,0x51,0x8c,0xbe,0x41,0x63,0xd6,0x9a,0x21,0x60,0xe8,0x26,0x37,0xb3,0x10,0x76,0x46,0x31,0x90,0xb0,0x9f,0x17,0xab,0x0f,0x93,0xcc,0x12,0x78,0xee,0x17,0x1c,0xd8,0xc7,0x76,0x0a,0x5a,0xb4,0x8b,0xb1,0x67,0x11,0xde,0x48,0x14,0x8a,0x2a,0xc7,0x71,0x46,0x94,0x15,0x29,0x44,0x9e,0x35,0x03,0x10,0xf7,0x51,0x8a,0xaa,0x9c,0x4a +.byte 0x9a,0x44,0xd5,0xc7,0x37,0x9d,0xb4,0xad,0x41,0xd0,0xda,0xd2,0x1a,0xf9,0x93,0xee,0x28,0x32,0x65,0x0b,0x9c,0x12,0xe3,0xad,0x9f,0x82,0xeb,0x3f,0x03,0xe7,0x6a,0x58,0x83,0x3f,0xbe,0x9f,0x27,0xd3,0xd6,0xe2,0x45,0xbf,0x90,0xe2,0x12,0x61,0x0b,0x57,0xd7,0x06,0x72,0x39,0x2c,0x3e,0x65,0xb2,0xf4,0xf7,0x54,0xef,0x32,0x99,0x44,0x0d +.byte 0xf0,0x5c,0xde,0x4c,0x2e,0x22,0xcd,0x3c,0x25,0x02,0xa5,0x0d,0x79,0x16,0xb0,0x51,0x3f,0x3c,0x84,0x56,0xfa,0x00,0xae,0x7a,0x36,0x45,0x3a,0xcc,0x1d,0x66,0xff,0xf4,0x49,0xce,0xb5,0x5c,0x51,0xf4,0x3e,0x07,0xf2,0x83,0x84,0x4d,0x4e,0xb7,0xce,0x03,0x7b,0x23,0x63,0xdf,0x64,0xa2,0x55,0x92,0xf9,0x2e,0xa5,0x21,0x89,0x29,0x42,0x48 +.byte 0x36,0xc5,0xab,0xd6,0x82,0xe3,0xff,0x45,0xfc,0x61,0xa6,0x4f,0xb9,0x51,0xba,0xd5,0x03,0xa9,0x0b,0xe7,0x73,0x83,0x97,0x1d,0xb2,0xc6,0x75,0xa0,0x52,0x99,0xfc,0x1b,0x27,0x7a,0x10,0xc1,0xed,0x70,0x21,0x4b,0x93,0xa4,0x20,0xed,0x16,0x76,0x97,0x82,0xab,0x21,0xfe,0xa4,0x3f,0xd9,0xbd,0x9c,0x2f,0x19,0x42,0xbc,0xb3,0x4f,0x44,0xf3 +.byte 0x9e,0xd0,0xe7,0xc9,0x7e,0x31,0xaa,0xbc,0x4b,0xba,0x73,0xe1,0xc3,0xbf,0x5d,0xa2,0xd8,0xb7,0xb6,0xfc,0x0a,0x32,0xb9,0xff,0x80,0xb6,0x2a,0x8b,0xea,0x81,0xa0,0xeb,0x1e,0x9e,0x69,0xdd,0xbe,0xc1,0x8a,0x5d,0xfb,0x66,0x21,0x98,0x5c,0x6f,0xd8,0xb4,0xcf,0x8a,0x1a,0x4b,0xde,0xa2,0x20,0xe8,0x5a,0x5a,0xee,0x14,0x09,0xcb,0x63,0x1c +.byte 0x14,0x7d,0x9b,0x47,0xf8,0xfa,0xda,0xb7,0x0e,0xc6,0xbd,0xb2,0x13,0xb8,0x10,0xe2,0x71,0x04,0x36,0x78,0x6d,0x3a,0x8b,0x45,0xd3,0x05,0xec,0x8a,0x2d,0xfa,0x85,0x7c,0xdd,0x75,0xb3,0x2d,0xd1,0xae,0xfc,0xdd,0x02,0x2e,0xcc,0x43,0xc5,0xed,0xe4,0x3f,0xee,0x2c,0xd7,0x37,0x81,0x3a,0x44,0xe6,0xed,0x8c,0x9d,0x9d,0xfa,0xb5,0xdc,0xde +.byte 0xb2,0x7c,0x51,0x58,0xa4,0x21,0xac,0xe2,0x79,0x96,0x90,0xe2,0x0b,0xbf,0x51,0x66,0x77,0x02,0xff,0x67,0x0a,0x70,0x1f,0x04,0x6c,0xb0,0x5b,0x2d,0x26,0x23,0x5a,0x85,0x73,0x66,0x6e,0x7c,0xb3,0xeb,0x36,0x73,0x0f,0xcd,0xb2,0x07,0xee,0x78,0xd1,0xbd,0x5e,0xfa,0x31,0xf6,0x82,0x67,0x94,0xaa,0xff,0xef,0xd2,0x23,0xfc,0x82,0xaa,0xe2 +.byte 0xef,0xc3,0x74,0x79,0x6c,0xe9,0x3f,0x8d,0xe1,0x1b,0xc8,0xb4,0xff,0x15,0xf4,0x60,0xe8,0x84,0x3f,0xaa,0xc6,0x53,0x51,0x1a,0x9b,0x04,0x9b,0xab,0xc5,0xee,0x9a,0x98,0x80,0x89,0x8d,0x5b,0xef,0x0a,0x69,0x71,0xd2,0xf3,0x49,0xc1,0xc1,0x87,0xb3,0x18,0x4b,0x82,0x02,0x87,0xb0,0xf1,0x76,0x4b,0x3e,0xad,0x95,0x51,0xb1,0x64,0xb1,0x03 +.byte 0x5b,0xd2,0x10,0x7b,0x4e,0xd4,0x08,0xf8,0xfd,0xea,0xf0,0xc7,0x16,0x43,0x86,0xa6,0xdb,0xcd,0x75,0xce,0xa9,0xfd,0xa8,0x7c,0x51,0xf7,0xa5,0x29,0x6f,0x0d,0xee,0x66,0x8f,0xc6,0xcd,0x9e,0x3f,0x00,0x24,0x21,0xca,0x69,0x79,0x27,0x03,0x62,0xdf,0xad,0xb9,0x8c,0xd8,0x08,0x88,0x0d,0x0c,0xa1,0x29,0xf9,0xba,0x92,0xb5,0xdd,0xb8,0x1a +.byte 0xbb,0xab,0x44,0xb2,0xda,0x1b,0x8b,0xc1,0x3c,0x61,0x9f,0x7a,0x8b,0x89,0x99,0x09,0xc3,0xb4,0xe4,0x24,0xf5,0x3b,0x36,0xa6,0x61,0x0a,0xec,0x2a,0x1c,0x92,0x7c,0xb1,0x7c,0xd8,0x0b,0x98,0x48,0x8d,0x52,0xa2,0x57,0xc1,0x28,0x89,0xbb,0x60,0x5c,0x58,0x62,0x41,0x1c,0xd6,0xfb,0x69,0x09,0x93,0x90,0x31,0xc4,0x72,0x71,0xf0,0x4f,0xcf +.byte 
0x10,0xbb,0xb7,0x6c,0x3b,0x53,0xa3,0x0b,0xff,0x44,0x4c,0x37,0xd5,0x26,0x83,0x7e,0x5c,0xb9,0xa5,0xe8,0x8b,0xc4,0x15,0xf6,0xc7,0xd1,0x39,0x67,0x01,0xb7,0xca,0xa7,0x71,0xa8,0x04,0x95,0x0f,0xfc,0x0a,0x9e,0x52,0xb2,0xfb,0x48,0x47,0xb6,0xa5,0x14,0xc2,0x4f,0xa8,0xd5,0x0f,0x10,0x76,0x39,0x23,0x74,0x2e,0xe5,0x17,0xcb,0xad,0x8a +.byte 0x4a,0x25,0xc8,0x9b,0x25,0x94,0x34,0xbc,0x4b,0x2f,0xdc,0x0a,0xcd,0xc1,0x02,0x72,0x7d,0xa0,0x10,0xa7,0x32,0x68,0xe8,0xd5,0x23,0xe8,0xc9,0xbc,0x05,0x05,0x1e,0xac,0x55,0x45,0xfb,0x42,0x2f,0x0f,0x51,0x8d,0x31,0xb1,0xbc,0x10,0xa1,0x03,0xc3,0x6f,0x35,0x08,0xa5,0x2f,0x91,0x4e,0x43,0x6b,0x62,0x3b,0x00,0x4c,0xd0,0xb8,0x33,0xbc +.byte 0xca,0x57,0xb8,0x1b,0xb4,0x52,0x1a,0xa7,0x03,0x78,0xa0,0x4f,0xda,0x86,0xb9,0xd8,0xc6,0x69,0xe6,0x61,0x2e,0x62,0x96,0x60,0x0d,0x76,0xdc,0x5d,0x0e,0xa8,0xf3,0x86,0xde,0xcf,0x39,0x34,0xc7,0x69,0xed,0xcb,0x9a,0xf5,0xc3,0xce,0x6d,0xa5,0x7f,0xae,0x73,0xb9,0xa6,0xbf,0x88,0x93,0x2b,0x0e,0x8b,0x4b,0xa5,0xeb,0x62,0xc6,0x1a,0xc7 +.byte 0x63,0x63,0x58,0x62,0x37,0xc6,0xbc,0x00,0x72,0xac,0x3d,0x7c,0x22,0xa5,0x59,0xf1,0x6e,0x60,0x45,0x3e,0x99,0x76,0x40,0x82,0xa7,0x52,0xf3,0x48,0x8e,0x4a,0xa3,0xe1,0x3b,0xea,0x77,0xa7,0x7d,0x13,0xe7,0xc4,0xc6,0xa6,0x6e,0xda,0xe8,0x50,0xc8,0x39,0x30,0xab,0x8a,0xe1,0x08,0xa9,0xe3,0xbd,0x8d,0xbd,0x83,0x3c,0xbc,0x6c,0x92,0xed +.byte 0xf1,0xa9,0xd3,0x50,0xf2,0x29,0x8b,0x39,0x46,0xaf,0x08,0x7e,0x00,0x64,0x2f,0xa8,0x18,0xab,0x7e,0x07,0xd3,0x63,0x2a,0xd3,0xd3,0xbb,0xf9,0xdd,0x2b,0xec,0x70,0x35,0x1a,0x94,0x6b,0x87,0xe4,0x1a,0x0a,0x44,0x46,0x08,0xa6,0xce,0x1b,0xf7,0xd7,0x20,0x87,0x1a,0x96,0x6c,0xbe,0xdf,0x73,0x3b,0xc9,0xaf,0x89,0x1c,0x2f,0x47,0xe9,0xd8 +.byte 0x03,0xa6,0x03,0x6c,0x73,0xa9,0x65,0x20,0x36,0xea,0x6f,0xe7,0x96,0x7c,0x01,0x87,0xb0,0x21,0xba,0xb4,0xed,0x1f,0x81,0x65,0x97,0x36,0xda,0x68,0x80,0x64,0x99,0xe6,0xda,0x95,0x04,0xdf,0x5d,0xfd,0x86,0xd1,0xfd,0xfa,0x1c,0xd7,0x89,0xbf,0xe6,0x99,0x6c,0xf5,0x01,0x56,0x20,0x88,0x79,0xa7,0x8d,0x88,0x82,0xe5,0x32,0x38,0xe0,0xf0 +.byte 0x98,0x63,0xa9,0xab,0xeb,0x09,0x8d,0xaf,0x3f,0xa8,0x57,0x98,0xde,0xc8,0x9c,0x8d,0x1d,0x18,0xc5,0xa8,0x82,0x51,0x9b,0x6f,0xc6,0xb8,0x09,0xd3,0xea,0xd4,0xe3,0xac,0xd1,0x0e,0x88,0xda,0xdf,0x38,0x53,0x14,0x87,0x28,0x6f,0x13,0x35,0xdb,0xfe,0xa1,0xe7,0x43,0xb5,0x02,0x46,0x08,0x1a,0x31,0x0d,0x9e,0x3d,0x3b,0xbf,0xbb,0x82,0x9c +.byte 0x09,0xf3,0xd9,0x22,0x0a,0x82,0x07,0xd3,0xe8,0x19,0x6e,0x21,0xd2,0xa2,0xa8,0x14,0xbc,0x42,0xb6,0xeb,0x8c,0x40,0x9b,0xb2,0xa9,0x17,0xad,0x2c,0x19,0xaa,0x4b,0x22,0xf9,0x4e,0xde,0x8f,0xbe,0x78,0x9b,0xab,0xb9,0xfa,0xb1,0x3e,0x68,0x86,0x1a,0x4a,0x61,0xba,0x63,0x51,0x25,0x11,0x59,0xd0,0xb7,0x0c,0xb7,0xcc,0x45,0x05,0x6d,0x5a +.byte 0xe2,0xd7,0x10,0x80,0x19,0xd3,0xa9,0xab,0xb6,0x9f,0x53,0x7a,0xaa,0x19,0x74,0x01,0xc9,0xd6,0x45,0x42,0x2c,0xe5,0xc0,0xcf,0x62,0xe6,0x95,0x6f,0x4c,0x90,0x50,0x97,0x61,0x83,0x73,0xd0,0xc2,0xd5,0xf0,0x05,0xca,0xe9,0x6f,0x67,0xa9,0x51,0xb8,0xb4,0x9d,0x30,0x8e,0xe3,0x29,0xf9,0x3b,0x3d,0x17,0x25,0xad,0xbb,0xb0,0x34,0x68,0x29 +.byte 0x06,0xad,0x0e,0xdf,0x41,0xa6,0xf1,0xa6,0x25,0xc4,0xf0,0x0d,0x57,0x84,0x34,0x2c,0x3b,0xb1,0x41,0xd6,0x83,0x00,0x3a,0x91,0x98,0x8e,0xd0,0x59,0x0b,0x2d,0xc9,0x65,0x03,0x91,0xcb,0x03,0x97,0x57,0xde,0x11,0x8b,0x4b,0x1b,0x85,0x0b,0xb6,0x68,0x25,0x3c,0x1a,0x04,0x7d,0xd5,0x2b,0x16,0x69,0x1f,0x64,0x8b,0x47,0x60,0x17,0xaa,0x68 +.byte 
0x45,0xf2,0x0b,0xf8,0xa2,0x27,0xf8,0x47,0x86,0x41,0x94,0x3f,0x92,0xc3,0x02,0xab,0x80,0x2b,0x0e,0x3c,0xd0,0x13,0x59,0x08,0xfc,0x13,0x33,0x52,0xbb,0x2d,0x6b,0x22,0xa2,0x8b,0x9f,0x7c,0x8e,0x40,0x35,0xa4,0xc7,0x45,0xb7,0xf8,0x10,0x22,0x95,0xc5,0x48,0xc1,0x50,0x4d,0x4a,0x36,0xe1,0xec,0x1e,0x07,0xf7,0x68,0x63,0xcb,0x13,0x03 +.byte 0x70,0x63,0xb1,0x9b,0xf3,0x60,0x01,0x6e,0x63,0x5c,0x4d,0x2c,0x5c,0x5c,0x58,0x8b,0xbb,0x6e,0xd1,0x69,0xdd,0x19,0xfe,0xfb,0xd6,0xdc,0x68,0x97,0x9c,0x46,0x0d,0xdd,0x4d,0xbd,0x52,0xe4,0xd9,0xc2,0x03,0x4e,0x4c,0xe2,0x66,0x6b,0x4d,0xbe,0x6b,0xf3,0xd6,0xbe,0x2d,0xba,0xdd,0x1b,0x4f,0x60,0x02,0x74,0xa1,0xf0,0xd0,0xfa,0x23,0x33 +.byte 0x29,0x7e,0x00,0x09,0x47,0x15,0xa8,0xd8,0xdb,0xb8,0xe1,0x20,0xd5,0xe2,0x91,0xd0,0xe8,0xfa,0xa1,0x0d,0x80,0xbd,0x7d,0x62,0x9d,0xf2,0xbc,0x03,0xa1,0x44,0x9f,0x8d,0x3d,0xe3,0xb4,0xec,0x32,0xd9,0x66,0xb0,0xc7,0x75,0x11,0xaa,0xab,0xb7,0x84,0x1d,0x5b,0x4f,0x25,0x5c,0x53,0xed,0xbb,0x6d,0x06,0x1f,0x12,0x5f,0xc0,0xeb,0x55,0x3e +.byte 0xd0,0x5b,0x4d,0x07,0xf7,0x84,0x12,0xbc,0xc8,0xd4,0xf4,0x69,0xdb,0x71,0x8a,0x00,0x58,0xf5,0x84,0xff,0xc3,0xbc,0x13,0x6e,0x5f,0xac,0xd6,0x72,0x1b,0x2d,0xbb,0x27,0xfd,0x8d,0xcc,0x59,0x79,0xb9,0x63,0xe8,0x0a,0xf3,0x7f,0xa4,0x9f,0x4c,0x35,0x9a,0xdc,0xff,0x11,0x42,0xf3,0x1c,0x86,0xd0,0x22,0x7e,0x81,0x79,0x04,0x93,0x5c,0xf2 +.byte 0xab,0xdf,0xb7,0x1d,0x84,0xbd,0xde,0xfb,0xd2,0x75,0x43,0xb8,0x19,0x63,0x97,0xfe,0x0e,0x91,0x9d,0x38,0x50,0xc5,0x7a,0xd6,0x51,0xd4,0xfc,0x8d,0xec,0xd5,0xe2,0x07,0xce,0x21,0x03,0x02,0xa1,0x61,0x8d,0xf1,0xf5,0x1f,0xb3,0xaf,0x9f,0x13,0xd8,0x81,0xd2,0xf7,0xe9,0xe2,0x62,0x49,0xca,0x1c,0x15,0x07,0x39,0xe6,0x01,0xec,0x6c,0x7d +.byte 0x3b,0xf1,0x52,0xda,0xf2,0x97,0x55,0xef,0x6f,0x88,0x82,0x0e,0xe6,0xf4,0x3e,0x33,0xf6,0x61,0x6d,0xef,0xbf,0xa8,0x9a,0x91,0x2f,0xb3,0xd2,0x3d,0xaa,0x7a,0x4e,0x80,0xe1,0x04,0xbe,0xc7,0xf8,0xc3,0xc9,0xd8,0xa2,0x01,0x5d,0x30,0xae,0x6d,0x39,0x52,0x60,0x9d,0x07,0xd5,0xa2,0x86,0xf0,0x88,0x00,0xec,0x18,0x11,0x2d,0x69,0x86,0xa9 +.byte 0x5a,0x73,0xda,0x4e,0x4c,0xdb,0xb8,0x02,0xad,0x53,0xec,0x20,0x0f,0x35,0xe0,0x4f,0x6e,0xd5,0x04,0xcc,0xa0,0xf5,0x8c,0x7d,0x31,0x04,0xa4,0xcf,0xf0,0x27,0xd2,0xb6,0x7d,0x8c,0x26,0x5f,0x19,0xba,0x79,0x80,0xec,0x6d,0xfe,0xaf,0xc1,0x3a,0xc2,0x3d,0x14,0x3c,0xa0,0xc5,0x77,0xf4,0x96,0x56,0x51,0x8b,0x7c,0x7e,0xe5,0x23,0x5d,0x46 +.byte 0x1b,0x2e,0x28,0xc0,0x80,0x6b,0x6a,0x85,0x6c,0xcf,0xaa,0x28,0xf3,0x83,0x2d,0x42,0x6f,0xf3,0x5e,0x5d,0xa2,0x7b,0xba,0x5c,0x12,0xb0,0xda,0xa0,0xeb,0xdf,0xad,0x1d,0x4c,0x54,0xcf,0xad,0x02,0x68,0xcd,0xfe,0x5c,0x5b,0x65,0x6d,0xa5,0xcc,0xd3,0xed,0x32,0x74,0x6c,0x58,0x83,0x3a,0xc1,0x71,0xbf,0xb5,0xa2,0xbd,0x10,0xe5,0x46,0xc5 +.byte 0x00,0x82,0xb1,0xeb,0x6f,0x73,0xf9,0x12,0x23,0xe4,0xda,0xff,0xa3,0xc4,0x9c,0xf1,0xcc,0x0e,0x1a,0x7a,0x10,0x62,0x8f,0xa5,0xb2,0x35,0x51,0x67,0xb5,0x95,0xbe,0x4c,0x81,0x53,0xfc,0xdd,0x27,0x26,0x97,0x42,0x01,0xec,0x08,0x91,0xb8,0xf0,0xaf,0x57,0x54,0x73,0x52,0x8f,0xde,0xca,0xed,0x1b,0xca,0x8d,0x97,0x1e,0xdc,0xe7,0xfa,0x68 +.byte 0xaf,0x37,0xb0,0x62,0xa3,0x9f,0xbc,0xac,0x9f,0x28,0x1e,0xb7,0xaa,0xb0,0x91,0xe4,0x95,0xad,0xf9,0xe5,0xd4,0xcc,0x23,0x0f,0x4a,0x2d,0xdd,0xea,0x64,0xd1,0x04,0x3c,0xd0,0xca,0xfe,0xd3,0x19,0x9d,0x28,0xa5,0x1c,0xff,0x3e,0xae,0xe9,0xfb,0x12,0x03,0x6d,0xcf,0xbc,0x5f,0x27,0xce,0x1a,0xb9,0xc0,0x31,0x88,0x6e,0x2e,0xaf,0x35,0x5f +.byte 
0xf0,0xce,0x92,0xf8,0x6f,0xd6,0x67,0x1c,0xc6,0x5c,0xee,0x59,0xaa,0xd6,0x8c,0xa8,0x13,0xe6,0xf7,0xe2,0x82,0x2f,0x82,0x1e,0x4c,0x0d,0xab,0x3e,0xdb,0x4d,0xc5,0x90,0x32,0xe4,0xf0,0x74,0xc1,0x92,0x1b,0xdd,0xf3,0xa7,0xf6,0x6b,0x01,0x9d,0x8d,0x78,0x3d,0x5a,0x46,0x74,0x16,0x93,0x44,0xca,0xbe,0x31,0xea,0xb4,0x65,0xcd,0xe6,0xdd +.byte 0x56,0x9d,0x63,0x48,0xf0,0xf3,0x15,0x91,0x6c,0x27,0xf9,0xf7,0x3b,0x9f,0x04,0x6d,0x4d,0x1d,0xf1,0x7c,0xd1,0x81,0x06,0xef,0x04,0x47,0x98,0x5d,0x21,0xf4,0xe0,0xa0,0x13,0xaf,0x1d,0xb0,0xd5,0x45,0x64,0x92,0x46,0x99,0xff,0xb4,0xbf,0x36,0x01,0x2d,0x23,0x6a,0xc4,0x6b,0x3f,0x91,0x10,0x03,0xaf,0x6e,0x79,0x86,0xdb,0x15,0xde,0xfa +.byte 0x0d,0x71,0x04,0x16,0x12,0x31,0x9b,0x69,0xb9,0xe0,0xe7,0x4e,0xfd,0x0e,0xd5,0x71,0xa0,0xc7,0xd7,0x46,0xdb,0xda,0xbd,0xcd,0xdc,0x77,0xe5,0x71,0x9d,0xa1,0xf4,0x02,0x10,0xc6,0x27,0x76,0x4e,0xa6,0x35,0xe6,0x9e,0xda,0xbe,0xd8,0xc0,0x21,0x15,0xd4,0xcc,0xd5,0x4b,0xdf,0x38,0xc5,0x15,0x4b,0xfa,0x4e,0x83,0xf4,0x27,0xdb,0x8a,0xb1 +.byte 0x0e,0x1f,0xc9,0x3c,0x1c,0x36,0x35,0x54,0x8b,0x54,0xf8,0x31,0x1e,0x0e,0x1c,0x4e,0x44,0x29,0x90,0xad,0x28,0x85,0xb4,0x72,0x2d,0x1b,0x8b,0x26,0x2f,0xb6,0xc2,0x14,0x0e,0x81,0xd0,0x37,0x29,0x5c,0x0f,0xdc,0x21,0x62,0x10,0x7a,0xeb,0xa3,0x6e,0xd4,0x5b,0xb4,0x13,0x2e,0xd6,0x8f,0xd9,0x57,0x0d,0x9b,0xfd,0x1e,0x66,0xb7,0x6e,0xac +.byte 0x88,0xb9,0x75,0x60,0x62,0x83,0x72,0x96,0xc6,0x2e,0xdc,0xfe,0x88,0xee,0x07,0x9a,0x62,0x19,0xde,0xf1,0xa5,0xfb,0xcc,0xdb,0x4a,0xeb,0x16,0x60,0x34,0x46,0xfc,0xf2,0x6d,0xee,0xfc,0xa0,0x3a,0xb1,0x11,0x03,0x8b,0xae,0x26,0xef,0x86,0x91,0x20,0x7a,0x19,0x35,0xd6,0x12,0xfc,0x73,0x5a,0xb3,0x13,0xf8,0x65,0x04,0xec,0x35,0xee,0xf8 +.byte 0x70,0xb2,0x0b,0xe1,0xfc,0x16,0x35,0xec,0x6b,0xdd,0x8b,0xdc,0x0d,0xe8,0x91,0xcf,0x18,0xff,0x44,0x1d,0xd9,0x29,0xae,0x33,0x83,0xfe,0x8d,0xe6,0x70,0xbb,0x77,0x48,0xaa,0xe6,0xbc,0x51,0xa7,0x25,0x01,0xcf,0x88,0xc4,0x8b,0xfc,0xb1,0x71,0x01,0xc7,0xfc,0xd6,0x96,0x63,0xee,0x2d,0x04,0x1d,0x80,0x24,0xd0,0x80,0x03,0xd9,0x18,0x96 +.byte 0xec,0x6a,0x98,0xed,0x6e,0x9a,0xe0,0x42,0x5a,0x9d,0xec,0xed,0x46,0x3c,0xb5,0xf0,0xd6,0x88,0x92,0x89,0x38,0x5f,0xd6,0xba,0xfd,0x32,0x31,0x81,0xe9,0xf1,0x56,0x89,0xa3,0x56,0xa6,0x03,0x00,0x60,0xe1,0xa8,0x59,0xdb,0xbe,0x72,0x39,0x6c,0x08,0x4d,0x26,0x57,0xa6,0xf6,0x13,0x7d,0x4a,0x2f,0x64,0xb8,0xa7,0x23,0x2c,0xa4,0x4a,0xad +.byte 0xcf,0xa1,0xa2,0x32,0xbb,0xd1,0x98,0x02,0xe4,0x1a,0x41,0x26,0x23,0xba,0xa2,0x17,0x62,0xaa,0xa6,0xc7,0x74,0x9d,0xea,0xc7,0xa0,0x08,0x0a,0x1a,0x4e,0x71,0xd9,0x45,0xf7,0xe8,0x57,0x79,0x12,0xd0,0x38,0x2f,0xdb,0xbd,0x5a,0x84,0xe1,0xb2,0x62,0x7e,0x56,0xb3,0x50,0x2a,0xa0,0x32,0x1f,0x86,0x71,0xc4,0xa5,0xba,0x93,0x5b,0x22,0x97 +.byte 0xf4,0xe5,0x44,0x27,0x6b,0x06,0x84,0x55,0x19,0x45,0x12,0x75,0x4b,0xf0,0x76,0x6d,0x3c,0x0a,0x17,0xc2,0x9d,0x96,0x72,0xe7,0x5e,0x79,0x84,0x0a,0x39,0x64,0x09,0x6e,0x7e,0xd7,0x77,0x40,0x75,0x2c,0xbd,0x98,0xae,0x3e,0x34,0x08,0x4d,0xda,0x2c,0xcf,0x0c,0xa2,0x8c,0x40,0xfa,0x34,0x43,0x15,0xed,0x4f,0x69,0xa6,0xef,0x2d,0x3c,0x55 +.byte 0x7a,0xe1,0x67,0xd1,0x0a,0x89,0xe0,0x2d,0x02,0x35,0x57,0xc8,0x9a,0x4b,0xc4,0x46,0xa7,0x57,0x03,0x89,0x7d,0x3f,0x70,0x47,0x03,0x06,0xd9,0x81,0x1f,0x8d,0x7e,0x36,0x9b,0xfd,0xad,0x20,0x9d,0x5a,0x29,0xe9,0x40,0x6a,0xb8,0x07,0x6b,0xc7,0x2b,0x58,0xd2,0x1d,0xef,0x88,0xa5,0xfb,0x3b,0xd6,0x9f,0xfd,0x89,0x0e,0x50,0xd4,0xbc,0x89 +.byte 
0x3f,0x3c,0x6c,0x50,0xc6,0xe3,0x8b,0x7e,0x34,0x8b,0x26,0x99,0x2a,0xfa,0xa5,0x19,0x53,0xb5,0x5e,0xfd,0x94,0xe8,0x33,0xb2,0x6d,0x9c,0x3c,0x0c,0x14,0x90,0xc4,0xa2,0x4a,0x3a,0xca,0x07,0x72,0x46,0x37,0xfc,0x02,0x5d,0xf4,0x97,0xca,0x8e,0xc6,0xc4,0x63,0xda,0x5c,0x89,0xc3,0x6c,0xb1,0x1a,0xf5,0x2a,0xbc,0x2e,0xe3,0xcd,0x2f,0xe2 +.byte 0x91,0x16,0xf9,0x94,0x0e,0x1b,0xe6,0x01,0x73,0x61,0x1e,0xcf,0x5e,0x21,0x70,0xcb,0x5b,0x87,0xc1,0x46,0x39,0x59,0xa6,0x74,0x82,0x7f,0xa2,0x6c,0x4a,0x50,0x5f,0xbd,0x1c,0x1a,0x65,0x80,0x01,0x44,0x19,0xcf,0xcd,0xef,0x3d,0x5e,0x1b,0x71,0x82,0x4f,0x8b,0xc1,0xa0,0x9a,0x77,0xee,0xac,0x06,0xdc,0x6a,0xa0,0x34,0x50,0xa4,0xe0,0xda +.byte 0x3d,0xa0,0xf7,0x9a,0xb8,0xd5,0x59,0xe0,0x7f,0x05,0x04,0xd5,0x32,0x8c,0x49,0xf5,0x0a,0x0e,0x99,0x83,0xf5,0x47,0x2b,0x7c,0x7b,0x65,0x25,0x02,0xc4,0x88,0xbb,0x6a,0x4f,0x89,0x31,0x60,0xc2,0x47,0x8b,0x22,0xfc,0x4a,0xde,0xb3,0xb9,0xed,0xb8,0xdf,0xd7,0xd5,0x09,0x98,0xcc,0x5f,0xaf,0xbb,0x02,0xc3,0x62,0x62,0xee,0x99,0x42,0x1b +.byte 0xbe,0x5b,0xa8,0x5c,0x40,0x03,0x86,0x29,0x29,0x06,0x0b,0x53,0x46,0x29,0x03,0x3b,0x11,0x64,0xf1,0x09,0xca,0x69,0x69,0xfa,0xcc,0x85,0x23,0x14,0x1b,0xfd,0x65,0xb9,0xf5,0x6b,0xbb,0x2a,0x9d,0x6e,0x64,0x1a,0xe1,0x37,0x39,0xd4,0x85,0x40,0xa3,0xf9,0x04,0xec,0x9e,0x3b,0x74,0x97,0xa4,0x64,0x8a,0x48,0xb2,0x62,0xc1,0x1c,0xed,0x67 +.byte 0x6f,0x23,0xae,0x0f,0x64,0x2e,0xe5,0x92,0xb6,0xb5,0x71,0x24,0xc0,0x60,0x9a,0x10,0x23,0x6b,0x4a,0x22,0xe9,0x0a,0xaa,0x09,0x62,0x39,0xe0,0x40,0xee,0x13,0x27,0x14,0x73,0xeb,0x75,0x7b,0x4a,0xe1,0x42,0x65,0x37,0xae,0x80,0x08,0x26,0xf9,0x53,0x98,0x58,0xdd,0xf5,0xed,0x26,0x37,0x37,0x85,0xb5,0x88,0x91,0x05,0x2d,0x04,0xa6,0xd5 +.byte 0xa6,0x98,0xb0,0x0e,0x4b,0x4c,0x53,0x76,0x79,0xad,0x82,0xc5,0x16,0xba,0xd8,0x20,0x5f,0x4c,0x1d,0x69,0xa0,0xe0,0xe9,0xbc,0xb8,0x5c,0x10,0x4a,0x0a,0xd3,0x52,0x9c,0x2e,0x1b,0x6c,0xf7,0x43,0x83,0x6f,0xa9,0xcc,0x00,0xed,0x16,0x4c,0xc3,0x24,0x79,0x59,0x68,0xfb,0xf9,0xf6,0xb0,0xb4,0x01,0xc2,0xdd,0xf7,0xe5,0x3b,0x60,0x48,0x49 +.byte 0x32,0x48,0x05,0xa8,0x62,0xa3,0x03,0x9f,0x3d,0x91,0xdb,0x84,0x64,0x6f,0x1e,0x50,0x8e,0xdf,0x1a,0xa0,0xb1,0xf4,0x34,0x7c,0xe6,0xb7,0x7c,0x14,0xa1,0x65,0x1a,0xb4,0xdb,0x67,0x78,0xb1,0x88,0x3c,0xc2,0x5e,0x0e,0xea,0x32,0x15,0xc7,0xda,0xe4,0x9a,0x44,0xde,0x61,0x90,0x3b,0x97,0x11,0x5b,0x6d,0xa5,0x9a,0x2f,0x1b,0x8b,0xd7,0xdd +.byte 0x73,0xe4,0xc3,0x19,0x5d,0x68,0xcf,0x0e,0xe4,0x69,0xa5,0xeb,0x50,0x6f,0x79,0xff,0x91,0xc6,0x95,0x83,0xe8,0x72,0x6a,0x01,0x49,0x2b,0xcf,0x8f,0x93,0x1e,0xef,0x31,0x17,0x8f,0xa8,0x2b,0x5f,0x4b,0x79,0x8b,0xe5,0x6c,0xb7,0x61,0xd5,0x9e,0xe0,0xd4,0x25,0xc3,0x93,0x31,0x8f,0x66,0x6c,0x48,0x30,0x65,0xf4,0xd7,0xde,0x64,0xee,0xbd +.byte 0xbd,0xad,0x32,0xfc,0xf3,0xd8,0x7c,0x85,0x7c,0x24,0x40,0xb6,0xd4,0xe0,0x4b,0xc0,0xab,0xcc,0xeb,0x77,0x7c,0xb7,0x33,0x3c,0x90,0x04,0xaf,0x85,0xaa,0xb4,0xaa,0x90,0x67,0x29,0xd9,0x85,0x6a,0x34,0xf4,0xc4,0x6c,0xbc,0xb4,0x86,0x54,0x83,0xd5,0x5e,0xf3,0xdd,0x1a,0x56,0x5e,0xa5,0xd8,0x06,0xc0,0xa7,0x27,0xd4,0x0d,0x5b,0x08,0xf4 +.byte 0xb4,0x15,0xf9,0xb4,0x56,0x1c,0x80,0x98,0xc9,0xcd,0xf0,0x38,0x18,0xbe,0x99,0xec,0x7e,0x0c,0x3d,0xc1,0x98,0x26,0x9d,0x50,0xe4,0x00,0xcf,0x0f,0x0b,0x77,0x86,0x31,0x55,0x38,0xa4,0x31,0x50,0x51,0x64,0x88,0x81,0x05,0x32,0x99,0x38,0xd1,0x62,0x20,0x8e,0xf0,0x29,0x31,0xf5,0x79,0xbb,0x1e,0x0f,0xba,0x51,0x94,0xa9,0x54,0xcd,0x43 +.byte 
0xce,0xe5,0x2c,0x29,0xa5,0x51,0x23,0x97,0x5d,0x36,0xff,0x51,0x5c,0x66,0xb7,0x62,0x1b,0x5f,0xd7,0x2f,0x19,0x07,0xff,0x0a,0xfc,0xf6,0x6e,0xb5,0xfd,0xa9,0x92,0x40,0xd3,0xe6,0x99,0x15,0x6f,0x1e,0x91,0xad,0x1f,0x4d,0x1c,0xe2,0xd9,0xcf,0x01,0x71,0xec,0x1a,0xa3,0xba,0x48,0x40,0xfd,0x18,0xb1,0x24,0x2b,0xd2,0x37,0xb5,0x74,0xdd +.byte 0x7e,0xf6,0x18,0xb4,0x7b,0x0e,0x7d,0x65,0x46,0x7b,0xe3,0x51,0x03,0xae,0xe1,0xd0,0x74,0xc6,0xc9,0xda,0x0e,0x79,0x6f,0xf5,0x62,0xc0,0x7e,0x76,0x3e,0x13,0x8b,0xe0,0x4c,0xfa,0x7e,0xe1,0xa2,0xee,0x9d,0x3f,0x91,0x9d,0x21,0xdd,0xc2,0xd0,0xa5,0x1d,0x17,0xd6,0xdc,0xeb,0xa3,0xc0,0x71,0xa0,0xfe,0xf0,0xaf,0x31,0xdc,0xa3,0xd4,0x21 +.byte 0x4a,0x32,0x1d,0x54,0x25,0x3b,0xc8,0x8f,0x68,0xcd,0x99,0xce,0x76,0x39,0x42,0xd8,0xca,0xf2,0x46,0x72,0xfe,0x52,0xc2,0x90,0x83,0xed,0xa0,0x6d,0x1b,0xf5,0xb1,0x09,0xae,0x2b,0x34,0x4f,0xd3,0x78,0x19,0x7f,0xad,0x8d,0x50,0x26,0x9c,0x36,0xa3,0xb5,0x3d,0x0b,0xa6,0x87,0x65,0xa0,0xdb,0x88,0x20,0xff,0xb6,0xfd,0xc5,0xbd,0x0a,0x28 +.byte 0xc8,0x9c,0x42,0x7f,0x24,0x58,0xe9,0x07,0x53,0x4b,0x9a,0x2a,0x1e,0x7b,0x90,0x97,0x78,0x74,0x80,0x5d,0xe5,0x6e,0xae,0x15,0x68,0xd4,0x2a,0x3a,0xd3,0x00,0x4f,0x4b,0xff,0x8f,0x1e,0x8f,0x9f,0x75,0xe5,0xea,0x9d,0xb9,0xed,0x8f,0xa9,0x2b,0x70,0xa8,0xcb,0x08,0x85,0xd3,0x8f,0x5d,0xc7,0x49,0x66,0xcc,0xa8,0x6d,0xbd,0x01,0x93,0xd5 +.byte 0xe6,0x75,0x2e,0x25,0x07,0x59,0x86,0x3f,0x44,0x8b,0x0b,0xb5,0x38,0xd5,0xbd,0xcf,0x48,0x8a,0xf7,0x71,0xd6,0x6b,0x2e,0x93,0x3d,0x0b,0xc0,0x75,0xee,0xa8,0x5d,0x9c,0x3d,0xa5,0xdb,0xc5,0x8d,0xac,0xda,0xf4,0xcd,0x5f,0x24,0xfe,0x86,0x14,0x44,0x65,0x3f,0x89,0x7f,0xd3,0x61,0x48,0xb0,0x43,0xf0,0x1e,0xde,0xbc,0xb7,0x51,0x0f,0xfc +.byte 0x32,0xf2,0x04,0xe2,0x4b,0xcb,0xbb,0x63,0x7d,0x5b,0x9a,0xb1,0x91,0x57,0x89,0xdc,0xed,0xde,0x91,0x2d,0xdd,0x42,0xc8,0x3c,0xb0,0xd7,0xa5,0xbc,0xa7,0x33,0x14,0x32,0xaf,0xf7,0xe9,0x25,0xd2,0x1a,0x64,0xf7,0x1b,0xab,0x0e,0xbc,0x50,0xbc,0x85,0x44,0xe0,0xa6,0xf1,0x4a,0x32,0x2f,0x30,0x27,0x48,0x4f,0xfc,0x8a,0x5a,0x78,0xe7,0x16 +.byte 0x55,0xcf,0xca,0x15,0xa8,0xa8,0xa2,0xef,0x9a,0x16,0x02,0xf4,0xb0,0x44,0xfd,0xc4,0x51,0x01,0x4f,0x1d,0x9d,0x09,0x62,0x42,0xe9,0x8b,0x18,0xa4,0x65,0xef,0x8b,0xfe,0x71,0x9f,0x4b,0x47,0x48,0x41,0x73,0x5c,0x0c,0x52,0x7d,0x79,0xbc,0x93,0x2a,0xaa,0x81,0x99,0x21,0xa5,0x9e,0xac,0xcd,0x57,0x51,0x50,0xbc,0xc9,0x96,0xaf,0xdf,0x1a +.byte 0x8f,0xee,0x36,0x05,0x20,0x32,0xe8,0x51,0x94,0x72,0x12,0xa3,0x17,0x25,0x7f,0x0a,0x3e,0xcc,0x22,0xcf,0x05,0xb2,0x2b,0xaa,0x36,0x01,0xdf,0xd4,0x4e,0xe1,0x02,0x43,0x4e,0xac,0x50,0x64,0xcd,0x2f,0xc2,0xa9,0xb0,0xf2,0xf2,0x4c,0xdf,0x16,0xa6,0x54,0xf7,0xbf,0x1a,0x69,0xeb,0xa1,0x5a,0xc7,0xcf,0x46,0x2d,0xc2,0x3a,0x7f,0x4a,0x14 +.byte 0x22,0x15,0x46,0x46,0x2d,0xc1,0x98,0xf7,0x0b,0xf3,0x27,0xfc,0x78,0x67,0x05,0xd8,0xe0,0xf6,0xb8,0xb6,0x0b,0xdb,0x4d,0x6b,0x7e,0x9b,0xbf,0x5c,0x15,0x97,0x49,0x9f,0x6f,0x11,0x6c,0x6e,0x1d,0x1e,0x65,0x5b,0xb9,0x60,0x8f,0xa3,0xa9,0x99,0x17,0x92,0xb8,0x65,0x25,0xc4,0xef,0xea,0xa6,0xc0,0x57,0xa9,0x4c,0x78,0xe3,0xd6,0xf2,0x19 +.byte 0x9c,0x86,0x9e,0x45,0x3e,0xfd,0x21,0x4c,0x2a,0x56,0x7c,0x23,0xf2,0x22,0xa1,0x81,0xdb,0xe6,0xfa,0x85,0x19,0x3b,0x1d,0x61,0xb3,0x21,0xb5,0x64,0x1d,0x07,0x66,0xd2,0xe5,0x9c,0xb0,0x76,0x9d,0xc9,0x02,0x6a,0x8d,0xd5,0x84,0xd5,0xa7,0x7c,0x70,0x64,0x46,0xd6,0xff,0xc7,0x9f,0x2f,0xed,0xc1,0x5a,0xcb,0x56,0x12,0x31,0x9d,0xff,0x66 +.byte 
0x9a,0xf8,0x50,0xc6,0x54,0xfd,0x8d,0x49,0x32,0x8c,0xdd,0x8c,0xbe,0x30,0x79,0xaf,0x1a,0xd5,0x28,0x1d,0x03,0x87,0x12,0x60,0x7a,0xcc,0xe6,0xe8,0x4e,0x21,0x5d,0xa3,0x06,0xfb,0xdf,0xf6,0x31,0xd6,0x10,0x3e,0xec,0x23,0x69,0xc7,0x7b,0xf6,0x78,0xa6,0xd1,0x8a,0x48,0xd9,0xdc,0x35,0x1f,0xd4,0xd5,0xf2,0xe1,0xa2,0x13,0x8a,0xec,0x12 +.byte 0xa7,0xf1,0x5d,0xb2,0xc3,0x6b,0x72,0xd4,0xea,0x4f,0x21,0xff,0x68,0x51,0x51,0xd9,0xd7,0x2f,0x28,0xd7,0xdf,0xbc,0x35,0x4f,0x49,0x7e,0xe7,0x21,0x82,0xd7,0x0c,0x7c,0xf4,0x86,0x86,0x62,0xcd,0xf5,0x23,0x77,0xc1,0x14,0x8a,0xc4,0x2a,0x82,0x74,0x0e,0x90,0x93,0xd5,0x5a,0xc0,0x57,0x93,0x1a,0xe1,0x1c,0x13,0x17,0x72,0xc3,0xa6,0x54 +.byte 0xc4,0xe2,0xfc,0xd3,0xa0,0xce,0x08,0x87,0x9e,0x2a,0xaf,0xa7,0xbb,0x2d,0xaf,0xc0,0x38,0x97,0xc8,0x6d,0xb8,0x7b,0x75,0xc5,0xf2,0x79,0x62,0xdc,0x7c,0xa9,0xfd,0x19,0xa2,0xb1,0xee,0xdf,0x90,0x18,0x5a,0xdb,0x3c,0xba,0x0d,0x84,0xd6,0xaf,0x15,0xee,0xb6,0xa5,0x78,0x38,0x87,0xdf,0x42,0xd6,0xd1,0xa2,0xe9,0xe0,0xa6,0xf2,0x4e,0xa4 +.byte 0xed,0xa5,0xf6,0x66,0x7f,0x99,0xbc,0xfb,0x4b,0x37,0xca,0x5a,0xb3,0x29,0x8e,0x80,0x30,0x8b,0x74,0x7b,0xac,0x61,0xfb,0xca,0x62,0xfe,0x24,0xc4,0x6e,0xac,0x66,0x97,0xaa,0x9a,0x99,0xe6,0xa8,0xa4,0xd8,0x62,0x58,0x7c,0xd1,0xeb,0xee,0xc8,0x08,0xa0,0x54,0xde,0xb1,0xef,0x57,0x2c,0xb6,0x2c,0x78,0x22,0x10,0xbb,0xfe,0x4b,0x77,0xa5 +.byte 0x5a,0xed,0xbb,0xf8,0x97,0x96,0x20,0xa9,0x8c,0x78,0xb5,0xb9,0x55,0xc9,0xaf,0xb9,0xa1,0x1f,0x13,0x52,0xf9,0xbb,0xaa,0x98,0x01,0x57,0xa6,0x88,0xaa,0x5c,0xf0,0x62,0x5b,0x3e,0xe1,0x5f,0xf4,0x98,0x95,0x8b,0x8f,0x48,0xd6,0xd5,0x8b,0xc2,0x1d,0x45,0x7d,0xe2,0x03,0x66,0x84,0xfc,0xbd,0x8e,0x95,0x9f,0x58,0x99,0x7b,0x4c,0xb6,0xe5 +.byte 0xe2,0xf9,0x2e,0x92,0x58,0xca,0xa9,0x24,0x9c,0x7c,0x46,0xdf,0xea,0xb4,0x6e,0x0e,0xa5,0x9c,0x14,0xbf,0x25,0x5b,0x39,0x4a,0xaf,0x31,0xaa,0xd1,0x2c,0xe6,0x06,0x3d,0xc4,0x60,0xc7,0xcd,0x49,0x8d,0xe1,0x50,0x55,0xe4,0x72,0x68,0xed,0x43,0xb8,0x85,0xa3,0xc3,0xf1,0xf5,0xd1,0xcf,0xcb,0x57,0xac,0x04,0x16,0x22,0xe4,0xfc,0x4a,0x13 +.byte 0x60,0x3f,0x09,0xa4,0xf2,0x9b,0x34,0xeb,0x0c,0x10,0x57,0xc3,0x3f,0x15,0xb5,0x1b,0x6a,0xb3,0x7d,0x37,0x02,0x4c,0x0f,0x6f,0x8b,0x4d,0x5d,0x57,0x7d,0xbf,0x00,0x8a,0x74,0xb4,0x4c,0x5f,0x90,0x27,0x76,0x09,0x8c,0x18,0x3f,0x26,0x3a,0x09,0x06,0xdd,0x8b,0xff,0x0e,0xa4,0xae,0xef,0x0c,0x81,0xf2,0xf3,0x1f,0xe0,0x33,0x33,0x37,0xc6 +.byte 0xc3,0xfb,0x14,0xdd,0xa1,0x16,0x84,0x80,0xcb,0x37,0xe7,0x97,0x6d,0x21,0xa7,0x71,0x19,0x2b,0x2d,0x30,0xf5,0x89,0x2d,0x23,0x98,0xfc,0x60,0x64,0x4a,0x26,0x65,0x4a,0xef,0x12,0x59,0xa3,0x8c,0xd9,0xbd,0xdc,0xb7,0x67,0xc9,0x8d,0x51,0x72,0x56,0x6a,0xe5,0x59,0xa2,0x53,0x4f,0xb6,0x53,0xff,0xb0,0xd4,0x06,0x7f,0x79,0x23,0xf9,0xcb +.byte 0xbf,0x9a,0x93,0xde,0x88,0x33,0x58,0x70,0xa7,0xcc,0x07,0xb1,0x44,0xb9,0x99,0x1f,0x0d,0xb9,0xc9,0x18,0xdc,0x3e,0x50,0x22,0xfb,0x4e,0x86,0x0d,0xc0,0xe7,0x7f,0xc6,0xa1,0x52,0x0d,0x8d,0x37,0xe6,0xaf,0xe3,0x13,0xbe,0xa6,0xf9,0x59,0x39,0x0f,0x17,0x66,0xce,0xb1,0x7d,0x7f,0x19,0x1a,0xf8,0x30,0x3a,0xa5,0x72,0x33,0xa4,0x03,0xb6 +.byte 0xb6,0x9b,0xde,0x7a,0x7a,0x62,0x3d,0x85,0x98,0x8e,0x5d,0x8a,0xca,0x03,0xc8,0x2c,0xae,0xf0,0xf7,0x43,0x3f,0x53,0xb2,0xbb,0x1d,0xd0,0xd4,0xa7,0xa9,0x48,0xfa,0x46,0x5e,0x44,0x35,0x50,0x55,0xdc,0xd5,0x30,0xf9,0x94,0xe6,0x5f,0x4a,0x72,0xc2,0x77,0x59,0x68,0x93,0x49,0xb8,0xba,0xb4,0x67,0xd8,0x27,0xda,0x6a,0x97,0x8b,0x37,0x7e +.byte 
0xe9,0x59,0x89,0xc7,0x5e,0xd9,0x32,0xe2,0xaa,0xd1,0xe9,0x2b,0x23,0xca,0x9d,0x89,0x7a,0xf5,0xe4,0xfb,0x29,0xcc,0x88,0xfb,0x82,0x0f,0xbf,0x47,0x54,0xca,0x2b,0x4b,0xd8,0x47,0x7f,0x65,0x38,0x5a,0xb3,0xe8,0x0b,0xd7,0xe1,0x8b,0x89,0x57,0x32,0xdb,0xa3,0x85,0xba,0xf9,0xbc,0x52,0x92,0x20,0x10,0x66,0x54,0x81,0xe1,0x49,0x3f,0xe1 +.byte 0x8c,0x2e,0x0b,0x3b,0xe7,0x49,0xb4,0x60,0x5a,0x20,0x33,0xc4,0x4e,0x81,0xef,0x96,0xda,0x73,0x90,0x2b,0xb4,0x86,0xa1,0x5c,0xcd,0xa0,0xc7,0xf3,0x06,0x0d,0x2a,0x5a,0x41,0x96,0xf5,0x40,0x1b,0x0a,0x3a,0xb7,0x38,0xe1,0xbb,0xe3,0x42,0xf9,0x52,0xe5,0x98,0xe2,0x17,0xd4,0xb0,0x09,0x73,0x75,0xc1,0x00,0x18,0x0f,0xa7,0x0b,0x58,0xc1 +.byte 0x78,0x5c,0x0c,0x05,0xd8,0xfb,0xc5,0xfd,0x5c,0x66,0xbe,0x54,0x68,0xd1,0x16,0x54,0xfb,0xc5,0x97,0xd7,0x03,0x82,0x47,0xbb,0x47,0xea,0x9e,0x8b,0x90,0x07,0xb2,0xd2,0x06,0x14,0x79,0xeb,0xb6,0xe1,0x10,0x55,0xa9,0x13,0xea,0x65,0x7a,0xd0,0xe5,0x66,0x5d,0xe7,0x7b,0x10,0x5f,0x7c,0x25,0x7d,0x4e,0x77,0xb3,0x19,0x02,0xb1,0x45,0x1c +.byte 0x1a,0x51,0x24,0x72,0xd4,0xaa,0x03,0x0c,0x37,0x2a,0x78,0x81,0x05,0xca,0x73,0xb9,0xb5,0xd8,0xf5,0x25,0x2b,0x30,0x59,0x00,0x66,0xbd,0x6c,0x38,0xa2,0xc3,0xfb,0x43,0x85,0x6d,0xab,0xca,0xd8,0x73,0xa8,0x76,0xda,0x6e,0x00,0x19,0xd0,0xb9,0x1e,0x9b,0x33,0xe4,0x57,0x68,0xf4,0xb8,0x35,0x44,0xe6,0x74,0xd2,0x33,0x64,0xa1,0x41,0xa6 +.byte 0x5a,0xf6,0x8e,0x29,0xb5,0xa6,0x21,0x8e,0xc4,0x0c,0x0c,0x16,0x81,0x08,0xef,0x0a,0x41,0x08,0x34,0xc7,0xe1,0xd8,0xa8,0x68,0xb1,0xf3,0x9a,0x7a,0xaa,0x90,0xc0,0x77,0x32,0x70,0x50,0x5c,0x92,0xfc,0x38,0x31,0xaf,0x3e,0xd8,0xd8,0x4b,0x90,0x99,0xc4,0x17,0xde,0xa6,0xb5,0x29,0xc0,0x82,0x45,0x20,0x08,0x0c,0x4f,0x76,0x36,0x56,0x7e +.byte 0x07,0x17,0x42,0x78,0xa1,0x2d,0x62,0x48,0x81,0x57,0xc4,0xcf,0xf4,0x89,0x34,0x78,0x10,0xe6,0x98,0x78,0xb0,0x69,0x15,0x06,0xdb,0x2b,0xbb,0x8b,0xa5,0x72,0x50,0x24,0xae,0x6b,0x33,0x49,0x7b,0x9d,0x69,0x74,0xc8,0x7c,0xca,0x7a,0x31,0x39,0x0d,0x72,0x78,0xc1,0x6b,0x97,0x50,0x97,0xea,0x90,0xab,0xe7,0xdf,0x29,0x2e,0xf7,0x6e,0x49 +.byte 0x95,0xab,0xbd,0xea,0x1f,0xd4,0x93,0x4d,0x30,0x6b,0x6d,0xb0,0x86,0x38,0x2c,0xc8,0x77,0x2c,0xb5,0xb5,0x5c,0xd9,0xbb,0xe9,0x7d,0xb2,0xb7,0x6b,0xd1,0x1c,0xd3,0xd0,0x66,0x51,0x63,0x8c,0xf3,0x13,0xad,0xcf,0xeb,0x82,0x12,0x1a,0x6d,0xf5,0x75,0x66,0xa2,0x55,0x30,0x64,0x1d,0x68,0x46,0x50,0x5a,0x93,0xf1,0xc2,0x13,0x68,0x95,0x55 +.byte 0x51,0xe0,0x56,0x3a,0x96,0x86,0x8e,0xfb,0x5f,0x3b,0x1f,0x49,0x9c,0x3d,0xe5,0xf2,0x8c,0x3f,0xd6,0x6d,0x17,0xc7,0x18,0x59,0x1a,0x8a,0x72,0xa8,0xb3,0x39,0xda,0xc4,0xfa,0xc5,0xca,0xdf,0x48,0x48,0xd1,0xd2,0xba,0x14,0x5d,0x28,0x3b,0x4c,0xb3,0xcb,0x8d,0x1b,0x91,0x46,0x6b,0x2d,0x21,0x21,0x99,0x98,0x6d,0xcc,0x6b,0x8e,0x91,0x1d +.byte 0x42,0xc2,0x72,0x1a,0xc6,0xd2,0xaf,0xed,0x10,0xff,0x1e,0xa5,0xae,0x16,0xc0,0x05,0xdf,0x37,0xe2,0x1e,0x2e,0x15,0x21,0x0c,0x33,0x6f,0xfd,0xed,0x3f,0x7e,0xd7,0x69,0xfb,0x76,0x79,0x65,0xe9,0xd9,0x8d,0xf6,0xc0,0x6c,0xf7,0x15,0x7f,0x04,0xd7,0x71,0xcc,0xaa,0x85,0x73,0x23,0xf1,0xc8,0x62,0xd0,0x8e,0x01,0x35,0xff,0x4f,0x4f,0x13 +.byte 0xe6,0x28,0xf1,0xc1,0x7a,0x04,0xc0,0x7b,0x75,0xac,0x1c,0x55,0xb4,0x7c,0x00,0xb9,0xe0,0x14,0x67,0xb6,0xc5,0x69,0x62,0x0b,0xe6,0xb5,0x46,0x86,0x6f,0x09,0xdf,0x84,0x2c,0xa8,0x30,0x89,0x5b,0x24,0x47,0xfa,0x43,0x24,0xd5,0x07,0xf7,0xba,0xab,0x1b,0xfd,0x60,0xad,0x89,0x5f,0x60,0x87,0x78,0x48,0xbb,0xc0,0x63,0xf4,0x27,0x86,0x33 +.byte 
0xf4,0x49,0x64,0x4c,0x5c,0x94,0x9a,0xb8,0x0f,0x45,0xe2,0x92,0x7d,0x9a,0x86,0xdb,0xb7,0x05,0xe8,0xd7,0x64,0x44,0xfa,0x74,0x60,0x72,0x89,0x13,0x8f,0x2e,0x96,0x33,0xa9,0x12,0x4a,0x62,0x6b,0xc3,0xcb,0x55,0xd3,0xef,0x17,0x11,0x82,0x4a,0x51,0x77,0xbf,0x63,0xa0,0x21,0xfc,0xbc,0x0c,0x6f,0x9a,0xfd,0xde,0xbe,0x9f,0x2e,0x50,0xd5 +.byte 0x32,0xa4,0xf0,0x1b,0xed,0xfa,0xbf,0xcd,0xc9,0xd8,0xf8,0x06,0xf2,0x17,0x8a,0x92,0x18,0xb8,0xc3,0xe5,0xbf,0xc2,0xf4,0x77,0xb9,0x71,0xfb,0x60,0x6e,0xe7,0xad,0xe4,0x7d,0xd4,0x59,0xa9,0xbd,0x21,0xd5,0x03,0x69,0xb5,0xf1,0xce,0xb5,0x88,0xd9,0x1d,0xc7,0xb3,0x14,0xa6,0xb1,0x30,0x8d,0xaa,0xcd,0xe5,0x50,0xc5,0x0d,0x4b,0x6d,0xde +.byte 0x17,0x4d,0xd2,0x93,0xf3,0xc2,0x8d,0x59,0xf1,0xd0,0x2f,0xb5,0x62,0x18,0x81,0x07,0xb3,0xfb,0x08,0xb3,0xa8,0x15,0xe0,0x9a,0x4c,0xa5,0x24,0xcd,0x47,0x69,0xf9,0xf7,0xda,0xa9,0xff,0xe1,0xe2,0x43,0xe3,0x69,0xf1,0x26,0xac,0xc6,0x42,0xf2,0x32,0x42,0xfb,0x7c,0xa2,0x94,0xc6,0xaa,0xd9,0x05,0x29,0xc6,0x3d,0x45,0x44,0x1d,0x52,0x7e +.byte 0x48,0x47,0x93,0x34,0x08,0xa0,0x93,0xc2,0x5e,0x9b,0x22,0xc1,0x2a,0xaa,0xfe,0xa2,0x26,0x00,0xa8,0xbb,0xd0,0x58,0xfd,0x5a,0x09,0x4f,0xa1,0x0c,0xff,0x66,0xcc,0x88,0x3a,0x69,0x9a,0x12,0xb6,0x05,0x6e,0xdf,0x54,0x5d,0xe7,0x03,0x8e,0x95,0x86,0x68,0x83,0x83,0x6f,0x04,0x0b,0x9c,0x05,0x05,0x77,0x14,0x83,0x47,0x98,0x5f,0x22,0xaf +.byte 0xa8,0xfd,0xf3,0xe7,0x73,0xec,0xef,0xd7,0x57,0xd9,0xef,0xe7,0x1b,0x18,0x24,0x09,0xd9,0x14,0xf9,0x60,0xba,0x05,0x0f,0x8f,0x33,0x48,0xb1,0x06,0x41,0x2e,0x95,0x3d,0xf5,0xcf,0x14,0x50,0x5d,0xb6,0x93,0xeb,0xd5,0xf8,0x9f,0x7c,0x8f,0x23,0x35,0x39,0x30,0xc8,0xf6,0x74,0x07,0xc4,0x4c,0xcf,0xe1,0xdb,0x3e,0x9f,0x0a,0xfd,0x48,0x9e +.byte 0x56,0xe4,0xa7,0xa3,0x07,0x06,0x18,0xbb,0x50,0x75,0x33,0x48,0xb9,0xa1,0x4e,0x63,0x65,0xd3,0xf4,0x40,0xc3,0x2d,0x52,0x9a,0xad,0x56,0x7f,0xff,0xb0,0x46,0x24,0xa1,0x78,0x5f,0xb6,0xa8,0x72,0x28,0xb3,0x6c,0x61,0x6e,0xa0,0xfc,0xcb,0xe8,0xfe,0x07,0x28,0x97,0x1c,0xda,0x76,0xc7,0x98,0x2f,0x00,0x1d,0xf2,0x17,0xbe,0x48,0x3f,0xd3 +.byte 0xc7,0xbe,0x89,0x89,0xe1,0x96,0x75,0x1e,0xee,0xf9,0x78,0x67,0xbf,0x12,0x1e,0xe2,0x14,0xbf,0xd4,0xfd,0x49,0xaa,0xbf,0xc6,0xb8,0x4f,0x84,0xcd,0x5d,0x3c,0x45,0xb3,0xb0,0x14,0x6f,0x2d,0x6f,0x35,0xfa,0x60,0x7f,0x64,0x40,0xc8,0xde,0xa8,0x2b,0x56,0x75,0x74,0xc9,0xe1,0x2c,0xe2,0x2f,0xc2,0x3e,0xba,0xa3,0x20,0xd8,0xa3,0xbc,0x69 +.byte 0x9d,0x1c,0xcf,0x5e,0xe3,0xc0,0x66,0x72,0xce,0x22,0x96,0xad,0x47,0xc9,0x5b,0xac,0x45,0xdc,0x4f,0x8e,0xf6,0xa6,0x2e,0x4a,0x1e,0x01,0xe4,0xb7,0x83,0x68,0x92,0x2b,0x98,0xdf,0x22,0x0f,0xd9,0x4f,0x6f,0x72,0x37,0x56,0xfa,0x1b,0xbb,0x5a,0x4d,0xd8,0x5b,0xc6,0x65,0xf8,0xd4,0x4e,0xa5,0xc0,0x0f,0x2d,0xc2,0x38,0xa4,0x6c,0x33,0x2f +.byte 0x7a,0x52,0x14,0xbb,0xfb,0xb3,0xf2,0xa9,0xbf,0xa0,0xad,0xcb,0x8c,0x81,0x47,0x26,0xe9,0xfb,0xc1,0x8e,0xc6,0xe5,0x39,0x48,0xa5,0xb3,0xbc,0xb2,0xe4,0xac,0xf9,0x49,0xbb,0x34,0x2b,0xc4,0x4d,0x06,0xe4,0xd6,0x0b,0xdd,0x55,0x36,0xe6,0xaf,0x64,0xea,0x84,0xf2,0xa5,0x68,0xe3,0x4e,0x4c,0x77,0x46,0x6c,0x17,0x6e,0x08,0x99,0x96,0x1b +.byte 0xb5,0x44,0x3b,0x94,0x2d,0x0f,0xcd,0x90,0x17,0x8f,0x80,0xcb,0xc2,0x30,0xbe,0xe1,0x36,0xdc,0x1e,0x48,0xe3,0x2c,0xe5,0xc9,0xbc,0xbd,0xff,0x3f,0x95,0x59,0x35,0x58,0x2f,0x9c,0xa6,0x1c,0x45,0xa7,0x61,0xde,0xf2,0x9c,0xa3,0x04,0x0f,0xa0,0x93,0xaf,0x69,0x2b,0x0d,0x1c,0xfc,0xff,0x97,0x1c,0x69,0x7e,0x30,0x06,0x88,0x01,0xa4,0xf1 +.byte 
0x32,0x36,0xed,0x56,0x89,0xff,0xa9,0x63,0x3a,0x17,0x91,0xc5,0xba,0x6e,0x38,0x84,0xb1,0xaf,0x28,0xac,0x8a,0xb2,0x60,0xbe,0x1b,0x0a,0xd8,0x05,0x22,0x25,0x56,0xbe,0x75,0x47,0x59,0xcf,0x8c,0x2e,0xb3,0xc3,0x5f,0x06,0x81,0x65,0x39,0x78,0xed,0xe3,0xc9,0x5a,0x99,0x01,0xae,0xfb,0xf6,0xed,0x55,0xf5,0xbd,0x2f,0x93,0xf1,0x62,0x6a +.byte 0x54,0x4f,0xe1,0x9f,0x0a,0x23,0x83,0xbc,0xc2,0xba,0xb4,0x6f,0xd9,0x88,0xc5,0x06,0x7a,0x83,0xd5,0xdb,0xeb,0x49,0x48,0xd6,0xc9,0x45,0xa2,0xd0,0xc4,0x06,0xd9,0x01,0xec,0x2d,0x6d,0xc1,0x95,0x69,0x22,0xd0,0xae,0x88,0x75,0x8b,0xd2,0x02,0x98,0x83,0xd9,0x10,0x27,0x8d,0x68,0x97,0x5e,0x6b,0xdd,0x51,0xbb,0x92,0x38,0xa8,0x12,0xde +.byte 0x0f,0xa4,0x1e,0x2e,0xec,0xd5,0x73,0x55,0x5f,0x46,0x6a,0x0f,0xc9,0x50,0x0d,0xb3,0x55,0x20,0xe0,0x01,0xef,0x92,0x29,0x04,0x38,0x60,0xbd,0xc7,0x0b,0x1e,0x94,0x10,0x37,0xb7,0x02,0x94,0xbc,0xde,0xdb,0xb3,0xe3,0x1e,0xd5,0xe2,0xa8,0xed,0x46,0xe8,0xd4,0x8a,0x6c,0x93,0x4e,0xb7,0x73,0xa6,0x20,0x86,0xd2,0x82,0x2f,0x78,0x80,0x34 +.byte 0x44,0x79,0x84,0x2e,0x54,0xd0,0x30,0xa8,0x06,0x0c,0xcf,0x78,0xb4,0xd7,0xe2,0xc9,0x6e,0xfb,0x37,0x47,0x8f,0xe5,0x9f,0xf8,0xca,0x58,0x9c,0xb6,0x8b,0xbe,0xf4,0x3a,0xfe,0x75,0xec,0x1b,0x22,0xfd,0x93,0x92,0x07,0x09,0xcd,0xe6,0x2f,0xe6,0x51,0x0f,0x19,0x43,0x9c,0x6a,0x32,0x38,0x7d,0xf0,0x0c,0x78,0x81,0xb7,0x5c,0xbe,0x3c,0xf4 +.byte 0xc0,0x12,0x57,0x51,0x8a,0x69,0x84,0x0d,0x1e,0x0a,0xed,0x75,0xde,0x9e,0x31,0x8a,0x9b,0x18,0x82,0x01,0x5a,0xee,0x0e,0x33,0x3c,0x8c,0x95,0xb1,0x0b,0x05,0x3b,0xb2,0x85,0xab,0xaf,0x47,0xa2,0x03,0xb6,0xbb,0xda,0xf5,0xc8,0xbe,0x0e,0x4d,0xf8,0x84,0xe4,0xfb,0xd4,0x54,0x44,0x72,0xe5,0x30,0x57,0xa3,0xb6,0x47,0x8f,0xd3,0x32,0xc2 +.byte 0x83,0x07,0x4f,0x17,0x20,0x88,0xa1,0x0b,0xb3,0xef,0x4b,0x27,0x60,0xe0,0x9d,0xec,0xc2,0xdf,0xaf,0x2e,0x74,0xae,0xa4,0x2b,0x59,0x94,0x75,0xbe,0x54,0xf5,0x18,0x62,0xd9,0xe2,0x35,0xee,0x37,0x2e,0xdf,0x48,0xf8,0x80,0x32,0xcb,0xf1,0x83,0x78,0x03,0x68,0x06,0xd7,0x82,0xc6,0x76,0x2a,0x10,0x2a,0xdb,0x73,0xe6,0x65,0x24,0x9f,0x73 +.byte 0x1f,0x55,0x55,0xb6,0x10,0x65,0x80,0x70,0x5a,0x8e,0x8a,0xc8,0x4c,0xca,0x74,0x47,0x63,0x3f,0xee,0x49,0xc3,0x86,0x0f,0x66,0x56,0x08,0xee,0x9f,0xf5,0x5a,0x89,0x4c,0xb4,0x97,0x6e,0x75,0x61,0xc0,0xa7,0x92,0xa8,0x38,0x99,0x08,0x01,0x12,0x82,0x77,0x80,0x20,0x9d,0x62,0x46,0x92,0xdd,0x39,0x4d,0xcf,0xc0,0x8a,0x3e,0x30,0x9a,0xfa +.byte 0x28,0xe8,0xd8,0xf8,0x07,0x0d,0xab,0x4c,0xd4,0x02,0x4c,0xd7,0xc3,0x16,0x89,0x24,0x84,0x52,0x7c,0xa4,0x1b,0x54,0x7f,0xc4,0x74,0x4f,0x88,0x0a,0x14,0x03,0xd9,0x1a,0x48,0xff,0x2c,0xfb,0xbf,0x33,0xf1,0xf8,0x0e,0xdd,0xc4,0x98,0xf2,0xbd,0x32,0x99,0x03,0x8e,0x56,0xc1,0x84,0x5d,0xa6,0xd7,0x21,0xf2,0x43,0xfb,0x3b,0xf5,0x6a,0x75 +.byte 0x20,0xfb,0x08,0x7b,0x66,0x15,0x47,0x31,0xb6,0xb6,0x7a,0xc9,0xe6,0xf5,0xd6,0x0a,0x14,0xb3,0x68,0x0a,0x32,0x13,0xb5,0xe6,0x56,0xbd,0xa5,0x24,0xe2,0xa3,0x7b,0x3d,0x01,0x23,0xed,0x08,0x09,0xb5,0xdb,0x7c,0xa9,0x4b,0x23,0xdb,0xa2,0x25,0x0c,0xc6,0xa4,0x0d,0xbb,0x1a,0x5d,0x1b,0x42,0x0b,0x86,0x72,0xc3,0xca,0x5b,0x14,0x04,0xa3 +.byte 0xd7,0x01,0xe7,0x17,0x78,0xd0,0x54,0xde,0xd4,0x76,0x3d,0xe1,0x7d,0x26,0x3e,0xb4,0x71,0x42,0x84,0x36,0x58,0x78,0x22,0x32,0x26,0x0e,0xc8,0x99,0x05,0xe3,0x4a,0xa6,0x5a,0x1a,0x06,0x0a,0x88,0x47,0x51,0x5c,0xa8,0x72,0x70,0x0c,0x62,0x5f,0xf3,0x1e,0x02,0x50,0x20,0xc6,0x5c,0x50,0x30,0x1f,0x4e,0x5a,0x3a,0x02,0xc9,0xca,0x3f,0xa4 +.byte 
0xf1,0x66,0x05,0xf3,0x19,0xe5,0xaa,0xdb,0x75,0x51,0xc1,0xb8,0x94,0xfa,0x2d,0xb6,0x8b,0x42,0xdc,0x9a,0xa3,0x13,0xeb,0x95,0x8d,0xf0,0x65,0x87,0xc9,0xa1,0x43,0xb4,0xfe,0x76,0xf4,0xc8,0xbb,0x19,0x96,0x84,0x9d,0x2f,0x92,0xe8,0x22,0x9a,0xf0,0xd5,0xf4,0xc4,0x8d,0x19,0x59,0x21,0xbf,0x15,0xfd,0xa6,0xc4,0xde,0x77,0x58,0xae,0x93 +.byte 0xb3,0xff,0x44,0x49,0x6e,0x37,0x94,0x04,0xd2,0x96,0xe9,0x80,0xd8,0xe3,0x93,0xd8,0xb4,0x7f,0x5f,0xcf,0xe5,0x9d,0x51,0x92,0xac,0x5d,0x9f,0x23,0x3a,0x3e,0xdf,0x96,0x68,0x9a,0x46,0x9b,0x1a,0x06,0x44,0x54,0xc4,0x2e,0x19,0x0f,0x50,0xee,0x73,0xda,0x39,0x7e,0xec,0xcb,0x1d,0x39,0xf7,0x9f,0xbc,0xe0,0x6d,0x49,0x56,0xf8,0xa7,0x24 +.byte 0x70,0xab,0xe1,0xc3,0x82,0x99,0x0a,0x4d,0x64,0x41,0x37,0xab,0x92,0x76,0xeb,0x6a,0x2a,0xa5,0xab,0x75,0xd7,0xe3,0x6a,0x72,0x4a,0x2b,0x57,0x02,0xc7,0xbe,0xd5,0x35,0xce,0xdf,0xee,0xf1,0xc6,0xe6,0x69,0xb7,0x76,0x99,0x22,0xb0,0xb9,0xe1,0x18,0x91,0x9a,0x35,0xd9,0x3a,0x19,0xc7,0x77,0xf2,0x2d,0xae,0x04,0x2e,0xb7,0x35,0x97,0xa5 +.byte 0xc6,0x97,0x4e,0x5d,0xbe,0xa9,0x35,0x2b,0x53,0x1a,0x6b,0x4e,0xa8,0xa6,0x22,0x48,0x2c,0x81,0x25,0xac,0x30,0x89,0x7b,0xb3,0x38,0x34,0x42,0x0b,0xa5,0x5f,0x02,0xe8,0xee,0x12,0x9b,0xce,0xe7,0x10,0xf9,0x65,0xb6,0xc5,0x74,0x06,0xef,0xc8,0x95,0xb3,0x40,0x30,0xec,0x1f,0x8e,0xeb,0x93,0x31,0x91,0x5a,0x2f,0xc2,0x90,0x85,0xaa,0x4c +.byte 0x51,0xc4,0xd0,0x3e,0xc8,0xc9,0x61,0x46,0x96,0xd4,0x60,0x56,0x7d,0x91,0xc4,0x24,0x76,0xfb,0x09,0x08,0x48,0x2f,0x4a,0x73,0x90,0x8e,0x9d,0xb2,0x38,0xa8,0x95,0x3e,0x6d,0x10,0x57,0x91,0x8d,0x55,0x62,0x1f,0x21,0xc7,0x01,0x15,0xb0,0x71,0x0b,0x26,0xbc,0x10,0x33,0x3e,0x79,0x37,0x64,0x85,0x98,0x42,0x21,0xcc,0xff,0x51,0x9a,0xc2 +.byte 0xe0,0x51,0xc3,0xff,0xf2,0x14,0x3d,0xe8,0x89,0x12,0xe7,0xcd,0x58,0x2f,0x87,0xfb,0x4a,0x50,0x6c,0x4d,0xdf,0x6f,0x64,0x9c,0x64,0x93,0x49,0x89,0xb6,0x0d,0x10,0x3f,0x13,0x9d,0x9a,0x35,0xf1,0xc0,0xe7,0xf0,0x9b,0xe8,0x39,0xd3,0x32,0xb2,0x23,0x67,0x77,0xdb,0xbc,0x0d,0x19,0x77,0x7a,0xbe,0x54,0x56,0x64,0xec,0xb6,0x2e,0x03,0xc5 +.byte 0x35,0xda,0xf1,0xc7,0x7d,0x0c,0x5a,0x32,0xec,0x86,0xdf,0xdb,0x94,0x73,0x4e,0xe3,0x45,0xf6,0xb2,0x63,0xc4,0xb7,0x80,0x59,0x4b,0x82,0x0b,0x61,0xa0,0xd5,0x43,0x18,0x78,0x35,0x93,0xde,0x46,0xa3,0xa2,0xd5,0xa2,0x71,0xec,0x3e,0xee,0x7a,0x89,0x7f,0xe9,0x70,0xff,0xad,0xae,0xa3,0x64,0xde,0x61,0xea,0x71,0xc2,0x37,0x98,0x8a,0x33 +.byte 0xd1,0x5f,0x03,0x08,0x23,0x24,0xc7,0x6c,0x62,0x24,0x6d,0x3f,0x44,0x8e,0x7c,0x9f,0x64,0x87,0xa5,0x79,0x0b,0x16,0x7e,0x4e,0xc0,0x0e,0xb8,0x77,0x56,0x9c,0xa5,0x7d,0x2d,0x5d,0x7d,0x81,0x13,0x2c,0x08,0xd5,0x83,0x84,0x38,0xfe,0x50,0x6f,0xa7,0x30,0x1f,0x06,0xee,0xab,0x13,0xc2,0x19,0xe6,0xcf,0x7b,0x85,0xfc,0x31,0x5b,0xdf,0xb8 +.byte 0x0e,0xe8,0x72,0xba,0x97,0x03,0x25,0xbc,0xad,0x74,0x7c,0xe1,0x59,0xf7,0x08,0xc1,0xe3,0x2d,0xb1,0x05,0xe7,0x1f,0xb9,0x0f,0x09,0xcd,0xe6,0x4f,0x5a,0xf6,0xcc,0xea,0xc7,0x92,0x35,0xf5,0xbc,0x3f,0xef,0xc9,0x2b,0xb4,0xd7,0x66,0x50,0xaa,0x80,0xb9,0xaf,0x5d,0x02,0x9c,0x77,0xdf,0xc0,0xc7,0xe2,0xbf,0x7d,0xff,0x69,0x63,0x3e,0x7c +.byte 0x91,0x94,0xae,0xa4,0x0a,0x25,0xa3,0x1f,0xf3,0xc6,0x88,0xda,0x82,0xac,0xbc,0x1f,0x8d,0x53,0xd6,0xfd,0x2b,0x5c,0x33,0x6d,0x03,0x68,0x92,0x38,0x07,0xeb,0x85,0x7f,0x55,0x89,0x17,0x58,0x7f,0xc7,0xb4,0x7a,0xff,0x15,0xe5,0xe0,0xea,0xce,0xac,0x3f,0x0f,0x09,0x25,0xfa,0x80,0xe3,0x07,0x89,0x4e,0xbf,0x7e,0xc2,0x42,0xf1,0x18,0x78 +.byte 
0x05,0xe3,0x6a,0x2e,0xf7,0x2e,0xe5,0xbf,0x63,0x9e,0x48,0x69,0xe6,0x3c,0x4b,0x12,0x73,0x58,0xde,0x0c,0x73,0x27,0x9a,0x95,0xfa,0x51,0x8c,0xbb,0x74,0x31,0x53,0x4e,0x9a,0x13,0xda,0x49,0xf0,0x8b,0xb4,0xcd,0xc1,0xe9,0xaf,0xd6,0x59,0x59,0xa8,0x24,0x94,0xd9,0x4b,0xf8,0x20,0x79,0xa0,0x79,0x01,0x08,0x84,0x9b,0x04,0xe7,0xda,0x06 +.byte 0x22,0x3e,0x85,0x23,0x0c,0xa9,0xe5,0xcd,0xd3,0xc4,0x27,0x8c,0x4e,0x75,0xe4,0x60,0xb5,0xe9,0xc5,0xb7,0xb1,0x3a,0x84,0x68,0x40,0x3e,0x36,0x1b,0x9a,0x64,0x50,0x45,0x6f,0xc6,0x58,0x70,0x46,0x1a,0xca,0xf6,0x81,0x02,0xa8,0x17,0x4d,0x92,0x0d,0xae,0x88,0x1a,0xbd,0x52,0xc0,0x32,0xb1,0x2d,0x2d,0x12,0x9c,0x29,0xfa,0xa6,0x70,0x5f +.byte 0xe7,0x0b,0xd5,0x5d,0xa5,0x49,0x9e,0x9e,0x5b,0x55,0xbc,0xce,0x5b,0xb4,0xef,0x3f,0xe4,0x7c,0x50,0xef,0x58,0xf5,0xfe,0xcc,0xf6,0xd0,0xf1,0x3a,0x0b,0xf2,0x3e,0x1c,0xce,0x22,0x7e,0x88,0x1c,0x8f,0x9a,0x69,0x76,0xa9,0xf0,0x18,0xa8,0x76,0x7f,0x0c,0xa6,0xfd,0x67,0x43,0xc7,0x43,0x67,0x98,0x6e,0x37,0xd4,0x82,0x29,0x62,0xa6,0xcf +.byte 0x2b,0x7c,0xee,0x14,0x4d,0x2d,0x1a,0xfc,0xc6,0xaf,0x5b,0xea,0x8a,0xa8,0x9a,0x3b,0xab,0x7d,0x76,0x15,0x50,0xe8,0x95,0x31,0xc8,0x5d,0x5d,0x19,0x68,0x07,0xf5,0xb0,0x29,0x5f,0x79,0x4f,0x0d,0x2b,0xba,0x1d,0xd2,0xf2,0x83,0x50,0x89,0x0b,0x96,0x16,0xde,0x7c,0x04,0xea,0x9c,0x75,0x97,0x7e,0xd7,0x2c,0xee,0x82,0x7c,0xbf,0x0b,0x71 +.byte 0x05,0x59,0xd7,0x11,0x70,0x8e,0x41,0x62,0x91,0x38,0x3a,0x69,0x3f,0x3d,0xde,0x8e,0x03,0x0a,0xea,0xfb,0xea,0x36,0xf0,0x5c,0xb6,0xdf,0x9a,0x66,0x9e,0x64,0x43,0xaf,0xb7,0x83,0xd1,0xef,0x7c,0xb6,0x9b,0x40,0xd8,0x0f,0x0e,0x0b,0xa7,0xd0,0x98,0xca,0x8e,0x3b,0xed,0xb7,0xa5,0x19,0xca,0x67,0x30,0x87,0x17,0x0e,0xc4,0xe1,0xaa,0x6e +.byte 0xdb,0x67,0xbd,0xf5,0xed,0x10,0x68,0xb1,0x43,0x73,0xaa,0x99,0x1a,0x83,0x0d,0x1a,0x5a,0x8b,0xc8,0xff,0xe9,0xe0,0x1c,0x15,0xda,0xb0,0x99,0x90,0xce,0x1f,0xfd,0x17,0xd2,0xfa,0x8f,0x3a,0xe8,0x1b,0xd3,0x96,0x2a,0x0d,0xa9,0x4d,0x6d,0x77,0x53,0xe8,0x8f,0xc7,0x6b,0xb4,0x3b,0x6d,0x0c,0x8e,0x35,0x67,0x09,0x6e,0x43,0x36,0x52,0x3e +.byte 0x0e,0xf6,0x4f,0x16,0x40,0x45,0x7f,0xab,0x39,0xf2,0x23,0xfb,0x4e,0xea,0x6e,0xcf,0xa0,0xb6,0xec,0x6d,0x93,0x1b,0x6f,0x9f,0xd6,0xce,0xcd,0x1e,0x90,0x5c,0x7d,0x61,0xc4,0xae,0x02,0xb2,0x7a,0xb2,0x25,0x59,0xac,0x0a,0xcb,0xc6,0x28,0xa2,0x9c,0x7b,0x4b,0x05,0x5a,0x23,0x55,0xc8,0x9a,0x72,0xe6,0x3b,0x91,0xa2,0x9b,0x12,0x1c,0x1f +.byte 0x4b,0x85,0x42,0x9d,0x73,0xf9,0x50,0x3e,0x12,0xc4,0x51,0xb4,0xe1,0x2a,0x08,0xfc,0xf9,0xc8,0x5a,0x53,0x79,0xcc,0xd1,0x24,0x4c,0xc1,0xf6,0xe7,0x10,0x9d,0xe6,0xce,0xcc,0xc7,0x04,0xf8,0x7a,0xd4,0x2f,0x0a,0x97,0x32,0xaf,0x38,0x77,0x97,0x78,0xc8,0xa9,0x9a,0xca,0x65,0xee,0x2b,0x07,0x0e,0xb1,0xaa,0x3c,0xee,0x03,0x85,0xf7,0x09 +.byte 0xd1,0x03,0xe5,0x4f,0x8a,0x6b,0xba,0x83,0xd2,0x6a,0x05,0xe6,0x4e,0x59,0x21,0x26,0xcc,0x8d,0x4a,0x91,0x21,0x6b,0xe5,0x7a,0x83,0xed,0x4e,0x95,0x4b,0x16,0x98,0x3f,0x2d,0x51,0xc5,0x67,0x56,0x58,0xc9,0xc3,0x32,0xff,0x91,0x9d,0x7f,0x6d,0xc7,0x8a,0x40,0x58,0x56,0x35,0xca,0xc1,0xa9,0x07,0xe2,0xc6,0xe1,0x8f,0x7b,0x7c,0x68,0x4e +.byte 0xde,0x19,0xc8,0x9c,0x41,0x65,0x74,0x33,0xb5,0x5b,0xf7,0x47,0x91,0x51,0x41,0x56,0x54,0xaa,0x8e,0xa5,0x1f,0xdb,0x50,0xa4,0x97,0x7a,0xea,0x86,0x2e,0xfd,0xdd,0x64,0x23,0x6e,0x44,0x28,0xfb,0xae,0xe8,0xc2,0x38,0x96,0x56,0x2e,0xd8,0x7e,0x3a,0xc8,0xc6,0x7f,0x20,0x15,0xad,0x9f,0xfa,0x5c,0x55,0xf5,0xe1,0x9a,0x07,0x84,0x5b,0x81 +.byte 
0x39,0x4b,0x70,0xc3,0xfd,0x2b,0xc5,0xb7,0x47,0x36,0x74,0x5a,0x85,0xaa,0x45,0x94,0x8e,0xbe,0x7f,0x6c,0x45,0xf5,0x02,0x4e,0x5f,0x16,0x04,0x7e,0xfa,0xb8,0xa9,0x38,0xc4,0xd9,0xca,0x5f,0x7a,0xe3,0x96,0x78,0x82,0xa0,0xac,0xef,0xc4,0x2a,0xb5,0xf4,0x7d,0x28,0x8c,0x25,0xba,0x4e,0xd5,0xd5,0xd1,0x24,0xc6,0x05,0xb2,0x18,0x2d,0x66 +.byte 0xea,0xe3,0x42,0x79,0x33,0x9e,0x70,0x3a,0x1b,0x5a,0x8e,0xcb,0x03,0xa8,0x43,0xf3,0xd5,0x66,0x41,0x10,0xd7,0x09,0xf0,0x28,0xe5,0x25,0xe6,0xac,0x9a,0xe6,0x34,0x36,0xfb,0xc4,0xa6,0x9a,0xd0,0x24,0x4d,0x18,0xf9,0xd1,0x8e,0xca,0x92,0x83,0x0f,0x55,0x54,0x6d,0x72,0x81,0x81,0xdb,0x72,0x1f,0xd6,0x32,0xb9,0x32,0x45,0x84,0x9c,0x66 +.byte 0x68,0x7e,0xab,0xb3,0xca,0xf5,0x4f,0xdd,0xb4,0xee,0xbb,0x05,0x70,0xbe,0x4f,0xd1,0x27,0x01,0xcc,0x7c,0x4f,0x47,0x55,0xce,0x91,0x73,0x6f,0xff,0x8d,0xfc,0x0c,0x4c,0xaa,0xfc,0xce,0x9f,0xf3,0x4a,0x46,0x92,0x89,0x84,0x8f,0x4d,0x94,0x37,0xda,0xe3,0x11,0x0d,0x63,0x60,0xcb,0x40,0x8f,0xe8,0x0f,0xf9,0xa1,0x89,0x64,0x44,0x45,0x74 +.byte 0xc5,0xa2,0x73,0x33,0x08,0xa2,0x59,0xb0,0xeb,0x7b,0x7b,0xa7,0x28,0x4c,0x13,0x6a,0x04,0x15,0x14,0xd0,0x3e,0x5e,0xec,0xe1,0x3f,0xe5,0x93,0x06,0x6b,0x60,0x50,0x1c,0x90,0xc0,0x5c,0xea,0x7e,0x58,0xf1,0xed,0xba,0x43,0x0b,0x84,0xf7,0xa4,0xbd,0x4c,0xed,0x88,0x5b,0xae,0xa2,0x0a,0xf6,0x06,0xfd,0x43,0x63,0xfe,0x8a,0x03,0x21,0x8b +.byte 0x27,0xc6,0xef,0xa3,0xa9,0x3a,0xc1,0x8b,0x65,0x62,0x25,0x85,0xaa,0x2f,0xff,0x22,0x96,0xb7,0x5c,0x82,0xde,0x21,0x4e,0x0d,0x8d,0xd9,0x7f,0x97,0x79,0x95,0x6c,0xe6,0xfd,0xb1,0x7c,0x84,0xc8,0x73,0xbc,0x50,0x2f,0x87,0x03,0x56,0xcf,0xea,0x7f,0xed,0x17,0x7d,0xf7,0x61,0x6b,0x6f,0x5b,0xd3,0xe4,0x83,0xbd,0x8b,0xd3,0x8e,0x51,0x57 +.byte 0x3d,0xcc,0xe4,0x09,0xb9,0x73,0x1f,0xb4,0x47,0x5e,0xf2,0x10,0x3e,0xf4,0x9c,0x86,0x02,0xdf,0x3e,0x75,0x1c,0x9b,0xb5,0x0f,0x31,0xc6,0xbb,0x00,0xb4,0x8a,0x1a,0xe5,0x0d,0x9c,0x3e,0x93,0x61,0x5a,0x61,0x86,0x12,0x64,0xaa,0xfd,0xa2,0x6e,0x8f,0xcc,0xcd,0x60,0xa1,0xad,0x6d,0xdc,0xa2,0x7b,0x5a,0xe0,0xee,0x27,0x5d,0xc5,0xfe,0x1f +.byte 0x7b,0x9f,0x33,0xf1,0xee,0x2a,0x58,0x39,0x56,0x14,0x4f,0x2f,0x11,0x26,0x6b,0x56,0x7c,0x75,0xb7,0xc3,0xa7,0xf6,0x54,0xd8,0xa7,0xbb,0x73,0xb5,0xa5,0x83,0x1e,0x65,0x7e,0xa7,0x85,0x74,0xa4,0x04,0x0e,0x26,0x01,0x88,0xbc,0x8b,0x98,0x0c,0x9b,0x74,0x22,0x44,0x16,0x16,0xed,0x94,0x81,0x81,0x13,0x26,0xc9,0x27,0xa9,0xa7,0xe0,0x45 +.byte 0x69,0x6e,0x33,0xcc,0xa3,0x15,0x10,0x99,0x84,0x06,0x95,0x00,0xbb,0xc6,0x8e,0x4e,0x37,0x1b,0x23,0xb2,0xf7,0x4d,0xd7,0x24,0x68,0x6b,0xaa,0x2e,0x57,0x8d,0xd6,0x4e,0xa2,0x69,0xd8,0x8d,0x84,0xb2,0x85,0x91,0x30,0xbf,0x41,0xab,0xcf,0x5c,0xa6,0x51,0x1e,0xf5,0x79,0x5a,0x20,0xfa,0x3d,0x0a,0xc5,0xd7,0x3f,0xa6,0xcc,0xf6,0x9b,0x76 +.byte 0xe0,0xec,0x9e,0x0b,0x23,0xe4,0x74,0x36,0x14,0x6f,0x24,0x9d,0xe7,0xb2,0x41,0xd7,0x68,0x37,0x67,0xdc,0x01,0xb1,0x20,0xf9,0x8b,0x0b,0xf5,0xa7,0x95,0x78,0xa0,0x6c,0x4b,0xc0,0x44,0x92,0x4a,0x75,0x0f,0x61,0xde,0xc3,0xc2,0x3d,0x17,0xa0,0x4d,0x57,0x8b,0x11,0x35,0xbd,0x49,0x87,0x05,0xba,0x5d,0x1f,0x76,0xd4,0x0f,0xb0,0x5b,0x5f +.byte 0xb7,0xf8,0xcf,0x12,0x54,0x19,0x9a,0x49,0x6a,0x42,0xad,0x93,0x85,0x0b,0xe7,0x8c,0x30,0x59,0x82,0x82,0x2d,0xd9,0x89,0xf5,0x8c,0x39,0x9c,0xf5,0xcd,0x25,0x22,0x74,0xcf,0x56,0xa2,0x15,0x40,0xa6,0xa8,0xfc,0xdc,0x85,0x9e,0xab,0xd6,0x94,0x5d,0xd6,0x73,0x07,0xed,0x7b,0x76,0x11,0x67,0xf5,0x52,0xac,0x1a,0x69,0x1f,0x4a,0xa2,0xaa +.byte 
0x4d,0x11,0xe0,0xc4,0x4c,0x6e,0x9e,0x8e,0x13,0x46,0x0b,0x95,0x40,0x53,0x35,0x53,0x58,0x7f,0x81,0x5f,0x17,0xd7,0x5e,0x53,0x86,0xf3,0x1b,0x70,0xf1,0x95,0x8f,0xf6,0xd4,0x6f,0x55,0x92,0xa2,0x38,0xd3,0x43,0x6c,0x7e,0xa2,0x21,0x5b,0x18,0x11,0xdd,0x03,0x52,0xe6,0xe5,0xc0,0xc5,0x4e,0x8e,0xda,0xdb,0x91,0xcf,0xf7,0x75,0xc2,0x33 +.byte 0x69,0xd1,0xd1,0x29,0x9d,0x51,0x79,0x91,0xe4,0x58,0x05,0xa5,0xf6,0x54,0x16,0x3e,0x42,0xf3,0xc4,0x1f,0x88,0x94,0xfc,0x6b,0x53,0xb1,0xd5,0x17,0xe6,0xab,0x77,0x33,0x8a,0xd0,0x93,0x74,0x02,0xe0,0x81,0x5e,0xbe,0x2f,0x4d,0xcd,0x25,0x0b,0xd0,0x06,0xd8,0xc9,0xf9,0xcf,0x8e,0xf8,0xc3,0xe2,0x33,0x60,0xe5,0xfa,0x89,0x68,0xf8,0xb7 +.byte 0xef,0x9d,0xfc,0x9d,0x76,0x13,0x2d,0x9d,0x18,0x7d,0x05,0xb4,0xa7,0xa3,0x8a,0x91,0xe0,0x73,0x65,0x89,0xb4,0xc1,0x53,0x7c,0xdc,0xf2,0xab,0x39,0x94,0xc7,0x3d,0xf8,0x1c,0x8f,0x49,0x37,0xee,0xc1,0x19,0x84,0x15,0x3b,0x36,0xb2,0xc2,0xe1,0x16,0xe2,0xfb,0xde,0x1f,0x0e,0xa4,0xea,0x59,0x67,0x2d,0xea,0x47,0xe5,0x2c,0xd1,0xb5,0xa9 +.byte 0xbd,0x5c,0x92,0x34,0x8b,0xc5,0xab,0x4f,0x2b,0x6b,0xc4,0x8b,0xdb,0xbb,0xcb,0x86,0x34,0x35,0xa0,0x5c,0x29,0x1a,0x8b,0xce,0xdc,0xd7,0x46,0x2b,0x20,0x9d,0xea,0xa8,0x97,0x68,0x37,0x56,0x03,0x7d,0x4f,0xb6,0xfc,0x30,0x82,0x68,0xb4,0x56,0xf3,0xbe,0x58,0xcc,0x20,0xc1,0x53,0x9f,0xbb,0x0b,0x2b,0x6e,0xa0,0x2d,0xc0,0x61,0x02,0x0b +.byte 0xf9,0x0e,0x55,0xb8,0xb8,0x23,0x6e,0x50,0xc0,0x36,0xb8,0xf6,0x5e,0xb3,0xa7,0x8f,0xf8,0x7f,0xd0,0x5d,0x0a,0xc4,0x2b,0xa9,0xd3,0x76,0xcf,0x4d,0x27,0xda,0xac,0xf3,0xb0,0xca,0x00,0xa0,0x94,0x12,0x20,0x89,0x22,0xa9,0x89,0xe4,0x23,0x71,0xe0,0xdb,0xec,0xb0,0xa9,0x2e,0x45,0xf6,0x8d,0x1e,0x4b,0x0e,0xc7,0xf8,0x40,0xd6,0xf4,0x2f +.byte 0x80,0x3e,0xf8,0xfb,0xcf,0x7b,0x54,0xb5,0xbd,0x55,0xf2,0x37,0x46,0x9f,0x32,0x45,0x87,0xa3,0x6a,0x51,0x25,0x43,0x54,0xa2,0x92,0xc6,0xbe,0xa4,0x33,0x54,0x82,0xc7,0xf1,0xe4,0x52,0xf9,0x09,0xac,0xc3,0xb1,0x25,0x86,0xc7,0x89,0x83,0x2c,0xf6,0x35,0x9e,0xd1,0xd8,0xb1,0x71,0xed,0xfa,0xae,0x09,0x83,0xb3,0xf0,0xde,0x24,0xed,0x3c +.byte 0xc6,0x60,0xe8,0x15,0x49,0x93,0x29,0x82,0xbf,0x1d,0x23,0x17,0x11,0xea,0xa7,0x53,0x83,0xa5,0xc1,0x9e,0x02,0x17,0x08,0x99,0xa6,0x72,0xaf,0x82,0x3f,0x0b,0x69,0xca,0xb8,0x72,0xa9,0x31,0x71,0x20,0x32,0x57,0x89,0x9b,0x16,0x92,0x54,0xc0,0x99,0x6d,0xa4,0xbf,0x5a,0xb5,0x53,0xa7,0x4c,0x69,0xd8,0xf7,0xe7,0x4c,0xc0,0x76,0xb6,0x35 +.byte 0xdd,0xe7,0xb2,0xd9,0x1c,0xd5,0xf7,0x39,0x32,0x44,0x48,0x02,0x85,0x69,0x02,0xad,0xe6,0xfc,0xbb,0x07,0x9e,0x7f,0xee,0x6d,0x07,0x12,0x21,0xeb,0x67,0x4d,0x74,0x90,0x8f,0x79,0x51,0x9d,0x8a,0x63,0x24,0xab,0x6f,0x8f,0x73,0xd3,0x91,0x68,0x15,0xa9,0x6a,0x84,0x92,0xc2,0xd4,0x4d,0xa8,0xe1,0x4f,0xa2,0x1e,0x34,0xa3,0x9a,0x04,0xf2 +.byte 0xfc,0xc4,0xe7,0xd0,0x52,0xc4,0x49,0x51,0x8e,0x7d,0xaa,0x74,0xaa,0x08,0xbe,0x08,0xf6,0xe4,0xc1,0x61,0xff,0x2e,0x9c,0x17,0x61,0xb6,0x01,0x44,0x18,0xe8,0x5e,0xa9,0xfb,0x02,0x21,0xbb,0x08,0x5c,0xe0,0xd3,0x0c,0x98,0xc5,0x93,0x2a,0x1c,0x69,0xf3,0xe8,0x8b,0x36,0xa0,0x9d,0x1e,0xda,0x18,0x14,0x06,0x7f,0x75,0x3d,0x42,0x92,0x5a +.byte 0xb9,0xb7,0xc0,0xc0,0xb0,0xc5,0xa9,0xb2,0x67,0x24,0xc2,0x28,0x29,0xcb,0x78,0x8e,0xf3,0xd1,0x37,0x63,0xca,0xc8,0x9a,0x1b,0x38,0xa5,0x9f,0x0e,0x0d,0x26,0x5b,0xfe,0x2f,0xdf,0x4f,0xb9,0x21,0x8c,0xc8,0xe0,0x9f,0x71,0xb9,0xc3,0x6c,0xd8,0xd3,0x2f,0xe4,0x3c,0x67,0x35,0x45,0x74,0x7f,0xcb,0x13,0xda,0x64,0x47,0xff,0x6f,0x05,0xf0 +.byte 
0x87,0x8d,0x0d,0x1f,0x10,0x47,0x0e,0xf6,0x9d,0x89,0x6d,0x79,0x04,0x77,0x8a,0x6c,0xeb,0x7d,0x9b,0xd7,0x65,0x82,0xa8,0x95,0xa2,0x8c,0x02,0x91,0x0d,0xf2,0xe8,0x65,0x60,0x0d,0xb6,0x1d,0xf4,0xf3,0x41,0x75,0x33,0x21,0x13,0x22,0x93,0x01,0x2f,0x11,0xe7,0xed,0x45,0x56,0x90,0xec,0x0b,0x99,0x8e,0x84,0xc8,0x76,0x31,0x1d,0xb9,0xcb +.byte 0x87,0x3f,0x5f,0x39,0xeb,0xe8,0x9e,0x5e,0x96,0x9e,0x42,0x64,0xf3,0xef,0x00,0x1f,0x2a,0x6c,0x18,0x67,0xbd,0xdd,0xf9,0x65,0x11,0x1b,0x9c,0xd7,0xf3,0x3d,0xb2,0x6f,0x88,0xf7,0xd2,0x26,0x06,0xef,0xc8,0x23,0x3f,0x46,0x5d,0xf0,0x96,0x40,0xb1,0xdd,0xad,0xe4,0xee,0xb6,0xc2,0x67,0x18,0x46,0x67,0xc4,0xa5,0x7e,0x3e,0xce,0x72,0x47 +.byte 0xca,0xc3,0xa7,0x94,0x56,0xe2,0x23,0x03,0xcf,0xd0,0x18,0x55,0x30,0xe3,0x14,0x00,0xda,0x0f,0xaa,0x7f,0x20,0xaf,0x3b,0x24,0x43,0x7a,0xaa,0xd4,0x12,0x42,0x10,0xe4,0x44,0x8a,0x7f,0xf1,0x74,0x9d,0xe0,0x28,0x60,0xce,0xdd,0x04,0x96,0x03,0x80,0xcb,0xaa,0xa9,0xb5,0xc7,0xb4,0xbb,0xc7,0x9a,0x93,0xd8,0xff,0x3b,0x8f,0x1f,0xb7,0xce +.byte 0xed,0xbc,0xde,0x9f,0x9e,0x56,0x96,0x65,0xba,0xe7,0x89,0x03,0xb2,0xbd,0xfe,0xa7,0x02,0xeb,0x33,0x9a,0x8b,0x5b,0x36,0x64,0x17,0x9f,0xd2,0xe4,0x75,0xb5,0xfb,0x21,0x03,0xa4,0xe7,0xb4,0x49,0x72,0xfd,0xf3,0x1e,0x5f,0xdb,0xe5,0x6c,0x92,0x51,0xe7,0x91,0x55,0xb7,0x82,0x18,0x05,0xc3,0x2c,0xf1,0x23,0x61,0x36,0xad,0x80,0x1b,0xde +.byte 0xe1,0x51,0x4e,0x51,0xa1,0xf6,0x5a,0xb9,0x03,0x48,0xa7,0x12,0x88,0x63,0x30,0xff,0x48,0xfc,0x92,0x30,0x9a,0xca,0x08,0x1b,0x64,0xa9,0x74,0x2a,0x64,0x42,0x7d,0xa9,0xa4,0x9d,0xcb,0x59,0x71,0x53,0xc1,0xa8,0xa6,0xb5,0x47,0xf9,0x87,0xb5,0x41,0x58,0x92,0x14,0xf7,0xbd,0x10,0x45,0x37,0x20,0x1d,0x5b,0x42,0x04,0xed,0x69,0x4c,0xa5 +.byte 0xdc,0x2a,0x58,0xba,0x00,0x1e,0x05,0x9c,0x3c,0xbf,0x65,0x76,0xd1,0x11,0xe0,0x15,0x22,0xb0,0x2a,0x53,0x32,0x0f,0x6e,0x08,0x4e,0x27,0xc2,0x71,0x14,0x20,0xee,0xb0,0x0b,0x60,0xef,0x54,0xae,0x2c,0xe0,0x1d,0x30,0xac,0x0d,0x3a,0x93,0x15,0x0a,0xe7,0x14,0xf3,0x1a,0x67,0xb1,0x43,0x85,0xbd,0x06,0x53,0xab,0x6d,0x5d,0xe7,0xe3,0x82 +.byte 0xb8,0x39,0x35,0x10,0x87,0xe7,0x90,0x4d,0x9c,0x6f,0x83,0xad,0xa2,0x43,0x7a,0x5d,0xc1,0x8a,0x39,0xa3,0xa6,0xda,0x48,0x5c,0x9b,0xe1,0x0d,0x69,0xfc,0x87,0x18,0xdd,0x34,0x9a,0xb4,0x9c,0x04,0x0d,0x49,0x18,0x3e,0x38,0xd8,0x01,0x67,0xb1,0x7f,0x6b,0xb5,0xfe,0x58,0x1c,0x64,0x11,0x10,0x6b,0xc1,0xca,0x56,0xe3,0x12,0x8c,0xb4,0xac +.byte 0x03,0xbd,0xc1,0x54,0xbe,0x5c,0x70,0x6f,0xdd,0x73,0xa3,0x84,0xcd,0x0b,0x1b,0xbf,0x05,0xac,0x27,0x11,0xe8,0x5f,0xc3,0xb9,0x68,0xc2,0xe9,0x3f,0x5a,0x9b,0x28,0xca,0x65,0x5e,0x66,0x4e,0x50,0xa9,0x81,0xb1,0x10,0xc1,0x2c,0xa5,0x62,0xc8,0x52,0x07,0xa5,0xa1,0x99,0x16,0x7b,0x08,0xa4,0x1e,0xf4,0x50,0x8f,0xb2,0x42,0xa5,0x19,0xa2 +.byte 0x34,0x91,0xcf,0xa7,0x5e,0x73,0x6b,0xc2,0xa3,0x4d,0xdd,0x7c,0x26,0x46,0x34,0xe6,0x5d,0x54,0x52,0xe3,0x1e,0xc1,0x10,0x36,0x7c,0xc9,0xd2,0x1e,0xca,0xeb,0x80,0xc5,0x3c,0x04,0xf6,0xb7,0x09,0xd4,0x3e,0x67,0xc3,0xf6,0x6b,0xd4,0x60,0x00,0xc9,0x68,0x17,0x39,0xbc,0xcd,0x14,0x32,0xfc,0x33,0xa4,0xb0,0x6f,0x12,0x6b,0x5f,0xe2,0x15 +.byte 0x1c,0x9a,0x15,0x4f,0x0b,0x7d,0x4c,0xa0,0x89,0x40,0xb3,0x0e,0x84,0x90,0xb3,0xc6,0x3e,0xa5,0x0b,0x81,0x66,0x14,0x5f,0x8d,0xe0,0xbf,0xf7,0x9d,0xa4,0x4e,0x69,0xd5,0xac,0x0f,0x6c,0x29,0x94,0x8f,0x3b,0x4b,0xed,0x5b,0x6e,0xe1,0x58,0x5d,0x32,0x19,0xe6,0xbd,0xfb,0xd5,0xb7,0x0f,0x72,0x0e,0x5b,0x14,0xd3,0xf3,0x09,0xa8,0xea,0xf7 +.byte 
0x98,0x2f,0x42,0x07,0x8e,0x72,0x27,0x53,0x8d,0x0b,0xea,0x74,0x38,0xbc,0xaf,0xb8,0x76,0x65,0x97,0xda,0xa7,0x06,0x37,0x29,0x09,0xbe,0xaa,0xe6,0xf7,0xb6,0xb1,0x5f,0x71,0x1f,0x5d,0x14,0x47,0xdf,0x20,0xa3,0x94,0x93,0x7d,0x21,0xe6,0x22,0x7e,0x38,0x1a,0x26,0x83,0xc7,0x32,0xdf,0x58,0xcd,0xab,0x67,0xae,0x94,0xa5,0x68,0xcb,0xe3 +.byte 0x51,0x70,0xc0,0xc4,0x41,0x9f,0xca,0x05,0xc9,0x51,0x2a,0x8e,0x53,0x89,0x3f,0x52,0x6b,0x29,0x64,0xa8,0xb8,0xdf,0x02,0xb1,0x41,0x4e,0x36,0x42,0x32,0xa8,0xc0,0x91,0xf0,0x69,0x69,0x55,0x99,0xb7,0x78,0x4f,0x79,0x5b,0xc5,0xab,0xc6,0xed,0x15,0x88,0x6b,0x94,0x0a,0xdd,0xea,0x47,0xf9,0x0e,0xb8,0x89,0x15,0x68,0x3e,0xc0,0x50,0xf8 +.byte 0xa1,0x2d,0x2a,0x11,0x8a,0xc5,0xb0,0x09,0x4f,0x7d,0x90,0x5f,0x49,0x35,0xe9,0xdd,0xfc,0xac,0xea,0x1b,0x20,0xad,0xd2,0xe6,0xb6,0xbf,0x3c,0x0e,0x7b,0xdf,0x2f,0x55,0x58,0x0e,0x25,0x53,0x62,0xd3,0x73,0xb8,0x3e,0x12,0x91,0xcb,0x23,0xf2,0xc0,0x5d,0x74,0x2b,0x51,0xcc,0xa2,0xb1,0x5a,0xd2,0xf4,0x9b,0xc9,0xa5,0x83,0x2b,0x5a,0x8a +.byte 0x0b,0xe9,0x09,0x59,0xb5,0x44,0xc9,0x55,0xcc,0xbd,0xb6,0x69,0x66,0x9a,0x0c,0x15,0xae,0x76,0x35,0xbe,0xe9,0x37,0x70,0x9e,0xdc,0x97,0x5a,0x82,0x97,0xf6,0x1a,0x45,0xd7,0x27,0xfe,0x1f,0xc3,0x7c,0x3a,0x52,0x85,0x12,0x73,0x8a,0x8e,0x07,0xec,0x1f,0x59,0x3f,0xb0,0x32,0x07,0x92,0x3e,0x81,0xe0,0x7a,0x9a,0xc9,0x91,0xca,0x84,0xf1 +.byte 0xe1,0x32,0x57,0x0a,0x3c,0x9a,0x20,0xa8,0xbe,0x84,0x91,0x44,0x66,0x81,0xdd,0x12,0xa8,0x46,0x15,0x18,0xfc,0xae,0x5e,0x9a,0xf3,0xd9,0xb9,0x6a,0xbb,0x90,0x1c,0x61,0x7f,0x61,0x2c,0xa7,0x12,0x1e,0x05,0xee,0x0c,0x66,0x9e,0xc2,0xc8,0xb9,0xe0,0xc9,0xc4,0xb9,0xee,0x3a,0x6f,0x97,0x2a,0x5e,0xcb,0xd9,0xff,0xd1,0x37,0x5e,0xa0,0x03 +.byte 0x70,0xc1,0x2f,0x15,0xf9,0xf7,0x90,0xbe,0x23,0xe7,0x7c,0x90,0x4b,0xe4,0x5a,0x01,0x65,0x27,0x2d,0x4b,0xd3,0xa8,0x8c,0x1d,0x2d,0x5d,0x48,0xac,0x6b,0x59,0xc9,0x78,0xb2,0xee,0xda,0x6e,0xa8,0x68,0x08,0x99,0x22,0x25,0xfe,0xc2,0xb8,0x83,0xa8,0x08,0xbb,0x6e,0x64,0xae,0x2e,0xbb,0x93,0xaf,0xdc,0xeb,0xa3,0x11,0xa7,0x5d,0x3f,0x22 +.byte 0xf1,0x95,0x27,0xf6,0xd6,0xa6,0xc3,0x56,0x0a,0xd0,0x17,0x43,0x35,0xd2,0xe7,0xa4,0x8f,0x6c,0x1c,0xc4,0x4d,0xa7,0x3b,0xb8,0x7f,0x0c,0xa0,0xd6,0x56,0x82,0xf4,0x16,0x96,0xcd,0xcf,0x6f,0x78,0xec,0xbb,0xb2,0xdb,0x67,0xcf,0x78,0x0c,0x22,0x1d,0x72,0x21,0x8e,0x40,0x85,0xa5,0x07,0x3b,0x0e,0xfa,0x44,0xb0,0xfe,0xbf,0x54,0x80,0x41 +.byte 0xdc,0xa7,0xc7,0xdb,0xaa,0x04,0x42,0x0d,0x42,0x03,0x17,0xc8,0x57,0xd7,0x08,0x34,0x37,0xf5,0x9a,0x90,0x30,0x43,0x54,0x5b,0x58,0x50,0x4e,0xc4,0x56,0x57,0xff,0xf0,0x05,0x82,0xca,0x2e,0x20,0xb0,0xbd,0xd0,0x00,0x7d,0x60,0x3f,0xdb,0x9c,0x08,0x7e,0x21,0x63,0xbc,0x89,0xbf,0xcb,0xcc,0x36,0xb5,0x36,0x41,0xb4,0x9c,0x5c,0x9d,0xa6 +.byte 0x74,0xa4,0x4f,0x6a,0xcb,0x63,0x51,0xb1,0x92,0xa0,0x03,0x9b,0x88,0x03,0xd5,0x82,0x30,0xfb,0x69,0x49,0x20,0xb0,0x37,0x50,0xe4,0x02,0x9e,0x11,0x09,0x20,0x1a,0x41,0x8d,0xdd,0xa0,0x18,0xb4,0x74,0x04,0x1e,0x3a,0xea,0xb4,0x28,0x01,0x7f,0x0b,0x73,0x27,0x5f,0x76,0x2e,0x71,0xfa,0x50,0x1b,0x43,0x8d,0x0d,0x6c,0x87,0xc3,0x10,0x7b +.byte 0x42,0x7d,0x17,0xa6,0x00,0x5b,0x83,0x6c,0x7b,0x7f,0x72,0xd8,0x90,0x4d,0x7f,0x54,0x72,0x17,0x21,0xe4,0x45,0x74,0x20,0x53,0x30,0x46,0x90,0xbf,0x2f,0xac,0x01,0xbd,0x40,0xa9,0xc5,0xbe,0xbd,0x9b,0x59,0x62,0x03,0x30,0x80,0xe3,0x8e,0x23,0x7b,0x2d,0x63,0x4f,0x30,0xe3,0xb8,0x56,0x87,0x57,0x43,0xdc,0x6a,0x3c,0x13,0xed,0x93,0xc9 +.byte 
0x1a,0x1b,0xea,0x38,0x67,0x33,0x7f,0x11,0x5c,0x96,0x20,0x4d,0xf6,0x82,0x51,0x45,0xca,0x20,0xfd,0x59,0xef,0x4c,0xb4,0xb0,0xb2,0x0f,0xdb,0x4c,0x00,0x7a,0x18,0x58,0xb0,0xd3,0x65,0x73,0x42,0xe5,0x05,0x76,0xd7,0xa2,0x1e,0x9f,0x59,0xc0,0xd0,0x76,0x29,0x1b,0x12,0x29,0x9b,0xe4,0x7d,0x45,0x13,0xb4,0x57,0xf2,0x0b,0xd1,0xb5,0x60 +.byte 0x6d,0x15,0x0b,0xca,0x5e,0xe4,0x80,0xda,0x56,0x95,0x41,0x18,0x54,0xa7,0xad,0x40,0xe5,0xd7,0xa7,0x3e,0xf7,0x73,0x40,0x70,0xb3,0x23,0xdb,0x22,0x62,0xc7,0x44,0xfb,0x64,0x18,0x18,0x05,0x84,0x07,0x68,0x06,0x7f,0xb9,0xc3,0xf9,0x55,0xe2,0x0d,0x37,0x51,0x34,0xc3,0x55,0x3c,0x29,0x5d,0x1d,0x27,0x77,0xd3,0xe1,0x6a,0x60,0x9f,0x10 +.byte 0xef,0xb1,0x93,0xbf,0x2a,0xb7,0xe8,0x42,0x4d,0xfd,0xa9,0xa9,0x2f,0xb6,0x07,0x5b,0xe8,0xf7,0xd7,0x10,0x47,0x71,0x56,0xba,0x11,0x11,0x32,0xc4,0x22,0xf4,0x12,0x6f,0xc3,0xef,0x81,0xc5,0x82,0xb4,0x1b,0x99,0xbb,0x1a,0x63,0x6b,0x3a,0x70,0x4f,0xec,0x2c,0xf9,0xde,0x1a,0x2e,0x62,0x27,0x1c,0x81,0x21,0x30,0x08,0x30,0xf6,0xf5,0xc1 +.byte 0x6d,0x0b,0xeb,0x34,0xd9,0x3a,0xa2,0xa2,0xc6,0x17,0x60,0x85,0x65,0x43,0xd6,0x3d,0x71,0xac,0xc2,0xaf,0x2b,0x9e,0x62,0xf2,0x08,0x47,0x6f,0x42,0xa8,0x21,0xad,0x42,0x98,0xa0,0xef,0xdf,0xd8,0xda,0x10,0xad,0xf7,0xe5,0xf9,0x22,0x89,0x44,0xbf,0x86,0x86,0x2b,0x02,0xd1,0x9e,0x8f,0xb7,0x10,0x63,0xb1,0xcc,0x40,0x6b,0xa3,0x8e,0x09 +.byte 0xb8,0xe3,0x77,0x3c,0xde,0x36,0x7a,0xb7,0x78,0x4f,0x99,0x5d,0x9a,0x9e,0x19,0x2d,0xb5,0xd9,0x9c,0x95,0x1f,0xa1,0xcc,0x61,0x31,0x1c,0x96,0xe5,0xca,0xeb,0x26,0x34,0xa4,0x63,0x5c,0x7c,0x0f,0x23,0xd1,0xe1,0x09,0xf4,0xab,0xf6,0x73,0x2f,0x8a,0x62,0xf0,0xd3,0x8c,0x44,0xe5,0xe9,0x9d,0x58,0x71,0xfa,0xf5,0x39,0xa5,0x6f,0xf7,0x04 +.byte 0x43,0x0a,0x78,0x54,0xfb,0xa7,0x66,0x57,0x1f,0x61,0xd6,0xda,0xff,0x4f,0x32,0x9d,0x80,0x6b,0x77,0xed,0xda,0xaf,0xbc,0x9e,0xea,0x77,0x04,0xf3,0x47,0x96,0xd1,0x44,0x8e,0xca,0xfe,0xb0,0xa3,0xa6,0x1d,0x8d,0xa4,0xb5,0x8c,0x35,0x28,0xf3,0xaa,0xab,0x28,0x1e,0xc9,0x94,0x12,0x07,0xc6,0xea,0x23,0xf9,0x69,0xc3,0x14,0x27,0xcc,0x55 +.byte 0x27,0x0b,0x27,0x64,0x23,0x38,0x05,0xd9,0xb4,0xf7,0x00,0xf3,0x02,0xae,0xc8,0x5a,0xbd,0x2f,0x20,0xd5,0x45,0xa6,0x09,0x6f,0x1a,0x09,0xb7,0xe7,0x6f,0xf6,0xa6,0x6f,0xc7,0x03,0x4e,0xa3,0x72,0xb5,0xfc,0x17,0xcf,0x1e,0x64,0x8b,0xc4,0xa2,0xba,0x83,0x0e,0x2a,0x11,0xba,0x71,0xe0,0x1c,0x9f,0x70,0x6e,0xf4,0xd9,0x47,0x31,0xf7,0xaf +.byte 0xf7,0x1a,0xe7,0xc1,0xe9,0x66,0xa4,0x48,0xd4,0x25,0x8b,0xf7,0x6f,0x33,0x72,0xff,0x93,0x2e,0xcd,0xc7,0xae,0x3b,0x71,0x3f,0x84,0x7f,0xe6,0xb5,0x58,0x4f,0x95,0x34,0xe7,0x89,0x10,0xd3,0x2b,0x5c,0x30,0x9b,0xd3,0xef,0x98,0xf3,0x33,0x0e,0x6d,0x5f,0x7e,0xba,0x55,0x7a,0xb6,0xf3,0xb6,0xcd,0xa8,0x10,0x68,0x85,0x6f,0xea,0x54,0xc3 +.byte 0x66,0x51,0x5a,0xfc,0x11,0x83,0x9e,0x68,0x95,0xdb,0xec,0x74,0xf0,0x86,0x4a,0x90,0x24,0x66,0xf2,0x61,0x40,0x2e,0x3b,0x53,0xea,0xc1,0x3e,0x1c,0x69,0xaf,0x5f,0x04,0xb5,0xbd,0x3d,0x44,0x1c,0xc6,0x49,0x65,0xf6,0x78,0xfd,0x69,0x49,0x95,0x96,0xa1,0xa0,0xa9,0x78,0x1a,0xf6,0x0f,0xe9,0x52,0x93,0x9c,0x96,0x6c,0x5e,0x67,0x63,0x2d +.byte 0x18,0x22,0x2a,0xcc,0x7f,0x2f,0xd3,0x72,0x82,0x98,0xae,0xb0,0x2b,0xa6,0x96,0x41,0x25,0x47,0x3c,0x92,0xc5,0x0f,0x2c,0xd4,0x43,0x09,0x0b,0x94,0x73,0x73,0x29,0xc2,0x8a,0xa3,0xcc,0x8d,0xed,0x40,0x6d,0x40,0x18,0x7c,0x32,0x1e,0xe1,0x4e,0x26,0xa7,0xa4,0xd5,0xcb,0xfa,0x90,0xba,0xb2,0x04,0x1d,0x5d,0xbe,0x32,0x6c,0x71,0x09,0x51 +.byte 
0xdb,0xe3,0xb0,0xe1,0x34,0x74,0xa3,0x2b,0xf2,0xcb,0x9e,0xc0,0xae,0x88,0x40,0x90,0xb6,0x22,0xc8,0xac,0xff,0x45,0xc6,0xfa,0xce,0x0f,0x03,0x9d,0xc0,0xb2,0x2e,0xdb,0x1e,0x6c,0xa5,0xbe,0xb5,0xb3,0xaa,0xd5,0x2d,0x06,0x4d,0x29,0xa3,0xbe,0x25,0x5f,0x21,0x42,0x8d,0x27,0xaa,0x6f,0x59,0x88,0x61,0x4d,0x72,0x9f,0x64,0xfc,0x07,0xaf +.byte 0xeb,0x02,0x5e,0xb9,0x1f,0xfe,0x1a,0x67,0x10,0x35,0xe9,0x9f,0x5f,0x9c,0x8d,0x4a,0xb3,0x10,0x99,0x8d,0x5b,0x9c,0x8b,0x8a,0x0c,0x02,0x8b,0x44,0x1a,0xaa,0xe7,0x14,0x05,0x3d,0x9e,0x62,0xfc,0x76,0x49,0x56,0x46,0xae,0xcc,0x0e,0x47,0x58,0x4d,0x94,0x33,0x4d,0x23,0x24,0x44,0x52,0x2e,0x18,0xf7,0x53,0x6b,0x24,0x67,0xb8,0x88,0x46 +.byte 0x70,0xc8,0xcb,0x60,0xac,0x70,0x85,0xdd,0x00,0xa1,0x5d,0xbb,0x94,0x07,0x0a,0xb6,0x1c,0x88,0x59,0xa7,0x88,0x7e,0x1e,0xc9,0x1d,0x7c,0xa0,0x1c,0xad,0xe4,0xa5,0x36,0xa5,0x35,0xe8,0xda,0x27,0x15,0xbc,0x7b,0x1e,0x8a,0x33,0x74,0x4b,0xc1,0xc7,0x9d,0xa9,0x21,0x98,0x02,0xe5,0xf4,0x8b,0x8e,0x2d,0x64,0x81,0xea,0xa6,0xbe,0xe2,0x05 +.byte 0x16,0xba,0xac,0x75,0x79,0xa4,0xc0,0xd3,0x9d,0xe0,0x25,0x63,0x22,0xb3,0x9c,0xee,0x04,0x8f,0x60,0xab,0x52,0x43,0x05,0x16,0xd4,0xb3,0x88,0xe8,0x68,0xc3,0x81,0x94,0xc4,0xee,0x13,0xaf,0xdd,0x36,0x23,0xe6,0x78,0xc9,0xf6,0x42,0xf0,0xf7,0x89,0x64,0x79,0x13,0xe8,0xed,0x50,0x03,0x16,0x78,0x6d,0xf4,0xdf,0x85,0x2e,0x4e,0x8f,0x2c +.byte 0x5b,0xfe,0x4c,0xf2,0x49,0xde,0xf2,0xa4,0x96,0xe0,0x8a,0x25,0xc8,0x6d,0x22,0xff,0xab,0xfc,0x18,0xe8,0x7f,0xd5,0xc1,0x7e,0x44,0x8e,0x21,0xb4,0xc8,0x79,0xc0,0x55,0xaa,0xb7,0x28,0xa1,0x3a,0xbd,0xc2,0x1d,0xf8,0x87,0xf9,0x35,0x30,0x25,0xb2,0xaa,0x8f,0x3c,0x0d,0x64,0xf2,0xd1,0xa0,0x51,0xbf,0x9b,0x9a,0x9a,0x9c,0x18,0x43,0xea +.byte 0xd2,0x54,0x50,0xe0,0xca,0x1a,0x29,0x16,0x9f,0x49,0x47,0x56,0x65,0x21,0x0f,0xb0,0x53,0x41,0xe3,0xec,0xe0,0x15,0xcb,0xd0,0x61,0x05,0x67,0xd6,0x02,0x1a,0x31,0x80,0xa4,0x9f,0xf5,0x9b,0x28,0xcd,0x43,0xd5,0x70,0x05,0x67,0xe8,0x76,0xb7,0x99,0x98,0x0a,0xd6,0x27,0xe9,0xfb,0x62,0xff,0x66,0x47,0xf7,0xbe,0x5e,0x35,0xa0,0x3b,0x56 +.byte 0x58,0x78,0x9b,0x9c,0x5b,0x9f,0xf5,0x6b,0x1a,0x6a,0xfd,0x8e,0xe3,0xd9,0xa2,0x8b,0x2e,0xef,0xc7,0xd3,0x74,0xb1,0xea,0x6a,0x03,0x8b,0xe2,0x78,0xbe,0xf1,0x75,0x7f,0x02,0x03,0xbc,0xd3,0x15,0x2c,0x87,0x01,0x95,0xa6,0x87,0x2d,0xf8,0x63,0xfe,0x33,0x8f,0xc5,0xc9,0x0a,0x06,0x79,0x93,0x46,0xd7,0x0b,0x61,0x06,0x68,0xae,0x9b,0x46 +.byte 0x6f,0x9e,0x1b,0x21,0x58,0xc1,0x72,0xa9,0x05,0xa7,0xaa,0x88,0xee,0xed,0x8d,0x7f,0x55,0x3b,0xb8,0xb8,0xf8,0x42,0x26,0x4a,0x78,0xe3,0x17,0xe8,0xac,0xb3,0xdb,0x9b,0x90,0x7d,0x8d,0x65,0x00,0x39,0x40,0xc2,0xe2,0x9c,0xc6,0x16,0x35,0x54,0x64,0x09,0xc8,0xc7,0x08,0x77,0x90,0x9d,0xb4,0xd4,0xe1,0x36,0xd4,0x5e,0x63,0xb0,0xba,0x81 +.byte 0x0c,0x4e,0x24,0x20,0xc0,0x7f,0xfc,0x02,0x3d,0x83,0x60,0x8a,0xf5,0xff,0x87,0x60,0x9c,0xd5,0xc0,0x94,0x64,0xe2,0x3f,0xeb,0x9a,0xe5,0xb6,0x50,0x13,0x36,0xf4,0x96,0x5d,0xf4,0xb5,0xab,0xa4,0x28,0x17,0x38,0x7f,0xca,0xf7,0x0c,0xcf,0xae,0xf8,0xef,0x41,0x6d,0x9c,0xa1,0x53,0x33,0xcb,0x8d,0x21,0xab,0x3a,0x8c,0x72,0x8d,0xf3,0xf2 +.byte 0x05,0x69,0xf5,0xe8,0x6b,0x5b,0x42,0x85,0xb1,0x2e,0x6f,0xf8,0x62,0x00,0x1c,0x48,0x6c,0x85,0x72,0x93,0x34,0x67,0x80,0xe7,0x2a,0xfe,0xcf,0x54,0xc6,0x94,0xf2,0x5a,0x48,0xab,0x40,0x52,0x66,0x7d,0x7a,0x75,0x68,0x77,0xfd,0xb2,0xdd,0xb1,0xdb,0x72,0x50,0x31,0x53,0x24,0xbd,0xb0,0x6e,0x1f,0xbd,0xa6,0x90,0x67,0x07,0x1d,0x31,0xf3 +.byte 
0x8c,0x82,0xf7,0x53,0x85,0x54,0x64,0x7c,0x76,0x7b,0x5f,0xaa,0xe0,0xe0,0x36,0xa4,0x13,0xb3,0x0b,0x99,0x09,0xfe,0xed,0xbb,0x81,0x4b,0xb3,0x16,0x45,0x2e,0x3a,0xfe,0x60,0x9c,0xdc,0xcb,0x00,0x5a,0x41,0xc4,0x80,0x3c,0x9d,0x15,0x05,0xfa,0x5e,0x37,0x64,0x89,0x9c,0x2d,0xb8,0xf7,0xbc,0x35,0x8c,0x49,0xfe,0x0a,0x43,0x1a,0x59,0xaf +.byte 0x1e,0x50,0x08,0x0f,0x2d,0xb8,0x5d,0x63,0x7f,0x95,0x6a,0xe6,0xad,0x88,0xc3,0xac,0x05,0x14,0x44,0xb0,0x70,0x83,0x5f,0x94,0x45,0x3d,0xe5,0xbd,0xb8,0x92,0x28,0x20,0xd5,0xa0,0x83,0xd2,0xe2,0x41,0x71,0x27,0x29,0x1b,0x2a,0x3a,0x08,0xca,0x75,0xec,0x16,0x4a,0xcf,0x39,0xed,0xbe,0x2a,0x26,0x9b,0xa3,0x26,0xc6,0x89,0xf2,0xc6,0x8d +.byte 0x49,0x3a,0xfe,0xda,0x16,0x54,0x55,0x7e,0x7f,0x65,0x65,0xd2,0x16,0xdd,0xe2,0xa3,0x86,0x7a,0x69,0x82,0x99,0x58,0x45,0x16,0x4c,0x69,0xff,0x72,0xf2,0xbc,0xbb,0xdd,0xe1,0xb4,0x56,0xcf,0xc0,0x84,0xd6,0x2c,0xd8,0xce,0xf4,0x67,0xd8,0x1d,0xb7,0x77,0x6d,0x96,0xf4,0x28,0x7a,0x33,0x03,0x97,0x72,0x37,0xd9,0x35,0xcf,0x20,0x28,0xc2 +.byte 0xc4,0xea,0xf9,0x99,0x89,0xe0,0xcc,0x3d,0xec,0x2c,0xbf,0x06,0x78,0x91,0x1b,0x55,0x1b,0x51,0x9b,0xbe,0xf7,0x4a,0xf8,0x9f,0x46,0xab,0xee,0x5d,0x4e,0x29,0x36,0xf3,0xb9,0xa7,0x85,0x9b,0xf7,0xa1,0x9e,0x2a,0xbb,0xb3,0x0a,0x61,0xb5,0x0f,0x79,0xf4,0xe2,0xd2,0x2c,0x15,0xf7,0x4f,0xca,0xa9,0x46,0x25,0x1c,0xdc,0xfa,0x0f,0x9e,0xfa +.byte 0xf5,0xb8,0x54,0x7a,0xe3,0x98,0x3c,0x3b,0x85,0xf8,0xb3,0x7c,0x70,0x40,0x86,0x2a,0x66,0xd1,0x4d,0x83,0x38,0xc2,0x24,0x8e,0x30,0xc0,0x9e,0x54,0x4c,0x7a,0x62,0x9a,0x55,0x8e,0x11,0x02,0xef,0x30,0x08,0x5c,0xf3,0x57,0xa7,0xbe,0x32,0x04,0xab,0xb1,0x3a,0x51,0x6e,0xcd,0x6f,0xc1,0xd8,0xd0,0x7d,0x4f,0x1b,0xa9,0x1e,0x12,0x92,0x94 +.byte 0xd7,0x40,0xa9,0x99,0x70,0x06,0xcb,0x46,0xa5,0xe0,0x77,0xbe,0x6d,0x48,0xab,0x67,0x4e,0xa7,0x0e,0xfe,0x1f,0x53,0x24,0xbc,0x89,0xcb,0x70,0xac,0x05,0xa2,0xf4,0xa3,0x44,0xde,0xcb,0x18,0x95,0x78,0x70,0x0f,0x69,0xf0,0x5e,0xbd,0xe7,0xfc,0xd3,0x17,0x3e,0x18,0xb0,0x2f,0xa6,0xfe,0x82,0x81,0xe7,0x74,0x44,0xfb,0x43,0x5e,0xda,0xf4 +.byte 0xfb,0xfe,0x5c,0xb4,0x3c,0x1d,0xea,0x0d,0x2d,0xdb,0xee,0x1f,0xc5,0xbd,0xb2,0xa0,0x52,0x76,0x9e,0xad,0xfa,0x19,0x37,0xb0,0x15,0x53,0x82,0x25,0x86,0xd9,0xce,0x99,0x84,0x67,0x5f,0x57,0xb2,0x6f,0x99,0xa4,0x56,0xb5,0x01,0x4f,0xdf,0xa2,0xca,0x8c,0x23,0x51,0xd3,0xc7,0x72,0x9b,0x90,0x72,0x29,0x0c,0xca,0x86,0xff,0xc3,0xd9,0x9e +.byte 0x87,0xe4,0x8d,0xc6,0xac,0xba,0xfb,0x73,0xa9,0xcd,0x5d,0x16,0xfc,0x12,0xea,0x30,0xd5,0x7d,0x7b,0x16,0xa6,0x2c,0xeb,0x3c,0x3e,0x46,0x7c,0xee,0x03,0xd6,0x7a,0xe8,0x88,0x1c,0x17,0xa9,0x08,0xe9,0xd5,0x38,0x59,0x54,0x0b,0xb0,0x77,0x1b,0x76,0x09,0x53,0xca,0x38,0x12,0xd1,0xb5,0x2c,0xe3,0xd6,0xa0,0xca,0x9f,0x65,0x56,0xea,0x95 +.byte 0xab,0xc1,0xf4,0x98,0xaf,0x1a,0xe7,0x2b,0x1e,0x8d,0x75,0x43,0x43,0x9f,0x42,0x5c,0x2c,0xa5,0xd7,0x9a,0xcd,0xc2,0xab,0xd9,0x1f,0x1f,0xde,0x8a,0x3e,0xf8,0x0f,0x56,0x8a,0x01,0xde,0x47,0x41,0xd8,0xa0,0xc8,0x32,0x4d,0xa3,0x75,0x80,0x87,0xb1,0x1e,0x05,0x06,0x5e,0x2c,0x9a,0x7b,0xd3,0x22,0xe0,0x53,0x8f,0x4f,0x35,0x5f,0x46,0x3a +.byte 0xb2,0xfe,0x62,0x44,0x54,0x38,0xe0,0x03,0x5e,0xda,0xcb,0x86,0xdf,0xda,0x67,0x66,0x40,0x27,0x97,0xf0,0xc2,0xbd,0xce,0xce,0x37,0xeb,0x47,0xe2,0x56,0x7e,0x54,0xe9,0x51,0xda,0xec,0xd5,0xe6,0xc1,0x69,0x6e,0x4c,0x3d,0x92,0xdc,0xa0,0x51,0xe2,0x2b,0xb8,0x96,0xb6,0xce,0xdf,0x35,0xdb,0xd0,0xd4,0x42,0xe3,0x94,0x89,0x09,0x1b,0xb4 +.byte 
0xe2,0x8f,0xfb,0x23,0x62,0x35,0x56,0xc7,0x94,0x40,0xd7,0x2d,0xdb,0x80,0xc9,0xbd,0x4d,0xe3,0x14,0x30,0x44,0x43,0xad,0xeb,0x3d,0x89,0xe9,0x61,0xd7,0x80,0x15,0x59,0xcd,0xda,0x38,0x11,0x3b,0x84,0x14,0x85,0xef,0x55,0xf2,0x01,0x2c,0xed,0x74,0xf5,0x71,0x75,0x0c,0x52,0x0c,0x41,0x86,0xbe,0x84,0xc5,0x89,0x8b,0xa5,0x6d,0xc3,0xfa +.byte 0x2b,0xe5,0xe7,0xe8,0xdd,0xf9,0xe8,0x27,0x08,0x5d,0xdf,0x61,0xdc,0xb2,0xe0,0x8c,0xe8,0xda,0xa8,0x68,0x22,0x51,0x6b,0xdf,0xd0,0x92,0x87,0x6a,0x43,0xff,0xd1,0x9d,0x9a,0x4c,0x03,0xdf,0x3e,0xc1,0x31,0x33,0x6e,0x2a,0x55,0xc1,0x58,0x59,0x69,0x66,0x05,0xd1,0xa7,0xa1,0x3b,0x98,0x1d,0x44,0x74,0xc7,0x7e,0xc0,0x07,0xd9,0x9c,0x87 +.byte 0x5f,0xc3,0x44,0x25,0x7b,0x96,0xbc,0x20,0x5d,0x14,0x08,0x34,0xe9,0xad,0x34,0xa3,0xc3,0x95,0x1a,0xc1,0xd1,0x37,0x43,0x49,0x66,0xff,0x39,0x70,0x27,0xa0,0x2b,0x39,0x9d,0x1b,0x78,0x52,0x55,0x77,0x30,0xe8,0x72,0x65,0x8a,0xc8,0xa4,0xe6,0xb7,0xd6,0x66,0x82,0xa7,0x1d,0xde,0x3e,0xc2,0x23,0x5a,0x8b,0x51,0xe4,0x44,0x03,0xf3,0x89 +.byte 0x10,0xb0,0x9a,0x09,0x5d,0xe3,0xe9,0x4a,0x0b,0xe3,0x86,0x58,0xf8,0xe3,0x1a,0x3f,0x7f,0x42,0xa5,0xd7,0xb0,0x24,0xb7,0xbc,0x1d,0x40,0xe7,0x2f,0x42,0x8c,0xa8,0x3c,0x33,0xee,0x9f,0xaf,0xd1,0x51,0x8e,0x34,0x82,0xc5,0x16,0xef,0xb1,0xa6,0xa8,0x0e,0xae,0xe6,0xc3,0x2f,0xb3,0x06,0xd4,0x4c,0xec,0xee,0x9e,0xff,0x88,0x82,0x4b,0xb8 +.byte 0xc5,0xef,0x94,0xe2,0x68,0x48,0x23,0xa2,0xc8,0xe4,0xdb,0x33,0xf9,0xee,0x73,0xc2,0xe6,0xa1,0x64,0xf9,0xf6,0xab,0x5a,0xdc,0xa5,0xb3,0xd8,0xae,0xf4,0x1f,0x47,0xfe,0xa0,0xee,0xf5,0xee,0x41,0x30,0xa6,0xbe,0x34,0x2c,0x1a,0x24,0x8a,0x80,0xb1,0x79,0x7e,0x2c,0xc0,0x65,0x68,0x46,0xae,0x0a,0x01,0x77,0xce,0xa2,0x5f,0xc3,0x00,0x8f +.byte 0xd4,0x0f,0xbe,0xbf,0x81,0x20,0x4e,0xb8,0x21,0x5f,0xfa,0xb2,0xf2,0x02,0x83,0x41,0xa8,0xf1,0xe8,0x2c,0x7e,0x0e,0xe6,0xf0,0x6e,0xd5,0x7b,0xcb,0x4e,0xed,0x06,0xc4,0x18,0xfb,0x0e,0x0d,0x8e,0x22,0x8a,0x40,0x4d,0x66,0xa5,0x0c,0x74,0xf3,0x9e,0xd9,0x90,0xf8,0x71,0xe4,0x92,0x05,0x3d,0x2d,0xa0,0xed,0x42,0x88,0x18,0x9a,0xc7,0xe4 +.byte 0x41,0x5d,0xde,0x44,0x2e,0x26,0x30,0xfe,0x51,0xa8,0x91,0xa3,0xa6,0xfd,0x3e,0x04,0x7f,0x3a,0xa9,0x1c,0x21,0x98,0xab,0xaa,0x39,0x9d,0xe4,0x51,0x75,0xeb,0x90,0x6b,0xab,0x11,0x89,0xa9,0x22,0xa8,0xc5,0x92,0x16,0x51,0xe1,0x77,0x09,0x53,0x7f,0xb6,0x80,0x4b,0xf5,0xf5,0xa2,0x0e,0x36,0x24,0x7f,0xe7,0xcc,0x67,0xfb,0x2c,0x6e,0xc2 +.byte 0x16,0x47,0x41,0xc2,0x77,0xf4,0xcf,0x49,0x37,0x17,0x67,0x34,0x14,0x92,0x7d,0x0f,0x14,0xe8,0x4b,0x4c,0xc3,0xbb,0x78,0xf7,0xa0,0x59,0xbe,0x06,0x10,0x38,0xe6,0x2c,0x08,0x15,0xba,0xc6,0x49,0x38,0x9a,0x91,0x2b,0x4d,0x82,0x42,0x0e,0xe4,0x02,0xef,0x2b,0xa2,0x06,0xcc,0x3a,0x3c,0xb9,0xc5,0xb5,0x71,0x1e,0x17,0x5d,0x65,0x35,0x91 +.byte 0x89,0x54,0x97,0xa8,0x7b,0x02,0x24,0xf9,0xdb,0xb5,0x52,0xf7,0xd0,0xa0,0x42,0x48,0x01,0xf4,0x47,0x7c,0x84,0x7c,0x8a,0xb4,0xf4,0x30,0xec,0xb9,0x21,0x44,0x87,0xb2,0x96,0xa4,0x3b,0x0d,0x93,0x26,0x09,0xc8,0xfa,0x28,0x6f,0x09,0xb7,0x03,0x85,0x66,0x21,0x2d,0xf1,0xaa,0x3f,0x0b,0x59,0x15,0xfe,0x8b,0x2b,0xe0,0x81,0x38,0x63,0x70 +.byte 0x09,0x37,0x38,0x62,0x04,0x8e,0x3f,0x23,0x65,0xf8,0xf7,0xc0,0x30,0xb8,0x04,0xb4,0x17,0xd7,0x21,0xcc,0x8b,0x31,0xd3,0x7b,0x11,0xea,0xc5,0x51,0x01,0x93,0x5f,0xe3,0xf3,0x1e,0x0d,0x41,0x52,0x2a,0xfd,0x27,0x02,0x00,0x58,0x0d,0x1f,0x16,0xd7,0x50,0x09,0xea,0x3f,0x9f,0x72,0xae,0x7a,0x79,0x4b,0x69,0x61,0xfc,0xac,0x5c,0x4d,0x6a +.byte 
0x65,0x5d,0xa5,0x67,0x76,0xe4,0x24,0x3f,0xa0,0x6f,0xf6,0x60,0xd2,0x70,0x8e,0x2e,0xbe,0xf9,0x8b,0xab,0x22,0xc8,0x9c,0x5b,0x26,0xc5,0x75,0xeb,0x96,0xa2,0x4f,0xdf,0x6c,0x05,0x9a,0x15,0xef,0xbf,0x3e,0x35,0x6d,0x8d,0x48,0xa4,0x33,0xc2,0xe8,0x3b,0x89,0xe4,0x0c,0xb2,0x9a,0xc6,0x89,0x52,0xba,0xc7,0x2a,0xa5,0xfb,0xe5,0xde,0x06 +.byte 0xbd,0xc3,0x4f,0xe8,0xa9,0x9d,0x36,0xa5,0xcc,0x90,0xcd,0x68,0x49,0x52,0x6e,0x9a,0x85,0xd4,0x1b,0xe5,0x3f,0x54,0xc8,0xb4,0x7a,0x76,0xbf,0xa8,0xf4,0x25,0x05,0xeb,0x43,0x0c,0x2b,0x1c,0x59,0x5b,0x51,0x7f,0xd5,0x13,0x54,0x37,0x44,0x37,0x2f,0x79,0x1c,0x1f,0x18,0x57,0x60,0xab,0xf7,0xcc,0x5d,0xd5,0xdd,0x69,0xab,0x7f,0xc7,0x9d +.byte 0x7f,0xd7,0x6a,0xdc,0x34,0x3d,0x6e,0x2c,0x1e,0xb8,0x74,0xef,0xec,0x14,0x83,0x98,0x20,0x85,0x8a,0x95,0x93,0x26,0xed,0xbb,0x7d,0xfe,0x63,0xaa,0x20,0xbb,0x40,0x7b,0x35,0x1d,0xe5,0x64,0xc0,0x64,0x83,0x90,0x59,0xb4,0xae,0xf7,0xfe,0x14,0xb2,0xaa,0x72,0xf7,0x34,0x61,0xe0,0x61,0x06,0xb3,0xdc,0x09,0x5f,0xe1,0x57,0x65,0x83,0x8a +.byte 0x6d,0x46,0x54,0x8f,0xbf,0x38,0x12,0xf5,0xa3,0xfc,0x7b,0x90,0x4f,0x30,0xed,0xc1,0xab,0xb2,0x6e,0xee,0x7c,0x5e,0x35,0x70,0x80,0xb0,0xae,0x93,0xdc,0x4e,0x8f,0x6c,0x37,0xef,0xc9,0x4c,0x3a,0x41,0x14,0x91,0x99,0x0d,0x48,0xbe,0x5e,0x9b,0xc5,0xa6,0x4d,0x07,0x0d,0xd5,0xe6,0x5d,0x26,0x6b,0xa0,0xf3,0xb2,0x28,0x15,0x57,0xdb,0x7b +.byte 0x8e,0x6b,0x88,0xc3,0x81,0xb6,0x16,0xd1,0x3c,0xd0,0x2d,0x5a,0x23,0x35,0x8e,0xb0,0x8b,0x5c,0x99,0x6a,0x7a,0x55,0xb1,0xf9,0x45,0x97,0x94,0x05,0x6e,0x58,0xd4,0x53,0x8d,0x73,0x43,0x02,0x68,0xdf,0x7c,0x37,0x1a,0x6b,0x71,0x04,0xa0,0x31,0x77,0xbc,0xe0,0x16,0x5a,0x2a,0x9a,0xb2,0x40,0xe4,0xbb,0xd0,0xfd,0x35,0xcb,0x7f,0xf4,0x13 +.byte 0x0f,0xb5,0x93,0x9a,0x7d,0x50,0xf8,0xfe,0x56,0x34,0x83,0x20,0xce,0x3d,0x02,0x2e,0x0b,0x95,0x76,0x88,0x47,0x8c,0x75,0x51,0x14,0x52,0x49,0xbc,0xed,0x66,0x0e,0x81,0x65,0x5e,0x64,0xfb,0x45,0x59,0x3d,0x2b,0xd6,0x3a,0xc6,0xfd,0x50,0xe4,0xeb,0x0c,0x68,0x38,0x0f,0xdd,0xa2,0xdc,0xaa,0x26,0xf5,0x7b,0x40,0x6a,0x90,0xf8,0x08,0x2c +.byte 0xe8,0x8f,0x8e,0xc1,0xf2,0x6b,0x87,0xeb,0x7a,0x02,0x9e,0x26,0x3e,0x6b,0xb9,0x71,0x2e,0x6f,0x26,0x20,0xa9,0xc0,0x7c,0xe5,0x6c,0x6b,0xd4,0xc4,0x7b,0x54,0x8e,0x4a,0x7a,0xef,0xfc,0x03,0x02,0x1d,0x6a,0x16,0x99,0x35,0x12,0x49,0xba,0x86,0x37,0x7a,0xb0,0x8d,0x58,0x6f,0x1c,0xba,0xa9,0x5d,0x93,0xdf,0x98,0x50,0x7e,0xea,0x0a,0x88 +.byte 0x1a,0xd4,0x63,0x91,0x23,0x43,0x43,0x17,0x2e,0xe6,0x04,0x95,0x96,0xa8,0x2b,0xb4,0x9e,0x91,0x6c,0x13,0x52,0x8c,0xbf,0x7d,0x50,0xfc,0x79,0xef,0xa1,0x3e,0x90,0xba,0xac,0xd1,0x0d,0xb0,0x4d,0xd5,0x7a,0xc7,0xbd,0x82,0xb7,0x03,0x9c,0x0b,0xbc,0xa7,0x3c,0x05,0x8f,0xbd,0x0d,0x7f,0x80,0xeb,0xe9,0xbd,0x8f,0xdc,0xcd,0x86,0x23,0x26 +.byte 0xb0,0xa4,0xdc,0x63,0xef,0xad,0x61,0x53,0x7e,0x23,0x34,0x0d,0xd9,0x75,0x7c,0xa7,0x57,0xba,0x28,0x0c,0x82,0x7f,0x68,0xe5,0x24,0xdc,0x23,0x99,0xcd,0x6f,0x03,0x59,0x4f,0x35,0x47,0xc4,0x11,0xc0,0x0c,0x2b,0x16,0x94,0xb8,0x28,0xf2,0x0a,0x91,0x2e,0x1c,0xde,0x75,0x50,0x52,0x00,0x0a,0x92,0x80,0xca,0x39,0x3a,0xdf,0x16,0xb7,0xe2 +.byte 0xbd,0x98,0x7b,0x70,0x48,0x85,0x6d,0x48,0xa0,0x1b,0x0a,0xbb,0xa8,0xb6,0xca,0x9c,0x4e,0xda,0x0a,0x17,0x0b,0x30,0xf5,0xa2,0x9b,0x5a,0x89,0xf4,0x53,0x89,0x38,0x34,0x2b,0x7d,0x14,0x04,0x44,0xa3,0x8f,0x70,0x29,0xa5,0x3e,0xdd,0x5a,0x61,0xa1,0x04,0xac,0xd8,0xd3,0xec,0x42,0xc4,0xd9,0x2c,0x13,0x80,0xf8,0xc9,0xec,0x54,0xa7,0xa0 +.byte 
0xe6,0x37,0x04,0x38,0x5f,0x1e,0x0b,0xfb,0x38,0x06,0xb9,0xe2,0x05,0x12,0x12,0xa2,0x28,0xff,0x12,0xae,0x44,0xd8,0x0d,0x2c,0x5a,0x8f,0xfb,0x1d,0x98,0x69,0x85,0x69,0x99,0xc0,0x63,0xc5,0x88,0xa7,0x2d,0x56,0x76,0x32,0x23,0x4c,0xf7,0x29,0xd6,0x3e,0x45,0xfa,0xd7,0x61,0xf4,0x9a,0xa6,0x9e,0x4a,0xe7,0xe7,0xf9,0xbf,0x1f,0x09,0x82 +.byte 0xbe,0x36,0xa0,0xdd,0x91,0x47,0x3b,0xbc,0x52,0xf2,0xc2,0x04,0x96,0x85,0xb6,0x93,0xac,0x99,0x94,0xbe,0xfd,0xe6,0x53,0x9f,0x75,0xab,0x38,0xdd,0x81,0xc0,0x79,0x25,0xcd,0x73,0x72,0x5b,0x4d,0xc0,0xba,0xa9,0x18,0xaa,0x76,0x51,0x15,0xef,0xb9,0x22,0xdd,0x5f,0x22,0x62,0x6c,0x36,0xf6,0xc0,0x72,0x34,0x01,0x7a,0xaf,0xe2,0x87,0x1b +.byte 0x5f,0x33,0x9c,0xd5,0xe2,0x81,0x03,0xbe,0x4e,0xac,0xcc,0x17,0xc5,0xc6,0xf8,0x0f,0x24,0xe0,0x26,0x56,0x8a,0x20,0x2e,0xe4,0x05,0xc8,0x0f,0x89,0x24,0x0e,0xd4,0xb7,0x07,0xd1,0x99,0x8c,0x55,0xfd,0x75,0xc1,0xdb,0xaa,0xd1,0xd2,0xa6,0xf2,0xf0,0x3c,0xae,0x62,0x0e,0x1f,0xaa,0xc9,0xa5,0x16,0x09,0x2c,0xc0,0x61,0x55,0x72,0x70,0x63 +.byte 0x22,0xb6,0x41,0xa5,0x08,0x34,0x6a,0x1b,0xfc,0x42,0x81,0xe7,0x25,0x98,0xcf,0xba,0x18,0xb0,0x36,0x90,0x72,0x65,0x75,0xf3,0x57,0x68,0xd0,0x86,0xe4,0xaf,0x33,0xb6,0x2b,0xef,0x96,0x97,0x17,0x42,0x6b,0x8e,0x19,0xaa,0x4b,0x9d,0xc7,0x73,0x34,0x5f,0x41,0x24,0x12,0xfb,0x66,0xa2,0x1e,0x91,0x41,0xc2,0x78,0x08,0x66,0xc4,0xb2,0x86 +.byte 0x67,0x70,0xe6,0x96,0x76,0x8d,0xa4,0x69,0x6f,0xe5,0x35,0x8b,0x20,0x3d,0x6a,0xcb,0x65,0x7b,0x82,0x7b,0xf6,0x2d,0xd8,0xd0,0xda,0x69,0x8b,0xcd,0xdf,0x15,0xf6,0x3a,0x2c,0xfe,0xc7,0x84,0x20,0x11,0xcc,0x18,0x4f,0xc7,0x2e,0x1c,0x46,0x41,0x6b,0x91,0x79,0xa0,0xbb,0xf4,0x48,0xd7,0x0c,0x9a,0x88,0x01,0xda,0xa1,0xd1,0x8f,0x27,0x49 +.byte 0x9d,0xa0,0x3f,0x5a,0xc2,0xf7,0x26,0x9b,0xe5,0xff,0xa4,0xcb,0x86,0x32,0xb3,0x3c,0xd5,0xe5,0x7c,0xbb,0x5e,0xfe,0x3d,0xcf,0x60,0x1c,0x16,0x8e,0x0c,0xc4,0xa9,0xf2,0xb2,0x42,0x1d,0x13,0xb0,0xa8,0xff,0x90,0xbc,0xd9,0x9a,0x6d,0x78,0x7a,0x46,0x1a,0xa8,0x35,0x4e,0xa4,0x79,0xd5,0xb4,0x36,0x47,0x62,0x3c,0x0e,0x23,0x56,0xca,0xa2 +.byte 0x60,0xe6,0xca,0xf6,0xc3,0xd6,0x7c,0x5d,0x54,0x9c,0x0c,0xfa,0x9a,0x0f,0x3a,0x8c,0x64,0x52,0xdb,0x62,0x5e,0x93,0x82,0xef,0x9e,0x8d,0x30,0xa5,0xe7,0x3d,0x52,0x11,0xd4,0x93,0xb1,0x77,0x8f,0xee,0x54,0x9c,0x80,0x47,0xa9,0x21,0xa8,0xf7,0x16,0x4b,0xbb,0xab,0x75,0x52,0xed,0x0c,0x85,0xf8,0x04,0xf4,0x80,0x08,0x4a,0xb5,0x2d,0x2d +.byte 0xd8,0x98,0x57,0x24,0xd5,0xc8,0x77,0xa0,0xd8,0xb5,0xb1,0x83,0x92,0xb4,0xc7,0x42,0x36,0xd1,0xa5,0xd6,0xbd,0x89,0xc6,0x76,0x31,0x92,0x31,0x67,0x2c,0xa4,0xb2,0x2b,0xcf,0x94,0x20,0x6a,0x17,0x63,0xb9,0x76,0xac,0x9c,0x1c,0x95,0x3e,0x57,0xf8,0x87,0x0d,0xef,0x36,0xcd,0x87,0xd1,0x58,0x2c,0x9a,0x5e,0x54,0x0e,0xac,0x97,0xbd,0x15 +.byte 0xc4,0xdb,0xea,0xd3,0x21,0x05,0x2d,0x78,0xce,0x4c,0x60,0xf3,0xf8,0xeb,0xd9,0x19,0x89,0xb0,0x83,0xc0,0xe4,0x42,0x08,0x5c,0x1a,0x1c,0x53,0xf3,0x1e,0x5a,0x28,0x92,0x0d,0x32,0xbe,0x4a,0x9a,0x70,0x78,0x93,0xc1,0x66,0x81,0xda,0xe7,0x3d,0x05,0xc5,0xaa,0xdc,0x51,0x6b,0xaf,0x67,0x4d,0x18,0xfe,0x29,0xe0,0xfa,0x5c,0xe5,0x9a,0x18 +.byte 0x7f,0x8f,0xaa,0x21,0xa5,0xd0,0x8b,0x62,0x32,0x6b,0x93,0x02,0x19,0x62,0xd3,0xd6,0x74,0xea,0x83,0xdb,0x6c,0x57,0xe3,0x1f,0x1f,0x90,0xd0,0x22,0xf7,0x9a,0x4a,0x14,0xf4,0x8a,0xb3,0x86,0xa5,0x4c,0x1e,0xdf,0x49,0xa5,0x78,0x30,0x5e,0xf0,0x9a,0x69,0x0d,0xaa,0xe9,0x47,0x01,0xae,0x51,0xcf,0x32,0x4c,0xec,0x03,0x08,0xe7,0xcb,0x35 +.byte 
0x59,0xd2,0x48,0xd4,0xfa,0x6a,0x45,0x6b,0x66,0x1f,0xb8,0x1e,0x45,0x85,0xef,0x14,0x25,0x34,0x48,0x50,0x59,0xf3,0x76,0x09,0x32,0xf5,0xe4,0xa8,0x98,0xb0,0x9a,0x70,0xec,0x0a,0x17,0x87,0xcf,0x6d,0x96,0x7d,0x50,0x5e,0x3a,0xff,0x57,0xa7,0xaf,0x04,0x0d,0xdc,0xcc,0xad,0xe3,0x09,0xd3,0x92,0xab,0xd8,0x3a,0x61,0x1f,0x9c,0xc4,0x36 +.byte 0x3b,0xf3,0xf6,0x87,0x43,0xea,0xc8,0xff,0x29,0x19,0x9e,0x87,0x44,0xc7,0xe5,0x5c,0x43,0x30,0x9a,0xb2,0xd8,0x47,0x4a,0x87,0xcc,0xc7,0x8e,0x99,0x32,0xdd,0x3c,0x37,0xda,0xa0,0x39,0x04,0x55,0xca,0xcf,0x2f,0xce,0x8b,0x22,0x35,0x2c,0x29,0x89,0xef,0x5c,0x05,0x82,0x55,0xf3,0x8d,0x64,0x7f,0x69,0xf7,0x3d,0x43,0x27,0xf3,0x4c,0xd7 +.byte 0x43,0x89,0x47,0xd5,0x0b,0x01,0x1b,0x17,0x6c,0x7e,0x63,0x18,0x87,0x8b,0x8f,0x20,0x0d,0xa4,0x1e,0xa5,0x3b,0xf1,0x5c,0xe5,0xc8,0x23,0xd4,0xee,0x79,0x3e,0xd1,0xbc,0x83,0x30,0x03,0x64,0x80,0x7e,0xda,0x13,0x7c,0x52,0x88,0xc1,0x7c,0xa7,0x8a,0x5d,0x8d,0x7b,0x57,0x4e,0x59,0x97,0x83,0x52,0x03,0x04,0x6b,0xd2,0xf3,0xff,0x1c,0x4e +.byte 0x3b,0xae,0x70,0x61,0x3b,0x8b,0xaf,0x56,0x3d,0x28,0x73,0x24,0x39,0x4b,0xb8,0x6e,0x89,0x28,0xe6,0xc8,0x5c,0xe9,0xf8,0xec,0x8f,0xf7,0x75,0x1a,0x13,0xc1,0x8e,0x53,0x4e,0xe5,0xef,0x37,0xce,0xa1,0x54,0xca,0xcc,0xf5,0x01,0x29,0x2a,0x8f,0x00,0x1c,0xde,0xcd,0x5e,0x24,0x0b,0xa5,0x94,0x0c,0x8a,0xab,0x54,0x1e,0x80,0x2a,0x0d,0x84 +.byte 0x38,0x4c,0x17,0xea,0x84,0x07,0x9c,0xbd,0x85,0xd8,0x1b,0x57,0x6a,0xde,0xb3,0x86,0xa3,0xf8,0x6d,0x03,0x3e,0xf1,0x37,0xae,0x7d,0x02,0x33,0xc5,0x7b,0xf6,0x64,0xdb,0x3e,0xb0,0x48,0xda,0x49,0xec,0x89,0xb4,0x83,0xff,0xe1,0x6f,0x9a,0x7e,0x0a,0xda,0x6e,0xec,0x70,0x0b,0x51,0xac,0x82,0xac,0xb8,0xce,0x16,0xe7,0x47,0xab,0xe8,0xc7 +.byte 0x56,0xd1,0xab,0x73,0x72,0x5c,0xe7,0x9e,0xb8,0x77,0xa7,0xc1,0x47,0x9c,0x4e,0x16,0x68,0xce,0x21,0x23,0x2d,0x6c,0xcf,0x79,0xd6,0xd4,0xdf,0x74,0x30,0xb8,0x0f,0x60,0xea,0xbf,0x39,0x77,0x45,0xdc,0xaf,0x25,0xbd,0xc5,0x8d,0x0b,0x44,0x21,0xc1,0xc1,0x2e,0x54,0x2a,0x32,0x6c,0xea,0x51,0xe0,0x7d,0xa8,0x09,0x94,0x2f,0x4e,0xfe,0x27 +.byte 0xe8,0x63,0xfb,0x71,0xca,0x01,0x7d,0xc9,0x70,0xd8,0xe4,0x82,0xbf,0x3f,0xea,0x64,0x5e,0xa9,0x84,0x1d,0x2c,0xfd,0x8a,0x7d,0x33,0x73,0x5c,0x82,0xbe,0x9e,0x46,0xfc,0x39,0x5e,0x38,0x2a,0x20,0xd9,0xa9,0x20,0x46,0x23,0xc1,0x8b,0x0a,0x9c,0x42,0xb6,0x50,0x9f,0xc8,0x7d,0x4a,0x85,0x98,0xed,0x92,0x13,0xd3,0xd6,0xe6,0x6d,0x50,0x6e +.byte 0x93,0x63,0x41,0xa3,0x63,0x97,0x52,0xe3,0xaf,0x09,0xe1,0x40,0x12,0x41,0xed,0xb3,0xc5,0xb8,0x9f,0xc1,0xf2,0xd2,0xe6,0x16,0x94,0x97,0xdb,0xae,0xdb,0xd4,0x1f,0x5a,0x2f,0xf1,0xb1,0x22,0xf6,0x60,0xa4,0x0e,0xd8,0x2f,0xf7,0xf7,0x3f,0x6c,0x7d,0x73,0xe3,0x1d,0x99,0x04,0x7f,0x4f,0x70,0x2a,0x8c,0x43,0x80,0xa3,0xd0,0x25,0x75,0xd8 +.byte 0xb6,0xc8,0x90,0xa2,0x26,0xee,0xba,0xc5,0x1a,0xdc,0x1f,0x81,0x65,0x54,0xc6,0x57,0x6e,0xa2,0x03,0x32,0xf5,0x14,0xb2,0xdd,0x4d,0x21,0xaa,0xb9,0x78,0x4f,0x76,0xab,0xbe,0xfe,0x5d,0xc6,0xaf,0xed,0x6f,0xf9,0xaa,0x31,0x21,0x08,0xa4,0x6e,0xfb,0x78,0xdc,0xed,0x0c,0x05,0xff,0x1e,0x60,0x38,0x60,0x94,0xa9,0x92,0xa7,0x07,0x6e,0x6f +.byte 0x6d,0x89,0x8a,0x73,0xfb,0xaf,0x01,0x34,0x7d,0x7d,0x33,0x76,0xff,0x1f,0x6b,0x79,0x5e,0xff,0x50,0x14,0x80,0x7d,0x55,0x0e,0x2d,0xc3,0x77,0x85,0x30,0x20,0xf6,0xc8,0xc7,0xb7,0x73,0x1b,0xd1,0x87,0x69,0x44,0xeb,0x02,0x5e,0x45,0x66,0x6f,0x28,0x00,0x1f,0xf8,0x58,0x93,0xe5,0x21,0xbc,0x19,0x8d,0x72,0x19,0xaa,0x9a,0xbb,0xc6,0x47 +.byte 
0xe6,0x0b,0xe4,0x76,0x13,0xc7,0xc4,0x1b,0x9d,0x85,0xba,0x17,0xb6,0x30,0x2a,0xdb,0x7c,0x36,0xd7,0xd8,0x8b,0x9c,0x99,0x92,0x64,0x03,0x4f,0xd4,0x1f,0x04,0x2e,0x45,0x34,0x55,0x92,0x99,0x77,0xb8,0x45,0xce,0x59,0x22,0x3c,0x6e,0xe5,0x18,0xb0,0x83,0x42,0x42,0x75,0x1c,0x34,0x0f,0x2e,0x59,0x06,0x94,0x17,0xea,0xc3,0xdb,0x0b,0x2f +.byte 0x44,0x97,0x54,0xe8,0x76,0xd3,0x25,0x24,0xe9,0x21,0x4f,0xd7,0x01,0x7d,0xbe,0x90,0x8a,0x0a,0x7d,0x4e,0x91,0x5f,0x4c,0x32,0x83,0x42,0x55,0x95,0x3c,0x7a,0x3e,0x46,0x8a,0x5d,0x0c,0x05,0xcd,0x0b,0xf6,0x3e,0x4d,0xf3,0x55,0xea,0x42,0x3e,0x19,0x0e,0xda,0xd4,0x22,0x88,0xe2,0x29,0x06,0x9e,0xea,0x1c,0x27,0x96,0x7f,0x3a,0x8a,0x28 +.byte 0x2f,0x7d,0xa2,0x65,0x37,0xae,0xb6,0x6a,0x59,0x41,0x19,0x73,0x91,0x64,0x77,0x4e,0x5a,0x1a,0x85,0x9f,0xc5,0xb0,0x85,0xc1,0x96,0x47,0x69,0x9c,0x36,0x70,0x36,0xa3,0x2e,0x1a,0x7d,0x11,0x59,0x55,0xec,0x4c,0x49,0xa1,0x86,0x3c,0x3d,0x24,0xb8,0x7a,0x84,0xca,0x4c,0x3f,0x7e,0x81,0x95,0x39,0x41,0xfe,0xc4,0x74,0xe5,0x89,0x7e,0xdc +.byte 0x86,0xd2,0xdb,0x8b,0xb8,0xa2,0xbb,0x15,0x64,0x89,0xf9,0x00,0x7d,0x56,0xec,0x8b,0xc8,0x05,0xcd,0x76,0x6c,0xcb,0xaf,0x7e,0xd2,0xdd,0x67,0xb3,0x99,0x16,0x63,0xf2,0x6d,0x49,0x7d,0xeb,0x67,0x24,0x98,0xf1,0x28,0xa3,0xb2,0x14,0xfc,0x95,0xf6,0x55,0xa0,0xb5,0x8c,0x26,0x2f,0xc6,0x08,0x49,0x57,0x4c,0x20,0xbc,0x48,0xab,0x24,0xef +.byte 0xe9,0xab,0x6b,0x77,0x4d,0x3b,0x61,0x84,0x68,0x67,0x72,0xc2,0xcf,0xab,0x8e,0xac,0x39,0xec,0x43,0x03,0xbb,0x4f,0x32,0x7d,0x7d,0x51,0x69,0x30,0xee,0x4f,0xd0,0xb9,0xa5,0x22,0xdd,0x47,0x06,0xad,0xac,0x62,0x20,0xff,0x7b,0x8c,0x90,0x91,0xb3,0xd8,0x89,0xd3,0xea,0x81,0xdc,0xca,0x31,0xc3,0x65,0xca,0x4c,0x50,0x0a,0x85,0xf7,0xaf +.byte 0xe3,0x67,0x57,0x53,0x1d,0x4e,0x42,0x17,0x2d,0x14,0x80,0x29,0x09,0x2b,0x48,0x45,0x43,0xb9,0xad,0x1f,0xb7,0x2d,0xab,0xfa,0x6a,0x1b,0x3c,0x7d,0x76,0xd7,0x36,0x20,0xb0,0xd3,0xc0,0x5e,0xc7,0x20,0x06,0x0c,0xa9,0x6a,0xb2,0x67,0xad,0x91,0x49,0xfc,0x4d,0xb2,0x15,0x61,0x61,0xfa,0x33,0x6c,0x94,0x92,0x58,0xef,0x46,0x82,0x9c,0x04 +.byte 0x52,0x21,0x28,0x08,0xb4,0xa9,0xd4,0x2e,0xd9,0x8c,0x93,0xd0,0xd8,0x4f,0x33,0x1d,0x0b,0x7e,0x07,0x12,0x40,0x64,0x3d,0xa2,0x8f,0xa3,0x96,0x45,0x0e,0xfc,0x9b,0x55,0x5f,0x3c,0xa2,0x57,0x3e,0x51,0x40,0x69,0xdc,0x7a,0x51,0xd2,0x3b,0x79,0x2f,0xd2,0x01,0x18,0xbf,0xd5,0xd2,0xd1,0x0e,0x08,0xcf,0xac,0x07,0x4d,0xd1,0x92,0xc7,0xca +.byte 0x92,0x75,0x0b,0x80,0x29,0xf1,0x46,0x24,0xba,0x47,0x6b,0x4a,0x64,0xfb,0x31,0x69,0xe9,0x40,0x0d,0x69,0x50,0xd0,0xdf,0xf8,0xcb,0x6a,0xe8,0xd4,0xc2,0xbd,0x0b,0x23,0x00,0xe0,0x29,0x0a,0x0a,0x8e,0x19,0xec,0xa9,0x14,0xe4,0x5d,0x4c,0x30,0xc9,0x85,0x42,0xd6,0x9f,0x83,0x8f,0x2a,0x5b,0x22,0x37,0xe4,0x71,0x3b,0x19,0x86,0xd4,0xda +.byte 0xb5,0x81,0x8e,0x84,0x57,0xcd,0x13,0x64,0xc3,0x23,0xfd,0x91,0x8a,0xe4,0xb9,0x32,0x12,0x17,0x02,0xa6,0x8d,0xec,0x44,0x9d,0xa5,0x7c,0x96,0x14,0xd1,0xd5,0x93,0x02,0x0c,0x9d,0xfc,0x26,0xa0,0xd2,0x41,0xaa,0x75,0xe8,0x82,0x6f,0x47,0x1d,0xe8,0xcf,0x94,0xe3,0x35,0xa9,0x76,0x1e,0xdb,0x92,0x5f,0x32,0x49,0xf4,0xd5,0x59,0x9c,0x4e +.byte 0xf7,0x89,0xda,0x23,0x7f,0x46,0x0e,0xfc,0xaf,0x1c,0x6f,0xcc,0x59,0xa5,0x43,0x04,0xbf,0x55,0xab,0x7d,0x36,0xa3,0xa5,0x03,0x7f,0xdf,0x33,0x6c,0x6d,0xd0,0x53,0xaa,0xef,0x54,0xc1,0x62,0xa0,0xd6,0x3a,0x67,0x87,0xe3,0x76,0x17,0x45,0xbe,0x7f,0x55,0xc8,0x8b,0xe8,0x1c,0xa8,0xe6,0xa6,0xb2,0xbf,0xe5,0x45,0xc0,0x88,0x22,0x36,0xa0 +.byte 
0xec,0x21,0xdc,0x3e,0x6b,0xd2,0xc7,0xdf,0x5b,0xa4,0x32,0x28,0xca,0x23,0xe1,0x50,0x55,0x72,0x59,0x28,0x1c,0xf7,0x93,0x91,0x07,0x3c,0x4e,0x81,0x20,0x58,0x9b,0x07,0x38,0x37,0x68,0x2c,0x29,0xba,0x20,0x11,0xa9,0xa0,0x29,0x65,0x57,0xb1,0xe3,0xb1,0xfb,0xe2,0x70,0xee,0x1f,0xcd,0xf5,0x61,0xea,0x7a,0x08,0xb4,0x1e,0xfe,0xe7,0x4d +.byte 0x32,0xa0,0xfd,0xb4,0x52,0xa1,0x4b,0x67,0xba,0x5e,0x90,0xe7,0x56,0xec,0x06,0x03,0xb6,0xe6,0xc6,0x98,0xa1,0x41,0xf4,0xaf,0xde,0xe2,0x67,0xef,0xaa,0x05,0x97,0xc5,0x80,0x32,0xd0,0x43,0xc2,0x02,0x7a,0xcc,0x4c,0xdd,0xe9,0x1e,0xd0,0x4f,0xad,0xf3,0x4b,0x2c,0x5e,0xb8,0xd8,0x84,0xc2,0x43,0xc7,0xa9,0x86,0x4d,0x10,0xae,0xb7,0xe3 +.byte 0x5c,0xd5,0x2a,0xba,0x3b,0xd3,0x7b,0x5d,0xc8,0xe0,0x67,0x87,0xbe,0xbf,0x71,0x4e,0x22,0x68,0x12,0x53,0x95,0x73,0x5c,0x30,0x7b,0x2b,0xfd,0xc1,0x3c,0xfc,0xc4,0x0f,0xdd,0x5b,0x3e,0x1b,0x72,0x71,0xa6,0xe3,0x1f,0x2d,0x51,0xe2,0x61,0x3d,0xa0,0x60,0xc2,0x6b,0x41,0x8f,0x94,0x83,0x29,0xa3,0xb6,0xa7,0xc7,0x11,0x8f,0x1c,0xb5,0x19 +.byte 0x66,0x44,0xc7,0x05,0x58,0x83,0x28,0x69,0x0c,0xb6,0x65,0xe5,0x93,0x1c,0xb1,0xf6,0xf9,0xea,0xda,0x84,0x26,0x8e,0xa2,0xbb,0x9b,0x55,0xd3,0xbc,0x42,0x56,0x8f,0xce,0x6e,0x74,0x40,0xf2,0x02,0xa6,0x22,0x22,0x6e,0x20,0x0e,0x4b,0x8b,0x15,0xa5,0x04,0xf0,0xe0,0x7b,0x27,0x0a,0x38,0xe3,0x99,0x04,0xd0,0x5b,0x64,0xd2,0x04,0x92,0x61 +.byte 0x57,0x74,0xbc,0x1e,0x98,0x01,0x4b,0x2f,0x46,0x56,0x1c,0xeb,0x49,0x2d,0x66,0xac,0x85,0x96,0x48,0xfd,0xa1,0xf0,0xf5,0xc0,0xdb,0x7a,0xf2,0x0b,0x57,0x86,0xac,0x4c,0x6a,0x02,0x97,0x13,0xef,0x08,0xf6,0x18,0xe1,0x5c,0xb3,0x18,0x3d,0x70,0xc0,0x76,0x5e,0xd0,0xb8,0x44,0x32,0x25,0x75,0x62,0xa2,0x80,0x78,0x8c,0xc4,0x2a,0x84,0xbc +.byte 0x51,0xd4,0xee,0x44,0x48,0xe5,0xc4,0x48,0xbf,0xc0,0x27,0xc1,0x77,0x25,0xf5,0x59,0x6b,0x60,0xae,0xa5,0x42,0xfe,0xc3,0x06,0x91,0xe3,0xdb,0xa9,0x4b,0xe2,0x73,0x95,0x1f,0xf6,0xb6,0x66,0x71,0x63,0xb3,0x14,0x4a,0x3d,0x36,0x84,0xbe,0x2a,0x7c,0x7c,0xba,0x0e,0x8d,0x9a,0x73,0x52,0x21,0x89,0x02,0x8f,0x94,0xa5,0x9a,0x11,0x2e,0x6e +.byte 0x78,0xf7,0x07,0xf8,0xb1,0x42,0x96,0x06,0x78,0xf0,0x53,0x86,0xec,0x2b,0x1f,0xa7,0x84,0x79,0x37,0xc7,0x61,0x83,0x8e,0x62,0x65,0x49,0xdd,0xfe,0xee,0x97,0x70,0xa2,0x73,0xb5,0x85,0xaf,0x10,0xed,0xb8,0x74,0xec,0x42,0xd0,0x14,0x47,0xa6,0x90,0x7c,0x07,0x22,0xb4,0x4e,0xfc,0x12,0xa1,0x9d,0xd4,0x73,0x8f,0x6a,0x55,0xf8,0x56,0x25 +.byte 0xdb,0x9b,0xe8,0x10,0x87,0x7a,0x4b,0x42,0x9c,0xbb,0x6e,0xf1,0xd7,0x1d,0xf4,0x07,0x31,0x9c,0x94,0x3a,0xb6,0xad,0x4b,0xf4,0x57,0x3d,0x2f,0xba,0x23,0x36,0x34,0x52,0x62,0xf7,0x64,0xc7,0x47,0xeb,0x41,0xad,0x07,0xfb,0x3e,0x08,0x74,0x92,0x58,0x0f,0x73,0xe2,0x53,0x35,0xda,0xae,0x64,0x3c,0x47,0x89,0xaf,0xce,0x59,0x35,0x75,0x8b +.byte 0x50,0xee,0xbf,0xbe,0xd1,0xf4,0x2f,0x11,0xa3,0xfe,0xce,0xfd,0x15,0x0d,0x32,0x17,0x00,0xfb,0xad,0x02,0x70,0x5c,0xeb,0x59,0xfb,0x87,0xe5,0xed,0x0e,0xde,0x97,0xe7,0x75,0xb6,0xdc,0xe9,0xb0,0x08,0x26,0x0e,0x11,0xd4,0x4f,0xc4,0x92,0x71,0x7c,0x63,0xef,0xc0,0x14,0x64,0xe1,0x0f,0x7e,0xe6,0xcb,0x5b,0x4c,0xd4,0x16,0x8b,0x7b,0x8b +.byte 0x2f,0x2a,0x77,0xef,0xd3,0xdf,0x56,0xc0,0x5a,0x94,0x72,0xd5,0x36,0x12,0xfa,0x25,0xd7,0x77,0x52,0xdd,0xea,0x11,0x2f,0x6b,0x16,0x6e,0xe3,0xa2,0x84,0xba,0x55,0xc2,0xb0,0xe2,0x3b,0x53,0xb6,0xa4,0xc6,0xa5,0x3f,0x1b,0xb3,0x38,0xc0,0x2f,0x1a,0x80,0xe0,0xa4,0x60,0x49,0x8c,0xe3,0x23,0x5f,0x59,0xfd,0x2a,0x0f,0xe8,0x4c,0xaf,0xd7 +.byte 
0x36,0xc7,0x25,0x21,0xad,0x41,0x54,0x27,0x95,0x15,0x42,0xbc,0xb3,0x77,0x4e,0x97,0xf4,0x3c,0x54,0xcc,0x19,0x63,0x62,0x67,0x97,0x5a,0xd0,0x59,0xfb,0xce,0xcd,0xe1,0x3c,0xb6,0xc9,0x49,0xc4,0xff,0xde,0xf9,0x89,0x87,0x9c,0xdf,0x4e,0x8c,0x9d,0xe5,0xbd,0x0d,0x0c,0x6e,0x93,0xfd,0xea,0x90,0xf2,0x80,0x7e,0x00,0x9a,0x06,0x02,0x87 +.byte 0xae,0xca,0xf4,0x46,0xbb,0xb5,0x52,0xee,0x18,0xb0,0xf1,0x61,0xcb,0xe1,0x65,0x9c,0x0b,0xfb,0xe6,0x3b,0xeb,0x3a,0x1a,0x22,0x41,0x0b,0x99,0xa4,0x8e,0x01,0x5e,0x7c,0x4e,0x1a,0xaa,0xab,0xd3,0x8b,0x99,0x7f,0xba,0x6b,0xec,0xe7,0x3a,0xd6,0x55,0x46,0x20,0x1b,0x10,0x39,0x06,0xcc,0x90,0xc1,0x6a,0xa5,0x27,0x7c,0xca,0xa5,0x58,0x07 +.byte 0xd7,0xaf,0x6d,0x12,0xa6,0x68,0xc7,0x0e,0x19,0x53,0x44,0x22,0x85,0xbb,0x72,0x9c,0x4d,0xfb,0xeb,0x94,0x3a,0xa0,0x64,0xf5,0x25,0xe8,0xee,0x7a,0x3b,0x71,0x0e,0xbb,0x40,0xa2,0xb3,0xc9,0x6b,0x14,0x0f,0xc3,0x75,0xac,0x1b,0x5c,0xf1,0x34,0x51,0xcb,0xeb,0x5f,0x40,0x0f,0x82,0xe9,0xd2,0x6d,0x95,0x88,0x84,0xea,0xe9,0xe3,0xa0,0xe9 +.byte 0xef,0x3b,0x33,0xfe,0x32,0x52,0x93,0xce,0x95,0x4b,0x64,0x3c,0x97,0x76,0x91,0xd8,0xce,0xb5,0xc2,0xda,0x58,0x23,0x27,0xe2,0x3d,0xbe,0xf6,0x31,0x79,0x73,0x0e,0x31,0xd7,0xa3,0xaa,0xac,0xcf,0x31,0x1e,0x75,0x58,0x14,0x21,0x52,0x1c,0x3e,0x4f,0x2a,0x2b,0x9a,0x22,0xbc,0x42,0x68,0x5b,0x83,0xc2,0x8c,0xd4,0xe8,0xd9,0x02,0x0d,0x13 +.byte 0x2f,0x08,0xd3,0x11,0xb7,0x4b,0x84,0x67,0x43,0xda,0x20,0xdb,0x89,0xd5,0x9e,0x14,0x54,0x3d,0x49,0xda,0xac,0x3f,0x8f,0xf5,0x17,0xfe,0xb8,0x5f,0xc3,0x20,0x38,0x27,0x21,0x32,0xbf,0xf3,0x9b,0x2c,0x0b,0x9b,0xeb,0x64,0x87,0xf7,0x9d,0xed,0x15,0x05,0x21,0x69,0xcf,0x2d,0xf8,0xfb,0xf2,0x81,0x51,0x08,0xc7,0x18,0x81,0xdf,0xed,0xa4 +.byte 0x70,0xb3,0x07,0xfa,0x00,0xd5,0x65,0xb9,0x5a,0x82,0x67,0x6f,0x10,0xfc,0x46,0x05,0x9a,0x85,0x64,0x14,0x60,0x64,0x4d,0x1f,0x13,0x57,0xbb,0x7c,0x4a,0x10,0x84,0x8c,0x57,0x36,0x13,0x22,0x00,0x04,0x2d,0xcf,0x27,0x3d,0xf4,0x27,0x3e,0x32,0xb3,0x87,0xda,0x82,0xaa,0xad,0xd7,0xa7,0xc5,0x3c,0x45,0xec,0x28,0x82,0x79,0x95,0x8f,0x56 +.byte 0x50,0x5f,0xc2,0x15,0xab,0x18,0x58,0x4f,0x69,0x46,0xce,0x29,0x33,0x42,0x53,0xe9,0xea,0xe5,0xa8,0x5b,0x90,0xc4,0xf4,0xbf,0x8a,0x20,0x62,0xad,0xa5,0xea,0x6a,0x4e,0xb4,0x20,0x2d,0xca,0x90,0xdf,0xbd,0xab,0x5b,0xc3,0x33,0x7c,0x53,0x1f,0xf5,0x2e,0xc0,0xbf,0x19,0xe1,0xa1,0x5a,0x63,0xf3,0x13,0x4d,0x6e,0xef,0x4f,0x3a,0x94,0x18 +.byte 0xbe,0x79,0xdb,0xbf,0xc2,0x2c,0xb3,0x36,0x59,0xab,0x21,0x1d,0x98,0x60,0x70,0xdd,0x95,0x51,0x19,0x07,0xd6,0x68,0x0e,0x2a,0xd4,0x4c,0x30,0x18,0x1c,0xe4,0xe1,0x89,0x15,0x25,0xea,0x27,0xcf,0x51,0x56,0xc9,0xa9,0xa7,0x31,0x08,0x17,0xfb,0xfc,0xf6,0x0c,0x5d,0xf1,0x7c,0x36,0xcb,0xad,0xef,0x29,0xf5,0x2e,0x23,0x09,0xcf,0x31,0x6f +.byte 0x74,0x12,0xd2,0xc2,0xc7,0x19,0xa5,0x6e,0x20,0x09,0x67,0xdc,0x41,0x69,0xbe,0x15,0xd6,0xeb,0x7b,0xba,0x63,0xae,0x65,0xd8,0x67,0xec,0x6e,0xcc,0x1d,0x04,0x08,0xfb,0x7c,0x34,0x1d,0x5f,0x1e,0x51,0x1c,0x30,0x72,0xd3,0x0c,0x48,0x60,0x3d,0x52,0xae,0xe6,0x78,0x44,0x6d,0xb8,0x40,0x08,0xb7,0x7a,0xa9,0xfc,0xa0,0x86,0xff,0x32,0xd6 +.byte 0x5a,0x31,0x4e,0xe2,0x65,0xab,0xb0,0x84,0xb6,0x74,0x3e,0xa6,0x67,0x7c,0xa2,0x0f,0x23,0x22,0xab,0x72,0x7e,0xeb,0x45,0xa9,0x2a,0xb4,0xd3,0xcc,0x27,0x5c,0x12,0xdb,0x14,0x68,0x73,0x0f,0x36,0xbf,0x9f,0x14,0x12,0xe9,0xef,0x04,0x2a,0x63,0x41,0x4b,0x04,0x9b,0x4c,0xc4,0xb2,0xb9,0x1c,0xc0,0xb8,0xcc,0x23,0x61,0xc4,0xed,0x27,0x1e +.byte 
0x1d,0x97,0x3d,0x40,0x4c,0x1f,0xeb,0x6e,0xc4,0xfb,0x5c,0x2d,0xf5,0xf1,0xbb,0x05,0x47,0xa2,0x1a,0x9c,0x2b,0x8f,0xce,0x98,0x09,0x6b,0x86,0x22,0xf8,0x3a,0xae,0xf3,0xb4,0x66,0x2f,0xdb,0x20,0xa5,0xc6,0xb6,0x35,0xb5,0x5a,0x68,0xb5,0x37,0x2c,0xab,0x13,0x3d,0x2d,0xcb,0x38,0xed,0x3c,0x7a,0x1f,0x26,0x08,0x58,0x94,0x52,0x30,0xec +.byte 0x06,0x9f,0x90,0x97,0x4d,0x90,0x49,0x23,0xaf,0x00,0x90,0x6b,0x96,0x37,0x02,0x4c,0x35,0xc0,0x3e,0x66,0x2c,0x52,0xbc,0x75,0x28,0xd7,0x8f,0x25,0xbe,0x91,0x10,0x22,0x67,0xbf,0x4a,0x4d,0x62,0xc4,0xe9,0xda,0xe2,0x79,0xcc,0x76,0xeb,0x99,0x87,0xac,0x39,0x7d,0xf6,0x5a,0x37,0x85,0x30,0x33,0x65,0x3f,0xd9,0xd6,0x17,0xf8,0xf0,0x86 +.byte 0xee,0x5c,0x2f,0xb0,0xb3,0x4f,0x83,0x6c,0x4a,0x8f,0xfc,0x80,0x91,0xaf,0x4b,0x21,0x9c,0x9b,0x44,0x3c,0xed,0x67,0xfb,0xa3,0x31,0x7f,0xd4,0x73,0x72,0xb9,0xc1,0x31,0x96,0x47,0x8e,0x99,0x8e,0x62,0x1a,0xfd,0xc7,0x9d,0x2f,0x4c,0xda,0xe5,0xae,0x17,0xb6,0x40,0x5f,0x9e,0xa8,0xf2,0xcc,0xd7,0xd5,0x40,0x33,0x88,0x57,0x63,0x9b,0xde +.byte 0x82,0x71,0x68,0xfe,0xaf,0x29,0x6c,0xc1,0x2c,0x2f,0x02,0x42,0xd7,0xa5,0x28,0x05,0xca,0xa0,0xb6,0x8c,0x43,0x90,0x05,0xe2,0x1c,0xb7,0x76,0x79,0x39,0xd3,0x23,0xe1,0xe7,0xbb,0x19,0x65,0x1a,0xb4,0xbb,0x5a,0xcf,0x43,0x70,0x26,0x1a,0x2f,0x61,0x78,0x75,0x08,0xb0,0x88,0xe5,0x4a,0x46,0x0a,0xfc,0xcb,0x46,0x18,0xb0,0x8d,0x9b,0xeb +.byte 0xf5,0xe1,0x83,0x04,0x84,0x4f,0xd6,0xa0,0x4f,0xb2,0x4c,0x44,0x08,0xde,0xd6,0x82,0xb5,0x9a,0x45,0x15,0xb8,0x21,0xc7,0xf5,0xe2,0xfd,0x02,0x27,0x18,0x13,0x24,0x18,0x01,0xd1,0x2a,0xff,0x63,0xf2,0xa4,0x97,0xc8,0x4b,0x3b,0xae,0x49,0x47,0x54,0xe8,0x75,0xe7,0x16,0x77,0x22,0x10,0x7b,0x3c,0xf0,0xdb,0x49,0x6e,0xd6,0x55,0x9d,0x43 +.byte 0x6f,0x6e,0x2d,0x97,0xea,0x16,0x2e,0x0c,0x85,0x89,0x67,0xe1,0x7b,0x38,0xa6,0x2b,0x89,0xf0,0xcd,0x90,0xcd,0xba,0x9a,0x70,0xa9,0xe3,0xff,0xe0,0xbd,0x15,0x3e,0x4b,0x13,0x62,0x7b,0x59,0x64,0x18,0x96,0xe9,0x6a,0xf3,0x69,0x2d,0x2d,0x25,0xe7,0x91,0xd3,0xbc,0x74,0x58,0x66,0x2f,0x5e,0x8b,0x52,0xf6,0x91,0x24,0xa8,0x6f,0xa5,0xce +.byte 0xa1,0x4e,0x3b,0xe9,0xc5,0x30,0x7e,0xa5,0xc7,0xe2,0xb3,0x71,0x3b,0x25,0xb9,0x5f,0xe5,0x9c,0xf8,0x46,0x23,0xc5,0xa2,0xc1,0x1f,0x3f,0x43,0xa6,0xaa,0xf1,0x36,0x27,0xc6,0xa8,0xed,0x0d,0x50,0x71,0xf1,0x38,0x27,0xb7,0x16,0x43,0x7c,0x7f,0x77,0x5b,0x25,0x59,0xb7,0x08,0x0d,0xc8,0x84,0xe4,0xc2,0x03,0x95,0xe5,0xf3,0x0a,0x9c,0x1f +.byte 0xde,0x98,0x7c,0xa9,0xe2,0x70,0x9e,0xde,0xf6,0x80,0xd0,0xf8,0x86,0x4a,0x7a,0x0d,0x16,0xaa,0xde,0xba,0x02,0x30,0x8a,0xe6,0x03,0x0f,0xa1,0xf1,0xe8,0xd6,0xf8,0xce,0x7b,0xba,0x74,0xa8,0x25,0xb0,0x49,0x22,0xa6,0x81,0x7e,0x71,0xc5,0x97,0x9e,0xa8,0x46,0xa7,0xe9,0x8b,0x7c,0x7c,0x4c,0xc5,0x3c,0x93,0x08,0xb9,0x8b,0x3c,0x33,0xd6 +.byte 0xc4,0x37,0xc8,0x05,0xe7,0xfe,0xc2,0x7c,0x02,0xe6,0xda,0x09,0x52,0x2c,0xc6,0xa8,0x6e,0x44,0x7e,0x55,0xf0,0x32,0x10,0xcb,0x1e,0xa7,0x77,0x8d,0xc7,0xfe,0xb5,0xf6,0x3b,0x49,0xf2,0xfb,0xe0,0x41,0x98,0xd3,0x17,0xa6,0x5d,0x3f,0x4c,0x95,0xb0,0x02,0x8d,0xab,0x36,0xb7,0xa0,0x92,0x40,0x5e,0x15,0xfb,0xa9,0xb4,0xa3,0x04,0x8b,0x6b +.byte 0x81,0x44,0x59,0x22,0x10,0xcb,0xc5,0x52,0x3f,0x78,0x70,0x00,0xe2,0xa2,0xf7,0x76,0x62,0x72,0x06,0x8b,0xbb,0x56,0x0f,0x8c,0x67,0x2f,0x52,0x3f,0x3b,0xdc,0x15,0x79,0x55,0x89,0x6c,0x61,0x23,0xcc,0x6b,0x41,0x77,0xe5,0xc4,0x90,0x51,0xc3,0x87,0x22,0x1e,0x89,0xf5,0x5b,0x41,0xd7,0x34,0x22,0x3c,0xbd,0x29,0xaa,0x54,0xed,0x5a,0x90 +.byte 
0x17,0x24,0xba,0x7a,0x46,0x5f,0x54,0x33,0x56,0x7e,0x2d,0x03,0x59,0xcb,0xbb,0x7a,0xce,0xbb,0x8d,0xf7,0xb6,0x38,0x00,0x18,0x6a,0xa1,0x6c,0xdf,0x42,0x49,0x4d,0x9b,0x4f,0xd6,0x85,0x54,0x1f,0xad,0x17,0xdd,0x66,0x0e,0x7c,0x30,0x86,0x82,0x1c,0x5a,0x81,0x08,0x55,0x51,0x5b,0x06,0x54,0x52,0x3e,0x8b,0x6e,0x72,0x92,0xd2,0x05,0x5d +.byte 0xe4,0xe8,0x0e,0x62,0x1d,0xec,0xb1,0x7f,0x42,0x05,0xd5,0xd3,0x60,0xd4,0xdc,0xa4,0x48,0xc0,0xf0,0x89,0xef,0x5b,0xae,0x5f,0xcd,0xf0,0x62,0xaa,0x3e,0xd5,0x1a,0xbe,0xe3,0x08,0xd5,0xe8,0x00,0x21,0x8c,0x0b,0x0c,0x8e,0x24,0xac,0xb2,0xea,0x44,0x9f,0xce,0x53,0x45,0x9a,0x85,0x67,0x99,0x85,0xea,0x92,0xa7,0x1d,0x86,0xb4,0x3b,0x22 +.byte 0xa2,0xcd,0x35,0x65,0xb5,0xa6,0xdb,0x6d,0x48,0xd1,0xa4,0x76,0x0c,0x00,0x30,0x62,0x86,0x06,0xda,0xa8,0xfe,0xec,0x70,0x87,0x4a,0xe8,0x2e,0x4d,0xe3,0x94,0x0b,0xdf,0x81,0xcd,0xfe,0x23,0x79,0x2c,0x2b,0xae,0xf7,0x75,0x49,0x47,0x24,0x46,0x09,0x10,0x62,0x39,0x3b,0x50,0xf1,0xfa,0xf7,0x5f,0xe4,0x7c,0xa5,0xc0,0x25,0x9e,0x20,0x4d +.byte 0xc8,0x6b,0x93,0xc5,0x4a,0x6b,0x62,0xb8,0x3b,0xe5,0x0d,0x92,0x70,0x26,0xa5,0x2b,0xd0,0x9f,0x03,0x8b,0xd3,0x1a,0xc4,0xb0,0xa3,0xc7,0xf4,0x35,0xe5,0x1d,0xe0,0xaa,0x43,0xab,0x64,0x10,0x2b,0xa4,0x09,0x42,0xee,0xba,0xb7,0xbf,0xfd,0xa6,0xff,0x76,0xe5,0x12,0xd6,0x50,0x9a,0x26,0x6b,0x3a,0xd3,0xe6,0x7d,0x3e,0x0e,0x9b,0x95,0xd7 +.byte 0xbf,0xb6,0x7e,0xfb,0x3c,0x24,0xa4,0x26,0x98,0x88,0x81,0xf4,0x56,0xa4,0xf7,0xe8,0x87,0x15,0x5e,0x9f,0x84,0xdd,0x04,0x66,0x43,0xd8,0x76,0xc2,0xa3,0xfd,0x4b,0x58,0x09,0x06,0xa6,0x60,0x5c,0x3f,0x75,0x80,0xd7,0xc4,0x29,0xf9,0x0b,0x1e,0x4d,0xe5,0x26,0xf6,0xae,0x7a,0xc1,0x05,0xf3,0xf1,0x6c,0xee,0xed,0x56,0x0b,0x51,0x66,0xbe +.byte 0x99,0xec,0x9c,0xc2,0x97,0xe2,0xed,0x09,0x1d,0xa8,0x18,0xaa,0x1c,0x9e,0x20,0x62,0xb1,0x80,0x68,0x3e,0x28,0x1f,0x4f,0x50,0x0e,0x41,0xaf,0x17,0x44,0x79,0x16,0xca,0x17,0xe9,0x13,0x66,0x0a,0x04,0x68,0x41,0xe2,0x1d,0xc7,0x00,0x1e,0x66,0xa3,0x6c,0x2d,0x52,0x8c,0x0b,0x7c,0x03,0x48,0x73,0x3b,0xa9,0x84,0xe5,0x31,0x12,0x0f,0xe8 +.byte 0x1e,0x58,0x4d,0xd0,0x1b,0xb7,0xcf,0x75,0xd5,0x2c,0xca,0x33,0x17,0x95,0x9c,0x30,0xc7,0x7f,0xe9,0xde,0xae,0x19,0x72,0x00,0x2a,0xf5,0xde,0x93,0x3f,0xf5,0x44,0xe5,0xf8,0xc7,0xeb,0x1a,0x5d,0x5b,0x11,0x30,0x09,0xf5,0x49,0x66,0x70,0x1a,0xd5,0xe6,0xfc,0xe6,0x59,0x3d,0x17,0x6c,0xb5,0x0c,0xdf,0x1e,0x9c,0x48,0xd1,0xde,0x12,0xd6 +.byte 0xc8,0x48,0xc8,0x73,0x6d,0xfc,0xec,0x07,0xce,0x02,0xe5,0xb3,0x18,0xb9,0x55,0x4d,0x64,0x07,0xf3,0xaa,0x3c,0xf1,0x71,0x22,0x31,0xbb,0x74,0x2c,0x9f,0x7b,0x68,0x9d,0x80,0x49,0x32,0x48,0x9b,0x54,0xf3,0x74,0x37,0xac,0x4e,0xb2,0x96,0xdf,0x9d,0xeb,0x43,0xe0,0xd0,0xa0,0xe3,0x77,0xbd,0x8b,0x92,0x95,0x9d,0x63,0x8d,0xa8,0x23,0x07 +.byte 0xb0,0xcb,0x9d,0x8d,0x3f,0xe2,0xd5,0x81,0x6a,0xe5,0xc2,0xfe,0xda,0x1c,0x25,0x25,0x5b,0xa8,0xad,0x06,0xec,0x0d,0x4b,0x68,0xc3,0x45,0x81,0x38,0xb0,0x22,0x71,0xa4,0x2b,0xf3,0xa6,0x05,0xae,0x0c,0x48,0x94,0x0d,0x3d,0x48,0x51,0x76,0xdf,0x79,0x66,0x0e,0x28,0xc0,0xc1,0x6f,0xc8,0x8f,0xf7,0x7d,0x37,0x06,0xa2,0x8a,0x3a,0x6b,0xab +.byte 0xe0,0x55,0x8e,0xec,0x89,0xe2,0xca,0xc4,0x01,0x03,0x5d,0xa1,0x84,0x21,0x44,0xbb,0x6b,0x36,0x63,0x57,0x4f,0x54,0x88,0x81,0xbe,0xf8,0x53,0xf7,0x57,0xee,0x30,0x85,0x03,0x11,0x86,0xff,0xe4,0xd6,0xc4,0xf0,0x3c,0xcf,0xfd,0x38,0xd8,0xcb,0xd0,0x96,0x03,0xf2,0xc7,0xfa,0x18,0xc8,0x1b,0xe6,0x77,0x3c,0x61,0xa9,0x14,0xdb,0xb4,0x5c +.byte 
0x2d,0xee,0xd7,0xe8,0xc4,0x0c,0x69,0x0c,0x55,0xe2,0x99,0x4b,0xc4,0x89,0xc8,0xee,0x48,0x0e,0x16,0xd7,0xa4,0x78,0x25,0xda,0xd3,0xa8,0xac,0x89,0x66,0x67,0x0d,0x51,0x21,0x0e,0x91,0xfb,0xb5,0xab,0x33,0xcb,0x3e,0xc7,0x0f,0x03,0x22,0x51,0x71,0x03,0xa0,0x3c,0xa9,0x35,0xcb,0x40,0xa7,0xbe,0xe7,0xc3,0x51,0x43,0xd8,0x9a,0x24,0xb7 +.byte 0x7e,0xfb,0x26,0x8d,0xa5,0x1a,0x6b,0xe7,0x97,0xe4,0xdd,0xc0,0x3e,0x98,0x67,0x55,0x79,0x56,0xb9,0x7e,0x25,0x4c,0x5c,0x5a,0x47,0x0a,0xce,0xb6,0x4d,0x2c,0x69,0x73,0xaa,0xf0,0x12,0xbb,0x9d,0xe1,0x60,0xc4,0x5b,0x10,0x32,0x6d,0x89,0x54,0xb1,0xfe,0x36,0xbe,0xb2,0x60,0x9a,0x91,0x73,0x9c,0x32,0x61,0xad,0x9a,0xf7,0x56,0x5f,0x5a +.byte 0x54,0xaf,0xb2,0x0c,0x5b,0x1a,0xe6,0x98,0x94,0xed,0x69,0x0b,0x8d,0x06,0x87,0xc9,0x20,0xdc,0x92,0x2d,0x5e,0xba,0xbb,0x15,0xef,0xc1,0x07,0x18,0x44,0x3f,0xf4,0x48,0x3e,0x7b,0xa4,0x9e,0x14,0x6b,0x97,0xdd,0x68,0x33,0x18,0xdd,0x47,0x08,0xa6,0x3b,0x8d,0x79,0x58,0x92,0xd9,0xda,0x82,0x34,0xa7,0x99,0xbc,0x43,0xa3,0x0a,0x7e,0x85 +.byte 0x0b,0xab,0x0e,0xc2,0x94,0x22,0x2d,0x05,0x99,0x9d,0x5c,0xc7,0xb2,0x7b,0x18,0x3e,0xb2,0xdd,0x47,0xb3,0xd7,0xcf,0x19,0xc7,0x55,0x5e,0x64,0xd8,0x7b,0xb4,0xf6,0x11,0x72,0xed,0xbd,0xfc,0xd8,0xe9,0x9f,0xcd,0x9a,0xeb,0xb2,0x6c,0x04,0xb9,0x88,0xf7,0x60,0x68,0xc3,0xf2,0xfd,0xa0,0x8c,0x82,0xc5,0xf7,0x5d,0xc3,0x9a,0x1e,0x49,0x27 +.byte 0x69,0x35,0xb0,0x8f,0xe9,0xb3,0xe4,0x09,0xd8,0x1a,0x73,0x9e,0x56,0x41,0xfa,0xe0,0x94,0x9e,0x0e,0x65,0xe6,0x5b,0xe2,0x12,0x39,0xca,0x86,0x0c,0xae,0xee,0x24,0x58,0xfd,0x85,0x09,0x7a,0xad,0x54,0xde,0xda,0x06,0x73,0x7d,0x11,0x7e,0x91,0x44,0xf3,0x4b,0x61,0xce,0x8a,0xff,0x76,0x92,0x2e,0x43,0x52,0xcf,0x63,0x3f,0xc4,0x1f,0x7f +.byte 0x4d,0x67,0x21,0xed,0xd7,0x88,0xdb,0x36,0x56,0x11,0xb2,0x3b,0xee,0x5f,0x2d,0x5f,0x17,0x98,0xa1,0xd5,0xcc,0x82,0xfd,0xc2,0x56,0x69,0xaa,0x68,0x86,0xaf,0x48,0x77,0xba,0xe9,0xd9,0x42,0xcd,0xaa,0xe3,0xad,0x2b,0x17,0xef,0xd3,0x54,0xc5,0x4e,0x31,0x0b,0x14,0xb7,0x73,0xc1,0x6f,0xc3,0x06,0x41,0x1a,0x11,0x19,0x9f,0xe9,0x9f,0x61 +.byte 0x4f,0x13,0x9b,0x3e,0xcd,0x7c,0xd6,0x2a,0xb3,0x87,0x84,0x58,0x58,0x10,0x1f,0xa0,0x2e,0x5c,0x15,0x8b,0x5e,0x37,0xd4,0x22,0x93,0xd9,0x67,0xe1,0xa8,0x35,0xe2,0x95,0xd8,0x4c,0x2c,0x65,0xc9,0x21,0xaf,0xf9,0xdd,0x3d,0x2c,0x0e,0x0c,0xcc,0x6b,0xad,0xb3,0x6d,0xd2,0x3e,0x65,0x8e,0x82,0x70,0x41,0xd6,0xaa,0x97,0xab,0x38,0x78,0xe4 +.byte 0x62,0x7c,0x5f,0x22,0xa3,0x1e,0xf2,0x6c,0xfe,0x3c,0xa9,0xb5,0x57,0xcd,0x96,0x11,0xd0,0x8b,0xcf,0x6d,0x06,0xcf,0x7c,0xda,0x1d,0xe4,0x22,0x5c,0x5d,0x9f,0xa8,0x24,0x55,0x45,0x93,0xc6,0xeb,0xfc,0xb5,0x71,0x5a,0x1d,0x52,0x40,0x95,0xc7,0x76,0x32,0xfb,0x2b,0x0c,0x7d,0x64,0xfa,0x5b,0x5e,0x7a,0x3b,0x0b,0xa0,0x99,0x5d,0x19,0x16 +.byte 0xe4,0x8e,0xae,0x49,0xee,0xc5,0xb2,0x24,0xd7,0x0b,0xa4,0x20,0xa6,0x74,0xc4,0x36,0x1d,0x43,0x25,0xd6,0x71,0x54,0x69,0x79,0xea,0xa3,0xd5,0xe9,0x75,0x53,0xcf,0x99,0x4e,0x3b,0xc0,0x52,0x28,0x80,0xe5,0x07,0x65,0x83,0xb3,0x24,0xfe,0x13,0x92,0xd6,0x18,0xf7,0xa3,0xeb,0x9e,0xf0,0xd5,0x69,0x93,0x79,0xda,0xb7,0x2e,0xe2,0x01,0xdd +.byte 0x9a,0xc3,0x7b,0x3b,0x17,0x88,0xe5,0xe9,0x9b,0x46,0x5c,0x5f,0x0e,0x1e,0x80,0x9b,0x11,0x1f,0xa4,0x08,0x90,0x14,0x08,0xb4,0x73,0x32,0x72,0xbe,0x43,0x4f,0x70,0x90,0xe7,0x80,0xdd,0xfd,0xa7,0xea,0x13,0xd9,0x5d,0xae,0x93,0x24,0x2b,0x1e,0xc7,0xf4,0x81,0xbb,0x5f,0xb0,0xb9,0xe4,0x35,0x39,0xf4,0x9a,0x49,0xb5,0xc0,0x47,0x18,0xc3 +.byte 
0xcc,0xbe,0x26,0x36,0x44,0x2d,0x65,0x24,0xa3,0x09,0xde,0x69,0x3b,0xb8,0xdc,0x52,0x98,0x2e,0x38,0x5f,0xf7,0xb1,0x84,0xdd,0xea,0xe2,0xe5,0xec,0x96,0x31,0xb1,0x93,0xc0,0x5b,0xc4,0x87,0x4a,0x51,0x58,0x2d,0xea,0x47,0xab,0xfd,0xd3,0x76,0xf1,0xbc,0x52,0xa7,0x94,0x6c,0x74,0x1e,0x84,0x07,0x1f,0x5c,0x18,0xb9,0x06,0x37,0xf0,0xfb +.byte 0xbd,0x5d,0xaf,0xa8,0x06,0xc9,0x86,0xf0,0xd1,0x78,0x84,0x95,0x01,0xdd,0x70,0x9d,0x71,0x51,0xb7,0x80,0x69,0xbe,0xe8,0xfb,0x8f,0x43,0x72,0xd9,0xa9,0xf1,0x90,0xbb,0xf1,0xb5,0xc0,0x75,0x93,0x4e,0x14,0xc5,0x14,0x77,0x59,0xf8,0xe5,0x81,0x11,0x25,0x48,0x51,0x46,0x2a,0x69,0x59,0x92,0xe7,0xa7,0x39,0x96,0xad,0x67,0x30,0xaa,0xb2 +.byte 0x5d,0x95,0x94,0x83,0x83,0x93,0xf3,0x52,0x81,0x1c,0x27,0x78,0x1d,0x19,0x35,0x6e,0x8f,0x16,0xe5,0x3b,0xce,0x80,0x2a,0x3a,0x89,0xb7,0x51,0xfc,0x34,0x24,0xa2,0x61,0x95,0x9e,0xd4,0x69,0xa1,0x2f,0x49,0x16,0x2d,0x12,0x05,0xfe,0x69,0x62,0x12,0xa4,0x2c,0x04,0x7b,0xce,0x3f,0x34,0xc4,0x48,0x1a,0xe6,0x64,0x4b,0x8a,0xbf,0x68,0xdd +.byte 0x54,0x15,0xd3,0x25,0x49,0xdd,0xed,0x5e,0x2c,0x0e,0x25,0xbe,0x77,0xcf,0x94,0xf4,0xe9,0xf3,0xcc,0xe6,0x94,0xf9,0xb2,0x5d,0x24,0x53,0x63,0xbb,0x66,0x8d,0x73,0xef,0x79,0x5c,0x95,0x1a,0x64,0xc3,0xfd,0xc0,0xd3,0x71,0xf4,0x79,0x19,0x79,0xa5,0x30,0xf8,0x2c,0x28,0xc2,0xc2,0x9d,0x12,0x50,0x95,0x38,0xec,0xd5,0xc6,0x28,0x94,0xaa +.byte 0x83,0x66,0x3b,0xe3,0x51,0xc7,0x6a,0x75,0x2a,0x9b,0xb9,0xb0,0xa2,0xe1,0xfd,0xaf,0x58,0xd2,0x4b,0xf4,0x22,0xef,0x77,0x1e,0xa0,0x00,0xd7,0x9e,0x20,0x63,0x87,0x1d,0x98,0xab,0x0e,0x57,0x31,0x4b,0xda,0x90,0x3a,0xe6,0x6e,0x5e,0xd4,0x17,0x06,0x83,0x4f,0x90,0x33,0x1c,0xe5,0xea,0xf7,0x8d,0x95,0xa2,0x1e,0x7d,0x27,0x15,0x49,0x68 +.byte 0x3a,0x54,0xe3,0x1e,0x60,0x72,0x42,0xa6,0x8c,0x5b,0x63,0x1d,0x7d,0xb1,0xe2,0x7e,0x8b,0x19,0xf4,0x25,0x6c,0x77,0x64,0x15,0x5e,0x4c,0xfa,0x35,0x68,0xd2,0x54,0x11,0x5a,0xac,0x85,0xb0,0xb3,0xe8,0xa8,0x70,0x36,0xa8,0xe5,0x04,0xd1,0x82,0xdc,0x62,0x63,0xe6,0x3f,0x86,0x46,0x77,0x08,0x6b,0xa8,0x09,0xd0,0x56,0x09,0x87,0x9c,0x65 +.byte 0x8e,0x53,0xae,0xa6,0x2b,0x59,0x23,0xca,0xe9,0xc7,0xc4,0xb5,0xb9,0xca,0x20,0xf6,0xcc,0x62,0xfd,0xb5,0x66,0x66,0x86,0x99,0xb2,0x5a,0xeb,0xac,0xff,0x22,0xf4,0x94,0x9c,0x6d,0xc9,0xce,0xf3,0x8d,0x26,0x7f,0x06,0x40,0x71,0x8b,0x3e,0x5c,0x3e,0xe6,0x11,0x64,0x91,0x79,0xbe,0x66,0x80,0xd2,0xf6,0x2d,0x28,0x4b,0x6c,0x8d,0x9c,0x5b +.byte 0x1e,0xd1,0x15,0xb0,0xdf,0xfb,0x57,0xaf,0x4a,0xab,0xde,0x12,0xe9,0xb8,0x41,0x3d,0xc3,0xff,0xb2,0xc1,0x86,0xb0,0x06,0x5b,0xaf,0xa4,0x30,0x62,0xd0,0xd8,0x91,0x36,0x28,0xc1,0xc2,0xef,0x60,0x5d,0x42,0x04,0xd5,0x6b,0x10,0xa9,0x6c,0x88,0x5c,0x56,0x59,0x4a,0x87,0xdc,0x7c,0x41,0x03,0xb3,0x7c,0x35,0x8c,0x52,0x0e,0xc1,0xd5,0xdf +.byte 0x9b,0x8a,0x2e,0xc2,0x6b,0x06,0x7f,0xb4,0x93,0xc9,0x52,0xd0,0xc5,0x57,0x78,0x9e,0xf9,0x08,0x36,0xbc,0x4b,0xc1,0xbd,0x71,0x35,0xf8,0x73,0xae,0x9c,0xbc,0xf1,0xd1,0xba,0xe3,0x7f,0x49,0x9b,0x9b,0xb3,0xe2,0x7d,0x7d,0x18,0x6d,0x0d,0x96,0xe3,0x50,0x28,0xf2,0x7c,0x7a,0x71,0x27,0x33,0x3c,0xd3,0xeb,0x3d,0x5a,0x79,0xb5,0x69,0xed +.byte 0x40,0x38,0xbe,0xc9,0xad,0x11,0x7b,0x9d,0xe6,0x71,0xc8,0x89,0x54,0x51,0xf0,0x8f,0xdc,0xad,0x96,0xc3,0x04,0x60,0x5f,0x6d,0xa0,0x37,0xba,0x1c,0x69,0xca,0x42,0x26,0xeb,0x31,0x34,0x8d,0xae,0x25,0xe2,0x29,0x8d,0x19,0x9f,0xfa,0x75,0x91,0x4b,0x51,0xcd,0x76,0xd6,0x8f,0xa2,0x40,0x79,0xc3,0xbb,0x61,0xaf,0xc4,0x69,0xf5,0x8b,0x8a +.byte 
0xb6,0x2c,0x25,0xb9,0x3c,0x8e,0x13,0xa4,0x0f,0x52,0x72,0x11,0x4b,0x89,0x63,0x01,0x05,0x54,0xd5,0x0d,0x5f,0x91,0x59,0x84,0x64,0xac,0xf7,0x9c,0xa3,0x48,0x31,0x4a,0x2e,0xea,0xf8,0xf8,0x0e,0xf0,0xd9,0x4d,0x06,0x60,0x11,0x4a,0x72,0x6f,0x93,0x93,0x85,0xf0,0x20,0x55,0x8b,0x37,0xf1,0x29,0x92,0x2d,0x1f,0xa1,0x6c,0x7c,0x90,0x4f +.byte 0xdb,0x78,0xcc,0x6c,0xb2,0x14,0x85,0x07,0x34,0xc8,0x98,0x18,0x52,0x2d,0x6b,0x13,0x63,0xc5,0x31,0x20,0x8e,0xa9,0x88,0x6b,0xb3,0x3f,0x1a,0x68,0x2f,0xf9,0xf3,0x97,0x29,0x68,0x22,0x89,0xb0,0x45,0xc4,0xf4,0x1f,0x31,0xba,0x97,0x14,0x59,0xae,0x05,0xe0,0x99,0x5b,0x29,0xcf,0xe3,0xf0,0x2a,0x0c,0xca,0x5f,0xc1,0xe7,0xe7,0x11,0x48 +.byte 0x73,0xc0,0x86,0x0b,0x59,0xc2,0x8a,0xfa,0x44,0x51,0x1c,0x84,0xdf,0x2f,0x4d,0xab,0xca,0xea,0xe1,0x48,0x9a,0xa1,0x86,0x60,0x47,0x7a,0x86,0x30,0x6a,0xba,0xbe,0x6a,0x9b,0x34,0xf4,0x52,0x0e,0xae,0x7f,0xbd,0xe0,0xf4,0x5f,0xfd,0xbc,0x57,0x02,0x95,0x6f,0xad,0x78,0x2e,0xa7,0x46,0x1c,0x2d,0x98,0x40,0xb7,0xfa,0xb5,0x08,0xee,0xb5 +.byte 0x25,0x51,0xaa,0x1a,0x14,0x41,0x48,0xe0,0x8f,0xe7,0x2f,0xfc,0xfd,0x47,0x10,0x55,0x90,0x02,0xeb,0x7f,0x0d,0x40,0xa8,0x4b,0x82,0xdc,0xab,0x43,0x35,0x62,0xa1,0x1d,0x5a,0xb0,0xc0,0x93,0x75,0x3d,0x68,0xd9,0xf8,0x31,0x22,0xfd,0x30,0xda,0xea,0xea,0x7c,0x30,0xf8,0x6f,0x75,0x5f,0x07,0x39,0xfe,0x69,0x93,0x73,0x22,0xa2,0x72,0xed +.byte 0x39,0x2f,0x00,0x5c,0xc3,0x14,0x86,0x90,0xda,0xc9,0x09,0x43,0x80,0x85,0x22,0x98,0xb0,0x4e,0x05,0x47,0x8f,0xc7,0xba,0x2e,0x4c,0x8f,0x57,0x8a,0xe9,0xb0,0x97,0x3b,0x51,0x12,0xcb,0x88,0xfd,0x5e,0x7f,0xa6,0xc6,0x00,0xd0,0x3a,0x3a,0x70,0x9e,0x56,0x28,0xa0,0x08,0x76,0x58,0x57,0x4a,0x0f,0xff,0x31,0x44,0x08,0x6c,0x23,0x79,0xad +.byte 0x35,0x95,0xc5,0xc8,0x26,0x0f,0xb3,0x17,0x04,0x1d,0xde,0x16,0x5d,0xb8,0x71,0x76,0x89,0x0b,0xd6,0xd8,0x9d,0xa1,0xdf,0xcb,0xb5,0x1c,0x86,0xc3,0x15,0x8d,0xaa,0x25,0x82,0xbf,0x6b,0x06,0xfb,0x1b,0xf5,0x11,0xaa,0x14,0x0e,0x67,0x7f,0xbd,0x46,0x21,0x8f,0x6d,0xbd,0x63,0xe6,0x14,0x05,0xa2,0xee,0x56,0xee,0xe6,0x37,0xf9,0xc0,0x2f +.byte 0xc9,0xe0,0x8e,0xdb,0xf7,0xf6,0xcb,0x83,0x79,0xcc,0xe3,0xf6,0x30,0x9d,0x56,0x31,0x40,0xd2,0x50,0x25,0xb6,0x89,0x16,0x97,0x65,0xd8,0x8d,0x1a,0xa5,0xf4,0x47,0xfc,0x4c,0x73,0x07,0x42,0x9c,0x8f,0x7f,0x10,0xb4,0x96,0x33,0x1e,0xe2,0xff,0x0c,0x33,0x35,0xbc,0x37,0x01,0x2b,0x67,0xda,0xca,0xcf,0x87,0xa2,0x38,0x71,0x6b,0xf4,0xcf +.byte 0xa6,0xc6,0x6a,0x90,0x5c,0xa0,0x8b,0x66,0x44,0xc7,0xc2,0x05,0x24,0xee,0x53,0x99,0xf3,0x07,0x78,0xb0,0x17,0xf8,0x11,0xf9,0x52,0x20,0x41,0xc5,0xdb,0x4e,0x92,0xd3,0xeb,0xd2,0x86,0xea,0x9b,0xc3,0x4c,0x1b,0x75,0xcd,0x15,0x0c,0xe0,0x28,0xe9,0xe1,0x99,0x98,0x96,0x33,0x06,0xea,0xa8,0x4e,0xde,0xc1,0x1c,0xfe,0x6c,0xca,0xac,0x6d +.byte 0xc4,0x3a,0x7d,0xd2,0x41,0xf5,0xb3,0x7d,0x1c,0x28,0x93,0x72,0xf8,0x08,0xc1,0x71,0x72,0x4c,0x41,0x68,0x38,0x80,0x2e,0x4b,0xa6,0xc5,0xc7,0xb4,0x24,0x29,0xd0,0xce,0xb2,0x3d,0xc4,0x60,0x5b,0xeb,0x2d,0x80,0x13,0xee,0x95,0x41,0xfe,0x49,0x6d,0x89,0xc0,0x7a,0x61,0x51,0x3f,0xbb,0x24,0x7c,0x64,0x5e,0x9f,0xf7,0x60,0x88,0x95,0xe8 +.byte 0x60,0xc5,0xf6,0xc3,0xc3,0xd4,0x43,0xce,0xf9,0x4e,0x35,0xf2,0xfa,0xb0,0x2b,0xe3,0xfe,0xb8,0x88,0x19,0xf2,0x89,0xc0,0xb5,0x00,0x61,0xc8,0xe5,0xaa,0xde,0x18,0xb4,0xd4,0x21,0xbe,0xcc,0x61,0xc7,0xc9,0xfe,0x22,0xcc,0x65,0xf6,0x79,0xe8,0x4d,0x1c,0x30,0x31,0x7a,0xd4,0xbc,0x98,0x2d,0x72,0x5e,0x5c,0x4f,0x7e,0x52,0x9c,0x95,0x20 +.byte 
0x29,0xa4,0x0b,0xf7,0xb2,0x7d,0xcc,0xc3,0x8c,0x94,0xb0,0x09,0xf4,0x6f,0x59,0x63,0x91,0x2a,0x06,0x80,0x09,0x01,0x3c,0x73,0x83,0x42,0xa1,0x5c,0x0f,0x42,0xf4,0x74,0x3c,0x24,0x8c,0xbe,0x91,0x73,0xdf,0xf1,0xea,0x21,0xbd,0xc9,0x36,0x17,0xca,0x81,0x28,0xd9,0x4a,0xc4,0x2e,0xdf,0x4c,0x4f,0xbd,0x1e,0xbc,0xe9,0x32,0x12,0xd3,0x8f +.byte 0x48,0x9b,0x4f,0x49,0x23,0x54,0x15,0x15,0x14,0x8b,0x18,0x64,0x7d,0x08,0x7f,0xc4,0x56,0x01,0x94,0x4e,0x50,0xe8,0xf2,0x4a,0xb5,0x3c,0xa0,0xb5,0xaf,0x55,0x70,0x44,0x41,0x5c,0xe6,0x61,0x5a,0xbb,0xf2,0xe6,0xc9,0x05,0x33,0x45,0x8f,0xbc,0xe5,0x59,0x7f,0x66,0xc5,0x61,0x4d,0x1b,0xc7,0xee,0x45,0x7d,0x57,0x8f,0x6c,0x9d,0x8b,0x87 +.byte 0x98,0xa8,0x58,0xac,0x4a,0x31,0x79,0xd6,0x26,0x08,0x2f,0x28,0x3f,0x31,0x77,0xad,0xff,0xe1,0x9d,0xa8,0xf7,0xe0,0x76,0x66,0x48,0x00,0x52,0xe8,0x9a,0xb2,0x47,0x5e,0x0a,0x87,0x86,0xaf,0xf6,0x7d,0x46,0x78,0x66,0x68,0xf7,0x68,0x0c,0x6f,0x5c,0xd7,0x09,0xc0,0xd7,0x90,0x98,0xe2,0x5c,0x07,0xe9,0xd1,0x58,0x48,0x57,0x9f,0x48,0x99 +.byte 0x87,0xdf,0x06,0xc1,0x35,0x0f,0xd8,0xb0,0xa9,0xfa,0xdc,0x31,0x76,0xd1,0xad,0x47,0x80,0xe4,0x74,0xe0,0xda,0x4b,0x77,0x8b,0x71,0xab,0x9a,0x8e,0xd7,0x6b,0x91,0xb1,0xdb,0x78,0xd2,0x86,0xf7,0x61,0x1b,0xdc,0x34,0x57,0x32,0x51,0xee,0xd3,0xff,0xb2,0x6c,0x6a,0x79,0x90,0x9c,0x1f,0x6b,0xe7,0x43,0x20,0x05,0x4f,0x66,0x83,0xd0,0x56 +.byte 0xe1,0x21,0x63,0xf4,0xd6,0x96,0x91,0xcb,0x51,0x3c,0x13,0x88,0x97,0x26,0x88,0xda,0x7c,0xd4,0x0d,0xcb,0xdf,0xc2,0x7d,0xcd,0x2c,0x0e,0x28,0x23,0x21,0x5f,0xbe,0x5d,0x62,0x58,0x6c,0xa7,0x45,0xae,0x1f,0xac,0x35,0x53,0xdb,0x2c,0xa6,0x71,0xe4,0x11,0x5e,0x59,0xbe,0xd5,0x20,0x2a,0xc4,0xcd,0x4c,0x1b,0xe0,0x38,0xef,0x02,0x0c,0x5f +.byte 0x5a,0x1b,0xf9,0x1e,0x32,0x63,0xd7,0xa6,0x0f,0x1d,0x98,0xd5,0x3a,0x0f,0xf6,0xcc,0xfc,0xd6,0xb4,0x87,0xc5,0x76,0xd8,0x3e,0x72,0xb0,0x20,0xfe,0xb3,0xfc,0x48,0x4c,0xd1,0x71,0xcd,0x13,0xef,0xe8,0x40,0xd9,0x0d,0xf6,0x1d,0x5b,0xa4,0x26,0x56,0x8c,0x66,0xcb,0x18,0x5a,0x5f,0x86,0x43,0x2c,0xa4,0x1e,0x00,0x3f,0x09,0xbf,0x8e,0x61 +.byte 0xad,0x2a,0x44,0x97,0x35,0xb2,0xf3,0x50,0x5f,0xfa,0x01,0x74,0xbf,0x70,0x46,0x38,0xf1,0x15,0xaa,0x04,0xfe,0xe9,0x3f,0x43,0x2f,0x53,0xcb,0xea,0x5c,0x04,0x8e,0xe6,0x43,0xeb,0xc0,0xd9,0xbf,0x4a,0xc1,0xbc,0xf9,0x11,0xd5,0x33,0xdc,0x41,0x8e,0xfe,0x5e,0xf3,0x8c,0x80,0x47,0x46,0x01,0x9e,0xa9,0x2c,0x2d,0xd2,0x90,0x7f,0xce,0x7c +.byte 0x59,0x78,0xaa,0xbb,0x96,0x52,0x0a,0xf3,0x18,0x1f,0x0b,0x41,0xc1,0xd5,0x12,0x14,0x1a,0xe1,0x4e,0xac,0xf8,0x2a,0x56,0xfe,0x66,0x34,0x21,0xdf,0x1f,0x6a,0x02,0x85,0xd2,0x38,0xc0,0x39,0x5c,0xa7,0x3f,0xcc,0x2b,0x6f,0x69,0xe7,0xa7,0x0a,0x36,0xf1,0xa9,0x77,0x59,0x2c,0x44,0x8b,0x72,0xc9,0xc2,0x74,0x32,0x48,0x76,0x19,0x1e,0x49 +.byte 0x10,0xe6,0x46,0xdf,0x82,0x9b,0xad,0x4e,0x40,0x20,0xd7,0xd3,0xf5,0x5c,0xbc,0x25,0x94,0xd1,0x68,0xaf,0x29,0xc5,0xcd,0x1b,0x86,0x4b,0x88,0x21,0x6e,0xeb,0x06,0x14,0xb5,0x15,0xe7,0x26,0x01,0x05,0x4e,0x3a,0x2a,0x24,0xbe,0xf2,0x64,0x6e,0xf4,0x9c,0x60,0xf8,0xd4,0xfd,0x4b,0xc0,0x0e,0x68,0x0d,0x19,0x26,0x87,0xa5,0xbf,0xe1,0x16 +.byte 0xf0,0x27,0x58,0xa8,0x3a,0xed,0x27,0x5b,0x73,0x4f,0x19,0x40,0x58,0x36,0xf6,0xfd,0x60,0x37,0x09,0x74,0x3c,0xb9,0x76,0x9a,0x32,0xfd,0x98,0x79,0x53,0xb3,0xea,0x3a,0x98,0x21,0xf9,0xb2,0x97,0xe4,0x00,0xb6,0xed,0x67,0xc4,0x76,0x8f,0x1e,0x4d,0xc8,0x2e,0xf4,0x54,0xd9,0x09,0xd7,0xcb,0xa0,0x91,0x1e,0x5a,0x60,0x53,0xbc,0x3e,0x35 +.byte 
0x69,0xa6,0xca,0xf3,0xce,0x41,0x84,0x71,0xee,0xf3,0x75,0xd4,0x7a,0x71,0x36,0x62,0xe3,0x08,0xae,0x40,0x05,0xde,0x01,0x34,0x92,0x5f,0x71,0xa9,0x08,0xb3,0x43,0xcd,0xe7,0x2f,0x42,0x7e,0x9c,0x1e,0xfe,0x9a,0x40,0x99,0x58,0x31,0xd9,0x8d,0x5d,0xda,0x75,0x14,0x3f,0xae,0x45,0x27,0x85,0x47,0x7d,0x41,0x0e,0x94,0x20,0xee,0x11,0xd0 +.byte 0x1e,0xcd,0x00,0x56,0xb7,0x59,0xe6,0x58,0xab,0x2c,0xa6,0x44,0x14,0x8c,0xff,0x49,0x7b,0xe5,0xf7,0x93,0xd5,0x78,0x1a,0xe0,0x16,0xd8,0x24,0x08,0x1e,0x70,0xce,0x1a,0x84,0x87,0x6b,0xe5,0xf2,0x43,0x5f,0xb3,0x34,0xaa,0x85,0x3e,0x9e,0x2e,0x86,0x22,0x74,0xe2,0x1a,0x87,0xfb,0x1b,0x6c,0x08,0x8c,0x43,0xb4,0x85,0x75,0x2c,0x13,0xc2 +.byte 0x18,0x94,0xe8,0x0d,0x09,0xd5,0x8f,0xd4,0xca,0x50,0x93,0x9f,0xa3,0x9f,0x3b,0x3c,0x54,0x68,0xa9,0xb1,0xdd,0x0a,0x0b,0xe2,0x15,0x92,0x9c,0x6f,0xfa,0x45,0x6f,0x0a,0xb4,0x6b,0xcb,0xdc,0xa4,0xf3,0xf0,0xa6,0x1c,0x8a,0x60,0x42,0x35,0xa8,0xe3,0xdf,0xc8,0xdc,0xbb,0xbe,0x95,0xa7,0xac,0x08,0x08,0xbc,0x56,0x1a,0xa4,0xc2,0xd2,0x53 +.byte 0xfa,0xb2,0x89,0x4f,0xb8,0xe4,0xb9,0x90,0x95,0x91,0x2f,0x0f,0x93,0xa9,0x8c,0xc6,0xf8,0x01,0x34,0x08,0xe6,0x8c,0x58,0x43,0x57,0x40,0xf9,0x78,0x83,0xea,0x92,0x70,0xa8,0xa5,0xc8,0x9e,0xf8,0xc6,0x39,0x4c,0xb4,0xe9,0xbb,0xdf,0xd2,0x52,0x43,0x6b,0x6c,0x8b,0x2c,0x47,0xd7,0x11,0x42,0x3d,0xc7,0x3f,0xce,0xd1,0xd9,0x28,0x5b,0xce +.byte 0xec,0xb6,0x31,0x3a,0xc9,0xad,0x0c,0x93,0x82,0x2b,0xf6,0xdc,0xd4,0xcd,0x80,0xe1,0x75,0x45,0xeb,0x3b,0xbf,0x12,0x42,0xeb,0x71,0xc1,0x8b,0x27,0xd5,0xcb,0xd9,0xb6,0xe8,0xe9,0xc6,0x79,0xff,0x38,0x88,0x87,0x72,0xf2,0x71,0x4a,0x44,0x55,0x0f,0x9c,0x93,0xcf,0x15,0x18,0x44,0x62,0x2a,0xc5,0x0a,0x80,0x69,0x91,0x6e,0x4b,0x30,0x4e +.byte 0x3f,0x2f,0xb5,0x65,0x9e,0x65,0x07,0x36,0x9b,0xba,0x5f,0x81,0xd9,0x60,0xbe,0x1f,0xf5,0x98,0x20,0xf9,0x9e,0x53,0xf7,0x5d,0x57,0x7f,0x22,0xaf,0x8e,0x82,0x9e,0x0f,0x33,0x74,0x37,0x26,0x61,0x67,0xf6,0xfd,0x2c,0xab,0xd8,0x18,0x1d,0x10,0x48,0x7a,0x1d,0xed,0xbb,0x57,0x83,0xf9,0x82,0xf5,0xe3,0xf9,0x98,0x5c,0xc0,0x3e,0xee,0x38 +.byte 0x0a,0x57,0x10,0x22,0xc4,0xe8,0x1d,0xe3,0x46,0xa3,0x81,0x5e,0x92,0xba,0xcc,0x53,0x48,0x85,0x33,0x58,0xa2,0x3e,0xea,0x0a,0xfb,0x72,0x5c,0xcd,0xd9,0xa4,0x3f,0x56,0x99,0x35,0x92,0x6c,0xe8,0xf2,0x59,0x0f,0xc8,0x6a,0x21,0xb2,0x9f,0xa2,0xf6,0xf3,0x1b,0xec,0x38,0x95,0xed,0xef,0x00,0x09,0x16,0x6e,0xf7,0xf8,0x1a,0xef,0x0d,0x2b +.byte 0xef,0x83,0x8a,0xc2,0x22,0x3d,0x50,0xa3,0x70,0x52,0xe8,0xad,0x11,0x44,0x83,0x80,0xfe,0x88,0x7e,0x40,0x02,0x8f,0x4a,0x5d,0xd3,0x28,0x66,0x75,0x5a,0xf2,0x38,0xb5,0xdc,0x54,0xa8,0xb3,0xaa,0x76,0xdb,0x73,0xe0,0xd1,0xd7,0x51,0x20,0x8c,0x38,0x18,0x46,0x25,0x2e,0x0d,0x5b,0x61,0x9d,0x36,0x9a,0x14,0xfb,0xc8,0x4e,0x5a,0xba,0xa1 +.byte 0x98,0x34,0xfd,0x05,0x2c,0x87,0x58,0x8d,0xe3,0x5d,0x79,0x5a,0x45,0xff,0x75,0x25,0x98,0xbd,0xe4,0x9d,0x1a,0x70,0x79,0xaa,0x44,0x1a,0x10,0x7f,0xfb,0xe9,0x30,0x81,0xc7,0xa2,0x81,0x41,0x49,0x41,0x4e,0x42,0x5f,0x8a,0x9b,0x10,0xe2,0xdc,0xd9,0xdf,0xbd,0x61,0x29,0x72,0xa5,0x39,0xb7,0xf6,0x9f,0x4e,0x98,0xb8,0x04,0xae,0xd7,0xda +.byte 0x9a,0x9f,0x08,0xb8,0x2c,0x40,0x14,0x6d,0x01,0xb7,0x86,0x58,0x55,0x42,0xe5,0xdb,0x5f,0x4a,0xef,0xd8,0xed,0xdf,0x3b,0x24,0x1c,0xe4,0xb1,0x73,0xd1,0xce,0x29,0x96,0xde,0x8e,0xf3,0x1d,0x8d,0x75,0x57,0xd3,0x9a,0xf8,0xff,0x1a,0x4c,0x0c,0x47,0x82,0x83,0x73,0x34,0x43,0x55,0xfa,0xf2,0xd4,0x38,0xed,0xde,0x6d,0x24,0x55,0x90,0x06 +.byte 
0xd6,0x03,0x52,0x28,0xc7,0x38,0x4a,0x16,0x95,0x4d,0xf4,0x46,0x56,0xf7,0x63,0x1f,0xe4,0xa9,0x51,0xc6,0x0b,0x85,0x42,0x40,0x8e,0x49,0x1e,0xc2,0xab,0xeb,0xda,0x99,0x26,0xf6,0x6e,0x00,0x8f,0x26,0x82,0xef,0x03,0xb0,0xd4,0xdb,0x54,0x46,0xdf,0xdc,0x23,0xaf,0xa8,0x6a,0x9f,0xb7,0xf9,0x41,0x07,0x5e,0x2d,0xcf,0x85,0xfd,0x9c,0x46 +.byte 0x30,0xb9,0x14,0xca,0xe2,0x30,0x12,0x06,0x88,0x08,0x05,0x2c,0x9a,0x4b,0x52,0x98,0xa9,0x99,0xd7,0xca,0xb5,0x1e,0x60,0x44,0xd9,0x5c,0x19,0x42,0xbe,0xa5,0x04,0xfd,0x7a,0xfc,0xb9,0xdf,0xd6,0xe3,0x6d,0x02,0xe3,0x96,0xf6,0xae,0xf3,0x78,0x1d,0x90,0x6d,0x86,0x17,0xf7,0xb7,0x6b,0x1d,0x52,0x32,0x5b,0xc0,0x31,0xaf,0x09,0x90,0x5e +.byte 0x81,0x75,0x17,0x47,0x6b,0x5e,0x9a,0x40,0xa5,0xa8,0x84,0x60,0xdc,0xdb,0xd2,0x89,0xcd,0xb2,0x72,0xf4,0x74,0xda,0x5d,0x34,0xf8,0xc6,0x1b,0x26,0x3e,0x8b,0xc7,0x73,0xf9,0x0c,0x93,0xf4,0x40,0x02,0xe0,0xed,0xe5,0xa0,0xae,0x91,0x03,0x85,0xa8,0x2f,0xe2,0x72,0xfe,0x17,0x7d,0x2b,0xa6,0x39,0x10,0x80,0x4c,0x58,0xaa,0xd8,0x22,0x7d +.byte 0x2f,0xbf,0x0c,0x40,0x48,0xfa,0xbe,0x40,0x4c,0x32,0x96,0x69,0xa5,0xab,0x0b,0x1e,0x33,0x9b,0xcf,0xe6,0x4e,0x2b,0x41,0x5a,0x21,0x23,0xa1,0xbb,0xd3,0xd6,0xd1,0xfd,0xbd,0x55,0xfc,0x92,0x92,0xcb,0x4b,0x72,0x39,0x8b,0xeb,0x72,0xdd,0xf7,0x77,0x43,0x52,0x2f,0x99,0x14,0x6e,0x41,0xce,0x1d,0x57,0x2c,0x09,0xd2,0x18,0xec,0x1b,0x89 +.byte 0xa0,0xe9,0xfe,0x1e,0x41,0xda,0x0f,0x76,0x02,0x38,0xec,0x9a,0x30,0xb7,0x5a,0x54,0x70,0xbc,0xe8,0xfa,0x06,0xd0,0x80,0xfb,0x27,0xd2,0xd8,0x00,0x80,0x65,0x9d,0x23,0xfd,0xad,0x26,0xb8,0xdc,0x09,0x4f,0xfb,0x52,0xcd,0xe4,0x41,0x68,0xca,0xdd,0xbc,0x2a,0x62,0xeb,0xa6,0x32,0x71,0xb0,0x08,0xb6,0x9f,0x3e,0x74,0xfe,0xb0,0xd4,0x9d +.byte 0x9e,0x6c,0x50,0x96,0x8a,0xde,0xd6,0xe9,0xde,0x2c,0xa6,0xf0,0x9f,0x67,0x00,0x50,0x0a,0x8c,0xe5,0xc2,0x37,0xcc,0xf0,0x53,0xeb,0x72,0xf2,0x87,0x77,0xee,0x80,0xe8,0xb2,0xa1,0x13,0x52,0x70,0xe6,0x8f,0x70,0x17,0x90,0x60,0xcb,0xac,0xb2,0x72,0xef,0xd9,0xb5,0xc3,0x68,0x57,0xdf,0x2d,0xcb,0x5a,0x35,0xf9,0x2e,0xfb,0xef,0x6e,0x77 +.byte 0x5d,0x21,0x37,0x4b,0x36,0x9b,0x3f,0x03,0x65,0xc9,0x84,0xb1,0x12,0x99,0xd1,0x6b,0x00,0x71,0x37,0xc7,0x57,0x82,0x44,0x7f,0xe1,0x81,0x24,0x70,0x96,0xd5,0x27,0xba,0x36,0xf7,0x25,0xc6,0x1c,0x7c,0x1b,0xdb,0xa3,0x6a,0x3e,0xb9,0x69,0x78,0xf7,0x51,0x46,0xe2,0x74,0xd3,0xfc,0xef,0x58,0x63,0x53,0x1d,0xd7,0xd0,0x8a,0x6a,0xd3,0xb0 +.byte 0xb9,0xbb,0xba,0x43,0xbf,0x8b,0x6b,0x04,0xd2,0xb1,0xe8,0xd1,0x72,0x3f,0xdc,0x2b,0x01,0xa6,0x2f,0x9c,0x7d,0x65,0xa1,0x9f,0x9b,0x4d,0x70,0x26,0x11,0x4c,0xb2,0xe1,0x01,0x0e,0x78,0xf2,0x32,0x87,0x2d,0x8e,0x95,0x02,0x76,0xca,0xe5,0x71,0x5f,0x36,0x35,0xb9,0xbb,0xc3,0xdf,0xf3,0x1e,0x1a,0x7a,0xe4,0x2c,0xdf,0x64,0x5d,0x96,0x12 +.byte 0xea,0x5c,0x14,0x73,0xa0,0xf1,0xbc,0xa9,0x6e,0x30,0x8a,0x47,0xf0,0x4b,0x9b,0x4c,0xc5,0xb0,0xbe,0x15,0x32,0x1b,0xde,0x0c,0x39,0x6a,0x6d,0x4e,0x3b,0x69,0x4c,0xb4,0x1f,0x56,0xf0,0xa1,0xb1,0x8c,0x29,0x5c,0x87,0x54,0xf2,0x5b,0x51,0x03,0x20,0x70,0x90,0x38,0x66,0x07,0xcc,0xd7,0xde,0x96,0x40,0x82,0xee,0xb5,0x87,0x2a,0x86,0xec +.byte 0x66,0x09,0xb7,0x4a,0xfe,0x4e,0x92,0x89,0x07,0xde,0x35,0xc4,0x6e,0x91,0x25,0xfd,0x18,0xfa,0xd9,0x8f,0xa7,0xa6,0xa7,0x6b,0x32,0xba,0xd3,0x1c,0x90,0xb9,0x8a,0x6c,0x9f,0x3f,0xb5,0x16,0x81,0x81,0xee,0xd7,0x55,0xc1,0x41,0x62,0xfd,0xe9,0x4c,0x5d,0xd7,0x70,0xdd,0xc6,0x4a,0x2b,0x42,0x77,0xe7,0x74,0xed,0x02,0x80,0x0d,0x7c,0x73 +.byte 
0x8e,0xf0,0xd3,0xb0,0x20,0xbb,0xc8,0x82,0x06,0xdd,0x56,0x64,0xcb,0x9c,0xda,0xa1,0xa9,0x92,0xbc,0x8c,0x65,0x03,0xcd,0x68,0x87,0xa2,0x94,0x41,0x3c,0x36,0x96,0x1f,0xa4,0xd2,0x6d,0x5d,0x9f,0x2d,0x0c,0xf9,0x8a,0x82,0x19,0x93,0x47,0x62,0x71,0x8e,0x59,0xaa,0xf1,0x87,0xe0,0xb8,0xab,0x10,0x7f,0x4e,0xa8,0xa3,0xe2,0x32,0x58,0xb0 +.byte 0xcf,0x12,0xc0,0xf8,0x94,0x4a,0x61,0x36,0xdc,0x2d,0xb5,0x91,0xf9,0x0f,0x7d,0x91,0xd3,0xc7,0x03,0x8a,0xae,0x5c,0x22,0x8c,0x60,0x30,0xf4,0x71,0x51,0x00,0xf5,0x5d,0xe9,0x37,0x6c,0xae,0x64,0xff,0x45,0x35,0x4b,0x47,0x08,0xca,0xda,0x7b,0xe9,0xef,0xcb,0x27,0xcb,0x7e,0x3c,0xa6,0xd2,0x38,0x54,0x74,0xc3,0x7c,0xf8,0x71,0xb7,0x47 +.byte 0xe9,0xe0,0x43,0x03,0x3b,0x41,0x57,0xc3,0xda,0xa1,0xcb,0x64,0xb1,0x31,0x0d,0x12,0x45,0x3a,0xa0,0xad,0x6b,0xc7,0x26,0x62,0x50,0xcf,0x94,0x5a,0x30,0x8d,0xf6,0x91,0x49,0x9e,0xd5,0x84,0x0e,0x0c,0xe3,0x47,0x08,0x7f,0xa1,0x54,0x78,0x1b,0xa8,0x2c,0xbc,0x12,0x4f,0x7e,0x53,0x1b,0xca,0xfb,0x09,0x35,0xe0,0x9c,0x15,0xea,0xf6,0x3e +.byte 0xb2,0x20,0x9e,0x2c,0x81,0x6f,0xa4,0xb5,0x6b,0x04,0x6d,0xd1,0x90,0x66,0x46,0xdc,0x4b,0x71,0x7e,0x4b,0x3f,0xd6,0xe1,0xa8,0xc0,0xa7,0x45,0x85,0xe3,0x98,0x30,0xda,0x23,0x68,0x55,0xd8,0x96,0xb1,0xcc,0xeb,0xe1,0x95,0x0b,0x20,0xf3,0x4c,0xf2,0xc5,0xfa,0x0e,0xca,0xf5,0xc9,0xb3,0xd7,0xb4,0x1b,0x9f,0xef,0x82,0x56,0x4c,0xc5,0xa5 +.byte 0x21,0xda,0xcc,0x19,0x69,0x68,0xcb,0x37,0xb2,0x0c,0x73,0xb1,0x13,0x61,0x6b,0xca,0xda,0xfc,0xf7,0x1c,0xbc,0xd1,0x72,0x56,0xb8,0x7d,0xa1,0xef,0xc4,0x32,0x38,0xa3,0xdb,0x8b,0x2d,0x0a,0xce,0xcb,0x86,0x51,0x60,0xd2,0x47,0xf0,0x97,0x58,0xd8,0xa5,0x12,0x77,0xfc,0x32,0x04,0x29,0x61,0xfc,0xab,0xc2,0x42,0x86,0xd9,0x57,0x80,0xad +.byte 0x00,0xf0,0x9a,0x2a,0xac,0x52,0x27,0xd6,0xf8,0xd6,0x38,0xc8,0xfc,0xc1,0xab,0x4f,0x41,0xbf,0x8e,0x60,0x20,0xeb,0x24,0x36,0xd8,0xd8,0x25,0x6f,0xc8,0x5d,0x6b,0x00,0xdd,0x7a,0xe2,0x37,0xe4,0x13,0xd0,0xaa,0x5c,0x56,0x32,0x98,0x00,0x4b,0x8a,0x81,0xb1,0xfa,0xe8,0xf3,0xfa,0x0d,0xbb,0x66,0x6e,0x24,0xfd,0x3c,0x50,0x63,0x3a,0xf1 +.byte 0x72,0x63,0x18,0x71,0x6d,0xee,0x6f,0xf1,0x0e,0x1f,0x9e,0x9d,0x87,0x12,0x5c,0xdf,0x1d,0x9e,0xc0,0x0b,0x39,0x0e,0xd6,0x56,0x79,0x30,0xcb,0x07,0x7b,0x88,0xa5,0xbe,0xfd,0xd4,0x49,0xcc,0x92,0x6a,0xcc,0x78,0x1e,0xaf,0xee,0x89,0xc8,0x51,0x08,0x98,0x14,0x20,0xe5,0x52,0x93,0x18,0x6f,0xbb,0xdc,0xb2,0x68,0x14,0xd1,0xdb,0xe8,0x56 +.byte 0x24,0xd0,0x34,0xab,0xa6,0xfa,0xfe,0x72,0x5a,0xe3,0xe1,0x87,0x0d,0xf4,0xfa,0xa6,0xa6,0x6c,0xb6,0xcb,0xf8,0xfc,0x59,0xac,0xd9,0xb0,0xcd,0x15,0xa4,0x37,0x73,0x6e,0x70,0xc9,0x74,0xef,0x87,0x78,0x61,0xc2,0xd0,0x52,0x51,0xa9,0x2c,0xdb,0x9d,0xd9,0x3d,0xac,0xcd,0x52,0x39,0x69,0x2d,0x2a,0x4f,0xf3,0xb2,0x69,0xb9,0x01,0x3c,0x57 +.byte 0xeb,0x1b,0x0e,0x87,0xe9,0x42,0x58,0x83,0x6b,0xbc,0x72,0xc8,0x46,0x32,0x42,0x17,0x6a,0x19,0xa0,0xb3,0xf1,0x1c,0x96,0x9c,0x11,0x09,0x8b,0xc1,0x9e,0xe9,0x7f,0x18,0x8e,0xca,0xea,0x24,0x1b,0xce,0x12,0x57,0x1d,0x34,0xbe,0x60,0x60,0x2c,0xd8,0xa0,0x61,0x73,0xd6,0xf8,0xaf,0x15,0x26,0x84,0xd7,0xec,0xc0,0xbe,0x7e,0xa1,0xa8,0xba +.byte 0x2b,0xcc,0x20,0x67,0x6e,0xea,0x48,0x79,0x23,0xea,0x14,0x36,0x85,0x0a,0x56,0x3a,0xcd,0x5b,0x51,0xa4,0xf5,0x92,0x49,0xc2,0x55,0x62,0xed,0x88,0xde,0xd0,0x0c,0x01,0x36,0xb9,0x2e,0x94,0x80,0x75,0x8a,0x21,0x0a,0x07,0x45,0x68,0xd8,0x9d,0x49,0x7b,0xa7,0xb2,0x84,0xfa,0x3c,0xc4,0xd5,0x59,0xf9,0xc3,0xff,0xcf,0xe4,0x5f,0xea,0xbb +.byte 
0x0f,0xae,0x7d,0x96,0xd3,0xe9,0x38,0xd1,0xb1,0x02,0xf6,0x4b,0x95,0x43,0x1c,0x69,0xa6,0x99,0xf5,0xdb,0x46,0x62,0xea,0x69,0x5a,0x08,0x2d,0x01,0x11,0xed,0x70,0x03,0x60,0x54,0xba,0x32,0x2c,0x0e,0x44,0x1f,0x8d,0xee,0x2e,0x39,0xab,0xc0,0xd4,0x88,0x11,0xef,0x07,0x3a,0x47,0xb9,0x6e,0x0c,0x22,0x9a,0xf3,0x89,0x01,0xfb,0xb8,0x2d +.byte 0x52,0xa0,0x42,0x4c,0xb3,0x9e,0xf5,0x4b,0x0c,0x78,0x0a,0x3b,0x29,0xae,0x4a,0xc0,0xb2,0xa3,0xc0,0x0d,0x38,0x07,0x49,0x9c,0xda,0x7c,0x48,0x81,0xba,0x53,0x0d,0x0d,0x78,0x8c,0xac,0x9b,0x3d,0x1f,0xaa,0xc1,0x32,0x54,0xca,0x54,0xe1,0xef,0x46,0x82,0x61,0xd0,0x88,0x04,0x53,0xb0,0x34,0xc2,0x23,0x9a,0x90,0xe3,0x73,0x9c,0x0d,0x46 +.byte 0x61,0xe5,0xc0,0x42,0x87,0x4a,0x3b,0x3a,0xf9,0xab,0xbe,0x4c,0xba,0x2f,0x88,0x03,0x6b,0x52,0x25,0x8c,0x9b,0xc0,0x13,0xb6,0x80,0x09,0x85,0x97,0x64,0x6d,0x65,0xcd,0x18,0x42,0x00,0xdf,0x76,0x4d,0x67,0xbf,0x04,0x7a,0x5f,0x7e,0x3a,0x5c,0x6f,0x1d,0x12,0x5b,0xbe,0xd2,0xc8,0xe5,0x09,0x45,0x4d,0xae,0xed,0xd8,0x77,0xc5,0x6f,0xb6 +.byte 0x43,0x09,0xe2,0xee,0xc9,0x5a,0x76,0xc5,0xeb,0xdd,0x96,0x23,0xb9,0xe5,0xfc,0xf2,0x3c,0xe1,0x67,0x5f,0x1b,0x10,0x39,0x47,0x67,0x8b,0x48,0x32,0xd0,0xbc,0xa0,0xa8,0x3e,0xc3,0x30,0x21,0x18,0x54,0x49,0xfe,0x8a,0x14,0x7a,0xe5,0x6e,0xbe,0x70,0xec,0xf6,0x97,0xa0,0xa4,0xf4,0xdd,0xaf,0xf2,0xde,0x50,0x1a,0x68,0xb9,0x1a,0x4b,0x37 +.byte 0xf8,0x29,0x16,0x4f,0x8c,0xa5,0x9e,0xd2,0x72,0x7f,0xf6,0x6b,0x7d,0xac,0xe4,0x17,0x93,0x39,0x8f,0xd9,0xdf,0x50,0x1f,0xce,0xf5,0x58,0xdd,0xcd,0xc2,0xb9,0x64,0xfc,0xad,0x8a,0x3c,0x2e,0x52,0x58,0x91,0x3b,0x78,0xb4,0xfd,0x4a,0x3b,0x13,0x5d,0x20,0xd5,0xdf,0xe7,0x52,0x3d,0x4c,0x2f,0x02,0x30,0xfc,0x24,0x17,0x99,0x6e,0x4b,0xfe +.byte 0x1d,0xf0,0xe6,0x86,0x32,0x37,0xb5,0xd5,0x09,0xa3,0xa5,0x3b,0xc1,0x88,0x9f,0x01,0x57,0x12,0x03,0x1d,0x60,0xd8,0x57,0xba,0xc6,0xfc,0xda,0xab,0x02,0xbe,0xab,0x89,0xf9,0x08,0x63,0xbd,0x42,0x11,0xf7,0xbf,0xd3,0x45,0x2b,0xa5,0x34,0x91,0x18,0xb9,0xb3,0x79,0xb4,0x15,0xa1,0x01,0x1a,0xf9,0x74,0x91,0x08,0x94,0xb2,0xf3,0xb2,0xca +.byte 0x0a,0x3a,0x4f,0x42,0x8a,0x16,0xf7,0x9e,0xbf,0x27,0x72,0x7b,0xff,0xd3,0xb9,0x4e,0xf5,0x8e,0x68,0xb5,0x91,0x23,0xef,0xeb,0x5d,0x7d,0xd8,0xc9,0xda,0x07,0x33,0xc9,0x1c,0x4a,0x7a,0xf2,0x72,0x64,0xb3,0x35,0x2e,0x54,0xec,0xc4,0xd9,0xee,0xea,0xda,0xfe,0x8b,0x1c,0x21,0x93,0x52,0x95,0x7c,0x2d,0xfe,0x56,0x05,0xdd,0x57,0x37,0xf2 +.byte 0x54,0x1c,0xe2,0x6c,0xc0,0xaa,0x71,0x67,0xdd,0x73,0x43,0x17,0x3e,0x76,0xdb,0x60,0xb4,0x66,0x62,0xc7,0x74,0x08,0x91,0x1f,0xd5,0x4c,0xa9,0xd0,0x34,0x33,0xea,0xb0,0x2c,0x0a,0x88,0xda,0xf7,0xca,0x91,0xf6,0x5f,0x9e,0x72,0xf6,0x18,0xf9,0x19,0x9d,0x84,0xf8,0x4c,0xe1,0xeb,0x45,0x29,0xaa,0xf2,0xa6,0xfd,0x64,0xf9,0x0b,0xfe,0x09 +.byte 0x1c,0xc2,0xde,0x19,0xdd,0x0f,0x02,0x16,0x65,0x70,0x33,0xd4,0x32,0x67,0x7b,0xc4,0xbb,0x11,0x60,0x4f,0xc3,0x4d,0x29,0x23,0x7e,0x84,0x58,0x51,0x43,0x7e,0x25,0x4f,0x3d,0xd4,0xe0,0x20,0x79,0xfd,0xce,0x59,0x49,0xf8,0xd1,0x53,0xca,0x2d,0x66,0xec,0xe5,0x7f,0xc8,0x14,0x06,0xc1,0x96,0x40,0xf2,0x61,0xa7,0x1b,0xf9,0x5e,0x97,0xfe +.byte 0x62,0x57,0x05,0xcc,0x6f,0x26,0x4b,0xa6,0x40,0x33,0x72,0x20,0xd3,0x1e,0x2b,0xb2,0x60,0xe7,0x56,0xda,0x87,0xd3,0xb4,0x5a,0x73,0x04,0xc9,0xc2,0x68,0xe3,0x18,0x74,0xd9,0x46,0x74,0x31,0xf4,0xf4,0xab,0xc4,0x0a,0xbc,0x66,0x4e,0x23,0x5f,0x92,0x7c,0x0a,0x81,0xdd,0xcc,0x79,0xee,0xb3,0x3d,0xc0,0x91,0x81,0xd0,0x79,0x39,0xd2,0x69 +.byte 
0x5d,0xdc,0xc1,0x5c,0x61,0xb9,0x5e,0x87,0x32,0x73,0x70,0xd0,0xa8,0x7d,0xb5,0xd0,0xfc,0xf4,0xb6,0x55,0x9f,0x1f,0x8a,0xec,0xf4,0xb0,0x47,0xeb,0x3b,0x68,0x80,0x0b,0x79,0xd0,0x71,0x99,0xb1,0xd0,0xed,0x1f,0x9f,0x6c,0x2d,0x9d,0xae,0x1c,0x62,0x3b,0xec,0x3e,0x2f,0xb4,0x6f,0xbb,0x2e,0x1e,0xa9,0x7c,0xe8,0x5d,0x14,0x7d,0x0d,0x17 +.byte 0x6d,0x9c,0x54,0xce,0x64,0x93,0x8e,0x3b,0xa4,0xa9,0xfb,0xd9,0x44,0x06,0xbb,0xb8,0x7f,0xdf,0xd3,0xc2,0xa2,0xcf,0x5a,0xa2,0xa7,0xbb,0xb5,0x08,0xe2,0x67,0xdf,0x0e,0x4e,0xc6,0xcf,0x0a,0x79,0x1e,0xa5,0x60,0x1a,0x81,0xb1,0x8e,0x1b,0x27,0x7f,0x8d,0x28,0x50,0xa7,0x4a,0xe4,0x4b,0x61,0x6b,0xa9,0xfa,0xaf,0x82,0x83,0xfb,0x1f,0x2e +.byte 0xfa,0xce,0x18,0x0e,0x32,0x5f,0x5a,0xcf,0xac,0xaf,0x22,0x30,0x16,0xd7,0x97,0x99,0x0d,0xb8,0x92,0xa5,0x1d,0x44,0xb2,0xa5,0xc7,0x74,0xd2,0x81,0x8d,0x5c,0x38,0xda,0x9f,0x76,0xcb,0x47,0x6c,0xb7,0x08,0xd9,0xc1,0x52,0xd0,0x64,0x0a,0xf9,0xdd,0x3e,0xe8,0x99,0x15,0x4d,0xcb,0x7b,0x25,0x53,0x8c,0x13,0xb1,0xbf,0xb7,0xca,0x2d,0xce +.byte 0x71,0x48,0xee,0x5b,0x3a,0x01,0x5b,0xfd,0x22,0xfa,0x6f,0x17,0xcb,0x52,0xcc,0x0a,0x2b,0xbb,0x6d,0xce,0x2d,0x00,0xf5,0x9e,0x0d,0x58,0xf1,0xf4,0xa4,0x9f,0x13,0xf9,0x68,0x15,0xd7,0x02,0x41,0x6c,0x19,0x6b,0x66,0x9a,0x74,0xee,0xb4,0xb3,0xc7,0xec,0x60,0x19,0xbd,0xbb,0x97,0x22,0x7c,0x4e,0xe6,0xc6,0x00,0x03,0xa5,0x36,0x52,0xec +.byte 0x21,0xcf,0xc8,0xda,0x2c,0x14,0xa9,0xd8,0x75,0xab,0xea,0x05,0x8c,0x24,0x28,0x63,0xbd,0x58,0x35,0xd7,0x95,0xcb,0x14,0x89,0x04,0x99,0x7e,0x67,0x0d,0x07,0x35,0xdb,0x17,0x7c,0x72,0x2d,0xbc,0x89,0x9b,0xb4,0x16,0x21,0x2f,0x90,0xe8,0x8f,0xeb,0xc3,0x8d,0x86,0x0d,0x92,0xf6,0x4b,0x80,0x36,0x96,0x6b,0xd8,0x95,0x7b,0xad,0xe8,0xbf +.byte 0x77,0x9e,0xf4,0x93,0xcd,0xa5,0x06,0xbc,0x38,0xf2,0x57,0x25,0x54,0xfa,0x8e,0x19,0x8e,0x25,0x8e,0x3c,0x28,0xaa,0xf2,0x02,0x30,0xd4,0x47,0x89,0x36,0xb9,0xb7,0x01,0x5f,0x0c,0xd1,0x8d,0x93,0x7e,0xf0,0xf0,0xff,0x2f,0x8f,0xb5,0x97,0xa7,0x02,0xe8,0x9b,0xf2,0x51,0xe6,0x51,0x62,0xa5,0x27,0x26,0xc6,0x7a,0x39,0x7a,0xa9,0xaf,0x1e +.byte 0x03,0xd5,0x25,0xbe,0x3b,0x19,0x46,0xc4,0xdd,0xd6,0x5e,0x6a,0x18,0xc0,0x41,0x5f,0x53,0x89,0xd3,0x16,0xfb,0x3a,0x10,0xce,0x0d,0x8c,0x04,0x4c,0xcf,0xab,0xb9,0x0d,0x6c,0x45,0x6c,0x29,0xed,0x77,0x37,0x1f,0xd8,0x10,0x8a,0xfe,0x07,0xbd,0x7e,0xd7,0xa6,0x6b,0x80,0xde,0x3e,0x2c,0xa8,0xb1,0x38,0xcc,0xab,0x10,0x69,0x8f,0x58,0x3d +.byte 0x12,0xc7,0x9c,0xc1,0x0a,0xeb,0x3d,0x5e,0xf1,0x65,0xc6,0x09,0xcb,0x4b,0x09,0x24,0xa7,0x56,0x1d,0x1d,0x4c,0xd7,0x06,0xbd,0xe2,0x72,0x70,0xae,0x7e,0xe9,0xaa,0x97,0x6d,0xec,0xcb,0x55,0x0b,0x5d,0x45,0x3a,0x25,0x3d,0x52,0x0f,0x48,0x2f,0xe4,0xd0,0x5e,0x85,0x87,0xb6,0xa7,0x70,0x2f,0x9c,0x19,0x89,0x95,0x45,0x76,0x00,0xfe,0x27 +.byte 0xff,0xf8,0x73,0x59,0xba,0x98,0x92,0x4e,0x76,0x1a,0x90,0x1d,0xbc,0x1b,0xae,0x44,0xb6,0x63,0x86,0x4c,0x3c,0x8a,0x8f,0x3e,0x03,0x95,0x50,0x30,0xd8,0x0f,0x7f,0x6f,0xb6,0xe9,0xbe,0x2e,0xc9,0x55,0xe7,0x73,0xd6,0x77,0xdc,0xbc,0x67,0x54,0x31,0x47,0x30,0x46,0xe1,0xa4,0xf8,0xf3,0x90,0x4f,0x68,0x5a,0x52,0xe2,0xe7,0xdb,0xd9,0xfd +.byte 0xf6,0x36,0x2a,0xc1,0xdb,0x35,0x82,0x69,0xff,0xf9,0xea,0x53,0xff,0xcd,0x21,0x2c,0x26,0x79,0xd6,0x8c,0x74,0xe7,0x9e,0x85,0x1a,0x04,0xf5,0xed,0x89,0x16,0xf5,0xd7,0xf1,0x89,0xf1,0xb3,0x5b,0x47,0x42,0xcb,0x92,0x2e,0x70,0xf6,0x3e,0xfc,0x20,0x87,0x70,0xec,0x30,0x16,0xcc,0x88,0x64,0x13,0x58,0xf1,0x0d,0x17,0x90,0xc4,0xdb,0x07 +.byte 
0xf5,0xe3,0x34,0x31,0x10,0x9c,0xa4,0x6a,0x4a,0xe6,0x6c,0x80,0x49,0x07,0x23,0x21,0xd6,0xf1,0xcb,0x4a,0xd1,0xb5,0xb7,0x63,0x94,0x4c,0x0a,0xce,0x90,0xf2,0x63,0x31,0x4f,0x96,0x6c,0x5d,0x3e,0xaa,0x10,0x20,0xd6,0xb6,0xbe,0xfa,0x3f,0x83,0xbc,0xa8,0x08,0x38,0xec,0x38,0xe4,0xe9,0xf5,0xb3,0x8e,0x32,0x31,0xcd,0x7c,0x08,0x98,0xf6 +.byte 0x0f,0x8a,0x8f,0xc1,0xd8,0x9e,0x05,0xb6,0x74,0x11,0x94,0xef,0x4f,0x8f,0xa1,0xc6,0x8c,0xdb,0xc3,0x27,0x4e,0xa3,0x30,0x94,0xf5,0xe8,0x2a,0x18,0x0a,0x51,0x9b,0x79,0xb2,0x1f,0xc3,0xa0,0x26,0xa9,0xf5,0xc4,0x9e,0x39,0xda,0x6a,0x53,0x8f,0x8c,0x4c,0x54,0x50,0x81,0xa0,0x0a,0xd3,0x7c,0x99,0x91,0xc7,0x3e,0x56,0x7d,0x53,0x8c,0x3c +.byte 0x51,0x44,0xa5,0x22,0x9d,0xd2,0x9b,0x13,0xcf,0xb8,0x0c,0xb8,0xd4,0xaa,0xb4,0xaa,0x8d,0xab,0x7c,0x06,0xca,0xbb,0x85,0xac,0x01,0xee,0xef,0xe7,0x74,0xd5,0x0d,0x64,0x91,0x1c,0xde,0x6c,0x05,0x37,0x1e,0x23,0x05,0x7e,0x38,0xdc,0x17,0xaf,0xa7,0x95,0x85,0x1f,0xaf,0xc8,0xe1,0xc2,0xda,0xda,0xf1,0x14,0x56,0x66,0x68,0x70,0x36,0x38 +.byte 0x7b,0xb8,0x22,0x9f,0xc4,0xeb,0x5d,0x76,0x97,0xc5,0xa3,0xb9,0x06,0x86,0x4f,0x20,0xab,0x7d,0xce,0x7d,0x78,0x59,0xc5,0x1f,0x73,0x81,0xf6,0x6d,0xb4,0xcc,0x10,0xc5,0x4d,0xe3,0x81,0xaf,0xbc,0x37,0x42,0x28,0x5f,0x51,0x1e,0xaa,0xc7,0x81,0x20,0xc3,0x89,0x35,0xf1,0x74,0x3a,0xe8,0x04,0x24,0xef,0x8b,0x70,0xe1,0x74,0xdf,0x87,0xd5 +.byte 0x3c,0x32,0x32,0x7d,0x03,0xd7,0xda,0x6d,0x8b,0x25,0x8d,0x11,0xa3,0xc2,0x27,0xdc,0xa3,0xfc,0xdf,0x70,0xa4,0x41,0xad,0xda,0xce,0x12,0x45,0x14,0xa1,0x96,0x16,0xd8,0x54,0x89,0x9e,0x78,0x7f,0x23,0x12,0xd1,0x15,0x08,0x7f,0xbd,0xf0,0x9a,0xf1,0x5b,0x07,0xd5,0xbc,0xab,0xab,0x15,0xae,0xda,0xf1,0x26,0x12,0x4e,0xd6,0x6c,0x35,0xc1 +.byte 0x6e,0x27,0x4d,0xa8,0x71,0x51,0x1e,0xae,0xa8,0x35,0x26,0x06,0x18,0x03,0xd8,0xae,0x9e,0x8b,0x07,0x30,0x10,0xfb,0x47,0x05,0x02,0xcc,0x0a,0xbd,0x57,0x43,0x15,0x0a,0x7a,0xb5,0x30,0x0b,0xa6,0x3c,0xa8,0xc9,0xf5,0x68,0xe1,0xfb,0xd1,0xe0,0xe7,0x44,0x6c,0xb4,0x44,0xb6,0xd1,0x2b,0x30,0x5e,0x17,0x89,0x40,0xcc,0x10,0x8f,0x97,0x8a +.byte 0xf3,0xf4,0x52,0x55,0xc4,0x8e,0x46,0xe5,0x24,0x0b,0x2a,0x5d,0x84,0xc1,0x4e,0xa8,0x5a,0x53,0xa8,0xce,0xc6,0x3f,0xa2,0xaa,0x3a,0x8f,0x51,0xed,0x4c,0xa6,0x34,0x6a,0x8c,0x18,0x9b,0x36,0x49,0x40,0x34,0xa3,0xe4,0xd8,0x3c,0x8a,0xfc,0x41,0xc9,0x35,0xfe,0x6e,0x3e,0x29,0xbc,0x04,0x61,0xaf,0x04,0x03,0x43,0x79,0xb5,0x77,0x27,0x25 +.byte 0xbe,0x85,0xc9,0x56,0xa4,0x17,0xc4,0x27,0x3d,0x53,0x1b,0x49,0x86,0xb2,0xb6,0x52,0x62,0x12,0x5d,0xe9,0x47,0x6f,0x65,0x78,0xf8,0x95,0x63,0xbc,0x73,0x6d,0xa6,0xb9,0xcd,0x17,0x39,0x56,0xb0,0xab,0x3a,0x15,0x5f,0x9a,0x98,0xfb,0xcd,0x51,0x4a,0x35,0x21,0xaf,0x07,0x4a,0x3d,0xfd,0x39,0x11,0x42,0xed,0xfc,0x7e,0x10,0x24,0xa5,0x0c +.byte 0xb2,0x4f,0x27,0xe4,0x78,0x32,0xfe,0xfc,0x8e,0x46,0x68,0xbb,0x2e,0x85,0x87,0x0f,0x01,0xde,0x1c,0x02,0xdd,0x82,0xa0,0x9e,0x30,0x31,0x8d,0x86,0x36,0x33,0xa6,0x59,0x16,0x78,0xae,0x1f,0x1d,0x27,0x0b,0x29,0x42,0x16,0x93,0x3b,0xe6,0xfb,0x8d,0xd5,0x48,0x42,0x61,0x39,0x5b,0xf7,0xea,0xd0,0x6f,0x67,0xd9,0x03,0x72,0xed,0x54,0xe1 +.byte 0xab,0x3f,0xa0,0xdc,0x4b,0x19,0xe6,0xe3,0xfe,0x5f,0x65,0x64,0x4c,0xa9,0x5c,0x52,0x36,0xb3,0x65,0x28,0x3e,0xe5,0x07,0x50,0xed,0xec,0x2f,0xc9,0xff,0x47,0x27,0xf6,0xfe,0xb8,0x60,0x60,0x52,0xe5,0xec,0x3c,0x4f,0x69,0x9f,0xaa,0x06,0x8a,0x99,0x9f,0xac,0xfc,0x0a,0x6f,0x8a,0xa4,0x0e,0x5c,0x58,0xb4,0x09,0xba,0x93,0x95,0x94,0x12 +.byte 
0x9b,0x23,0x4f,0x93,0x28,0x6d,0xd0,0x76,0xfd,0xc9,0x87,0x3b,0xf1,0x8c,0x7d,0x56,0x84,0x5a,0x04,0x08,0x30,0xf7,0xf6,0x52,0x15,0xba,0xd6,0x7a,0x39,0x8c,0x5a,0xbf,0xeb,0x02,0x6d,0x31,0x30,0x92,0xbc,0xe2,0x07,0x21,0x16,0x96,0x70,0x66,0x00,0xe0,0x04,0xc5,0xa8,0xe4,0x08,0x6d,0x08,0x69,0x35,0xe2,0xb1,0x83,0x03,0x37,0xca,0xff +.byte 0x06,0x37,0x80,0xd5,0x1a,0xc5,0x31,0xfc,0x9a,0xb0,0x8a,0x4b,0x58,0xf3,0x00,0x4e,0xa4,0xfe,0x9e,0xe0,0x60,0xc7,0x3d,0x2c,0x52,0xb5,0x39,0xf0,0xa4,0x88,0x39,0x37,0xa5,0x26,0x8a,0xa3,0xe6,0x31,0xce,0xf3,0xa1,0x54,0x73,0xe7,0x69,0x38,0xef,0xa2,0xab,0x52,0x50,0x1a,0x45,0xcc,0x29,0x9c,0xb6,0xf4,0xde,0xc2,0xfe,0x7a,0x26,0xf7 +.byte 0x7a,0x6e,0x07,0xb6,0xd8,0x3f,0x77,0x60,0x35,0xae,0x6a,0x90,0xd6,0xb8,0x37,0xed,0x73,0x59,0x54,0xd9,0x0c,0x87,0x0e,0x81,0xef,0x69,0xc7,0xd4,0x8f,0x00,0x74,0x57,0x12,0xcf,0xa1,0x76,0xe8,0x45,0xf5,0x9a,0x4f,0xe2,0x5d,0x8a,0x89,0xb1,0x8b,0xea,0x9c,0x0a,0x1e,0x00,0x61,0x3b,0x66,0xbd,0xb5,0xd6,0xff,0xa3,0xff,0x52,0xc2,0x35 +.byte 0x81,0x05,0x08,0x2b,0xf9,0x52,0xda,0x74,0xd1,0x76,0x13,0xba,0x28,0x4c,0xb1,0xb1,0x82,0x5b,0x4e,0x79,0x39,0x22,0xf9,0x96,0x91,0x07,0x4f,0xf9,0xf2,0x25,0x25,0xb1,0x3e,0xda,0x07,0x5c,0x01,0x7b,0xfa,0x3e,0x95,0x92,0x1d,0xf8,0x44,0x06,0xc1,0xed,0x64,0x74,0x14,0x84,0x25,0xee,0x75,0xaf,0xe3,0x7c,0xd3,0xbe,0x7a,0x51,0x6b,0x80 +.byte 0x20,0x43,0x20,0x10,0x5f,0xf5,0xfc,0xd5,0xe8,0x06,0x43,0xad,0x10,0x6b,0x67,0x48,0xca,0xca,0x6e,0x3e,0x1c,0xdf,0x8f,0x7a,0x65,0xc8,0x5d,0xba,0x3b,0x67,0xeb,0x1f,0xc4,0x37,0xad,0xef,0x73,0x9e,0x18,0x8e,0xc1,0x99,0xaf,0x75,0xd3,0x91,0x73,0xc3,0x3a,0xb2,0xfe,0xff,0x30,0x81,0xc4,0x4f,0x37,0x37,0x23,0x96,0x17,0xf1,0xa2,0x9b +.byte 0x55,0x6e,0xd6,0xb3,0xc4,0x98,0xa3,0x32,0xb6,0xff,0x86,0x87,0x77,0xf4,0xad,0x16,0x3e,0xf0,0x24,0x01,0xb4,0x8e,0x1e,0x0f,0x10,0xa4,0x2e,0xe4,0x79,0xe6,0x88,0xe7,0x09,0x58,0x5e,0x97,0xad,0x0d,0x72,0x05,0xbf,0x2f,0x3f,0x99,0xee,0x8a,0x84,0xc3,0x62,0x43,0x52,0x6d,0xab,0x66,0xcf,0x9f,0x4e,0xf2,0x0d,0x13,0x15,0x49,0x84,0x5e +.byte 0x6c,0x8d,0x2d,0xef,0x53,0x16,0xa0,0x63,0xbe,0x05,0xb8,0x9b,0x23,0xca,0xca,0xb8,0xdd,0xbc,0x96,0x68,0x35,0x43,0x63,0x30,0x8e,0xaf,0x53,0x98,0xe2,0x76,0xe8,0x89,0x00,0x29,0x11,0x70,0xd5,0x94,0xbd,0x78,0xff,0xf6,0x88,0x4a,0x3d,0x99,0xd9,0x7e,0xdf,0xa8,0x33,0x92,0xa2,0xc0,0x32,0x42,0x73,0x08,0xd4,0x55,0x5d,0x18,0x93,0xca +.byte 0x7e,0x33,0xe3,0x51,0xc7,0xb7,0x24,0x62,0x69,0xf4,0xab,0x36,0xe3,0x22,0x10,0x9b,0xe0,0xbd,0x48,0x65,0x30,0x9c,0xfe,0xeb,0x3f,0x7f,0x22,0x67,0xcc,0x87,0x5a,0x71,0xb0,0xd1,0x19,0x82,0x1c,0xb2,0xf1,0x73,0xd2,0xd6,0x3f,0xef,0xe3,0x2f,0x25,0xf3,0x8b,0x21,0x4e,0xbf,0x0e,0xc1,0xd2,0x8a,0xbb,0x04,0xde,0xcf,0xd1,0x77,0xba,0xaa +.byte 0xc7,0x41,0x68,0xce,0xc4,0x64,0xf9,0x3a,0x2f,0x1c,0x0b,0x22,0xf8,0x60,0x09,0x76,0x31,0x88,0x62,0x3a,0xf3,0x49,0xe6,0xda,0x4b,0xd3,0xf3,0x35,0xaa,0x56,0x4c,0x2f,0x7f,0x03,0x3e,0xf8,0xcb,0x5e,0xed,0x37,0xa1,0x29,0xe8,0x20,0xf5,0x4a,0x32,0x73,0x30,0xfd,0xd1,0xf6,0xb4,0xa1,0x30,0x87,0xcb,0x21,0x63,0xf5,0x3a,0xad,0x05,0x1a +.byte 0x34,0xf5,0x32,0xf6,0x02,0xf3,0x10,0x52,0xfd,0x86,0x37,0x1f,0x5d,0xe4,0x2e,0x31,0xcb,0xb8,0x4c,0xeb,0xdd,0xea,0x01,0x0d,0x94,0x13,0xa8,0x8f,0xf0,0x52,0x4e,0x0d,0x4f,0xd1,0x24,0xeb,0x0f,0x2b,0xb1,0xaa,0xc5,0xc8,0x52,0xb9,0xbe,0x21,0x48,0x2a,0x53,0x98,0xe4,0x00,0x72,0x64,0xdb,0x44,0x48,0x36,0x60,0xe7,0x81,0xdc,0x25,0x85 +.byte 
0x4d,0xaf,0xa8,0x0d,0xfb,0x07,0x76,0x4f,0x6a,0x30,0x3c,0x7c,0x3b,0x36,0xa9,0xf8,0xae,0x81,0x03,0xe9,0x19,0xdf,0xdb,0xd9,0x7f,0x59,0xe0,0xd7,0x50,0x14,0x9f,0x67,0x3d,0xc7,0xdf,0xa8,0x44,0x86,0x29,0x81,0x65,0x44,0x9e,0x37,0x27,0xdd,0x2f,0x33,0x59,0xf7,0xaa,0x17,0x34,0x8c,0x1c,0xa7,0x8e,0x06,0x46,0xf1,0x43,0x87,0xa9,0xb7 +.byte 0x85,0xec,0x92,0x0d,0xdd,0x78,0x55,0x99,0xfb,0x1c,0x66,0x85,0x0d,0x59,0x31,0x00,0xbc,0xd9,0x9b,0xbb,0xfb,0xfc,0xb2,0x36,0x3c,0x34,0x8f,0x4a,0xb6,0x74,0x9c,0x32,0x6f,0x69,0x6c,0x3e,0x68,0x7e,0xec,0xeb,0x58,0x6a,0xf5,0xa2,0xbb,0x04,0x68,0xdb,0x8c,0xf0,0x04,0xba,0xf7,0xf7,0x50,0xd0,0x60,0xba,0x45,0x73,0x0f,0x2c,0x2f,0x97 +.byte 0x58,0xcc,0xa2,0xbe,0xfe,0x5e,0xf9,0x44,0x03,0x8b,0x99,0x56,0xb0,0x4f,0xe1,0xd0,0xa5,0x9f,0xd1,0xfc,0x95,0x44,0x4b,0x01,0x24,0xc0,0x4c,0x91,0xc1,0xb5,0x99,0xe7,0x5f,0x2f,0xcf,0x5d,0x4f,0x64,0x6e,0x54,0x51,0x0c,0x35,0x5f,0xa8,0x7b,0x27,0xa0,0x7d,0xb1,0x90,0xc2,0xdd,0x50,0xef,0x09,0x6f,0xed,0x25,0x6b,0xf5,0x6f,0xc1,0x97 +.byte 0xea,0xd5,0x49,0xf5,0x40,0x60,0xc3,0xbb,0x0d,0x82,0x15,0xa5,0xf7,0xfe,0xa1,0x20,0x13,0x9e,0xbb,0x43,0x58,0xba,0xd2,0xe8,0x89,0xaa,0xfc,0xe0,0x47,0x6b,0xac,0x91,0x8b,0xeb,0x4f,0xf5,0xda,0xf5,0xc8,0x11,0x64,0x7c,0x8d,0x43,0x92,0xf2,0x84,0xeb,0xfb,0x5c,0x1b,0x6b,0x68,0x8e,0x3c,0x66,0xb2,0xd1,0x8e,0x67,0x44,0xbf,0x69,0x3b +.byte 0xb9,0x41,0x78,0x8d,0xc8,0x7b,0x81,0x61,0x70,0x6e,0xe2,0xfc,0xd2,0x96,0x31,0x31,0x2f,0x27,0x90,0xf2,0xc4,0xed,0xbd,0xb5,0x0e,0x91,0x7d,0xd0,0xec,0x3c,0xe9,0xcf,0xf2,0x07,0xac,0x54,0x44,0x9a,0x24,0x41,0xcb,0x2a,0x86,0x30,0x18,0xba,0x65,0x59,0x41,0x00,0x59,0xbf,0x3d,0x01,0x8a,0x51,0xe5,0xd2,0x90,0x8c,0x7d,0xd7,0xad,0x71 +.byte 0xdc,0x45,0x62,0x95,0xf9,0x9f,0xe8,0x55,0x6d,0x48,0x22,0x32,0xcb,0x9a,0x55,0x65,0xe5,0xdf,0xee,0x22,0x99,0x91,0xd7,0xed,0x33,0x04,0x72,0xc7,0xc5,0xb2,0x56,0x5e,0x8f,0x38,0x4b,0xd0,0x61,0x4b,0x4b,0x04,0x4c,0x4c,0x2b,0x23,0x00,0xd4,0x5c,0xdd,0x84,0x8d,0x73,0xf4,0xf7,0xef,0xd5,0xdb,0x2b,0xec,0x54,0x86,0x37,0x01,0x64,0x56 +.byte 0xef,0x73,0x9f,0xb4,0xb6,0xd2,0xf4,0x33,0x93,0xbd,0xd7,0xd9,0x6e,0x8f,0x60,0x85,0xbc,0xa6,0x16,0x3f,0x3f,0xc3,0xd7,0xfc,0xb6,0x82,0xf0,0xe5,0x1e,0x2c,0x51,0x48,0x27,0x50,0x3e,0xdb,0xe6,0x86,0x3b,0xa1,0xfa,0x09,0x39,0x04,0x6f,0xb1,0x85,0xbd,0xda,0x4d,0x2f,0xd1,0x40,0x6f,0x2e,0x2b,0xf2,0x9a,0x4d,0x8e,0xb2,0xc5,0x6e,0x21 +.byte 0xf9,0xdd,0xc9,0x2e,0x81,0x18,0x7b,0x88,0xb9,0x86,0x36,0xe5,0xb2,0xdd,0x19,0xb4,0x7f,0x5d,0xc0,0x20,0x34,0xdc,0x63,0x7d,0x8c,0x80,0x0f,0xe6,0x85,0x14,0xbb,0x87,0x6c,0x3e,0x39,0x53,0x60,0x3d,0xc5,0x46,0x11,0xa3,0x96,0x60,0x6f,0xe9,0xfe,0x59,0xcc,0xed,0x4d,0xdb,0xa3,0xa1,0xf1,0x71,0x0b,0xb0,0x1f,0x89,0x4c,0x32,0x59,0xa5 +.byte 0x7d,0xf7,0x3e,0x5b,0xca,0xa4,0xe1,0xc3,0x50,0xac,0xdf,0x00,0xad,0x45,0x59,0x9e,0x23,0x5f,0x52,0xbd,0x36,0x78,0x55,0xcf,0x90,0x91,0x41,0x14,0xdb,0x76,0x3a,0x43,0x39,0x89,0xe1,0x93,0xc8,0x66,0x91,0xc7,0x42,0x06,0x6f,0xbb,0x35,0x1e,0x07,0x52,0x5a,0xe4,0x41,0x9f,0x65,0xe0,0xdc,0x49,0x8c,0xd3,0x5f,0x16,0x21,0xc9,0xb8,0x8a +.byte 0xc2,0x56,0x91,0xcb,0x18,0x6b,0x38,0x7b,0x3a,0xeb,0x91,0x3c,0x0d,0x6a,0x1f,0xd6,0xc6,0xd7,0x56,0x8d,0xd3,0x76,0x1c,0x9d,0xed,0x3d,0xb6,0x92,0x71,0x6e,0x73,0xc6,0xb8,0xa2,0x1c,0x25,0xb9,0x3c,0xd4,0x41,0xf7,0x8f,0x39,0x60,0xe6,0x27,0xf2,0xc6,0x5f,0x56,0x08,0x7c,0xd3,0x16,0x9d,0x06,0xc0,0xca,0x3d,0xc6,0x61,0xb0,0x21,0x51 +.byte 
0x6d,0xca,0x82,0x59,0xe6,0xbb,0x99,0xa2,0x4f,0xfc,0x71,0x66,0x2b,0x4e,0x40,0x62,0x97,0x34,0x73,0x4a,0xe5,0xf0,0x4f,0x4c,0x36,0x4c,0xdb,0x03,0xa9,0x87,0x29,0x21,0x5d,0x91,0x5b,0x89,0xb8,0x3d,0x65,0xc7,0x58,0x0a,0x81,0xb5,0x3e,0x22,0xa1,0x57,0x95,0xbe,0x60,0xf5,0xeb,0xb3,0x49,0xdf,0xd9,0xa2,0x31,0x36,0x5f,0xb2,0xa6,0xf6 +.byte 0x66,0x88,0x88,0x8e,0xa3,0x2c,0xac,0x5e,0xa1,0x33,0x16,0x64,0x08,0x47,0xc8,0xbc,0xc2,0xe9,0xdb,0x73,0x57,0x50,0xd4,0x24,0x01,0x26,0x26,0x04,0x4f,0x8a,0xc0,0x7a,0x97,0x14,0xf2,0xd0,0xbe,0x03,0xea,0x8a,0x25,0xcb,0x98,0xe7,0xbd,0x67,0xff,0x32,0xfd,0x8a,0x7d,0x11,0xe1,0xb2,0x91,0xb5,0xa0,0xb6,0x3c,0x2c,0xb3,0x6e,0x35,0x61 +.byte 0x86,0xbc,0x37,0x15,0xf8,0x3b,0x0d,0x84,0x83,0x69,0x76,0xb0,0xaa,0x8f,0x4f,0xca,0xba,0x54,0xfe,0x42,0xc8,0xba,0x9a,0xd5,0x53,0x69,0x67,0x29,0x23,0x3a,0x6a,0x75,0x97,0xb4,0x29,0x2e,0x62,0xe3,0x95,0x82,0xb3,0xa0,0xa1,0xb7,0xdf,0xc2,0x66,0x4d,0xdd,0x0d,0xda,0xda,0xc2,0x42,0xe0,0x69,0xb1,0xab,0x3c,0x44,0x39,0x11,0x3b,0x0a +.byte 0xd6,0x96,0x2c,0x36,0xb0,0xa0,0xed,0x3d,0x0c,0x63,0x8b,0x90,0xe4,0xb9,0x5f,0x4c,0x27,0x70,0x87,0xb3,0x54,0xe2,0x36,0x74,0x6f,0x3e,0x22,0xb1,0x3b,0x1b,0xba,0xdb,0x1c,0xbd,0x9c,0x6d,0x84,0xbd,0x33,0xfb,0xc0,0x98,0x4c,0xcf,0x7a,0xe8,0x41,0xdb,0x32,0x1f,0xb7,0x64,0x19,0xdb,0x87,0xe7,0xf9,0x52,0x40,0x8c,0xc6,0x89,0x98,0x15 +.byte 0x69,0xde,0xfa,0x29,0x9a,0x0f,0xaf,0xb0,0xad,0x71,0x35,0xab,0xab,0x34,0xe0,0xf4,0x03,0x24,0x6f,0x94,0x38,0x87,0xba,0x68,0xd5,0x1f,0x58,0x88,0x3e,0x12,0x20,0x57,0x43,0xde,0xd0,0xbc,0xaa,0x31,0x8f,0xbc,0x88,0xa0,0xdf,0x5a,0xcc,0xd1,0xba,0x9c,0x18,0x80,0x4e,0x8f,0x68,0x91,0x9c,0x57,0x3b,0x5a,0x62,0xc7,0x29,0x3e,0x49,0xc7 +.byte 0x23,0x26,0xfd,0x9e,0xd0,0xb0,0x4f,0xd4,0xb2,0xa9,0xa8,0x4c,0x66,0x54,0x52,0x75,0x6b,0xbf,0x63,0x76,0x49,0x3b,0xa3,0xb2,0x8f,0x87,0x9d,0xb4,0x8f,0x07,0x3c,0x8e,0xae,0xe1,0x0e,0x9a,0x86,0x90,0x58,0x73,0x8a,0xb3,0xa9,0xab,0xe6,0x27,0xd7,0x70,0x94,0x77,0x12,0xdc,0x71,0xdf,0xcf,0xba,0xdd,0x85,0xfe,0x28,0xaa,0xcd,0xcc,0xe8 +.byte 0x5f,0xd4,0xd8,0x45,0x6f,0x20,0xa8,0x5e,0x40,0x91,0x3b,0xd7,0x59,0x92,0xb8,0x7d,0x2b,0x8b,0x38,0xbd,0xfe,0x7b,0xae,0x5c,0xee,0x47,0x9b,0x20,0xb7,0xf3,0xad,0x75,0xa9,0xe1,0x96,0xc8,0xb2,0x30,0xfe,0x0c,0x36,0xa2,0x02,0xf4,0x3b,0x30,0xfd,0x91,0xfa,0x5f,0xd6,0x18,0x1a,0xcb,0xd2,0x26,0xbb,0x67,0xbe,0x1c,0x99,0xa5,0x4f,0x57 +.byte 0x40,0xb5,0xed,0xd6,0x84,0xfd,0x6b,0x00,0xc8,0xe7,0x18,0x1a,0x9f,0xf7,0x3b,0xd1,0xcc,0x12,0xeb,0x9d,0x61,0xf0,0x8d,0x64,0x08,0x93,0x61,0xc4,0x3e,0xdb,0xda,0x15,0xb1,0xd6,0x2c,0x84,0x2a,0xd8,0xd2,0xa1,0x66,0x4e,0xc9,0xd6,0xbf,0x7e,0xb6,0x22,0xfa,0x35,0x5e,0xdc,0xc0,0x31,0x02,0xb8,0x17,0x46,0x9e,0x67,0xd3,0x6a,0x8f,0x33 +.byte 0x85,0xc3,0xfe,0x36,0xbc,0x6f,0x18,0x8a,0xef,0x47,0xf1,0xf2,0x6e,0x15,0x6c,0xb1,0x4a,0x4b,0x13,0x84,0xd5,0x1b,0xf9,0xa2,0x69,0xcd,0xc7,0x49,0xce,0x36,0x8e,0xe5,0xd5,0x35,0x05,0x7c,0x7f,0xc6,0x15,0x29,0x2e,0x64,0xa6,0x91,0x9d,0xe5,0x9d,0x90,0xe7,0x26,0xec,0x75,0x19,0x58,0x57,0xf2,0x19,0x7b,0x24,0x7d,0x19,0xd3,0x72,0x69 +.byte 0xaa,0xa2,0x8c,0xe3,0x3d,0x38,0xb9,0xf0,0x5b,0xe9,0x3b,0xaa,0x96,0xef,0x2c,0xfc,0xf5,0x13,0xa6,0xa9,0x57,0x8c,0xa9,0x3a,0xc1,0xf0,0x2d,0x57,0x06,0x08,0xe3,0x9c,0xfe,0x82,0x8a,0x6a,0x79,0x5b,0xef,0x2b,0x81,0x83,0x01,0x53,0xac,0xdc,0x79,0x93,0x9b,0x23,0xd4,0xae,0x17,0x6f,0x62,0xaa,0x33,0x41,0xa6,0x31,0x1c,0x7b,0x46,0x2b +.byte 
0x17,0xd3,0x6f,0x66,0x73,0x54,0xee,0xa1,0x08,0xee,0x8f,0x0f,0x0e,0x53,0xa7,0x49,0x17,0xdb,0x35,0xaf,0x4e,0x94,0x87,0x8e,0xff,0xf4,0x2b,0x29,0x01,0x45,0xa3,0x0a,0xd9,0x13,0x38,0x09,0x46,0x2c,0x56,0x97,0xd7,0xee,0x24,0x43,0xd1,0x20,0xed,0x38,0xde,0x52,0x13,0x38,0x06,0xd3,0x97,0xc7,0x48,0x8b,0x72,0x0a,0xc5,0xca,0x75,0x2c +.byte 0x04,0x9e,0xee,0x14,0xe7,0xda,0x59,0xc2,0x54,0x7a,0x72,0x55,0x35,0x00,0x93,0xb7,0xb9,0x81,0x01,0x46,0xae,0x43,0x81,0x34,0xd7,0xb4,0x7a,0xfc,0xfc,0x98,0x2b,0x29,0xe5,0x5e,0x9d,0x8e,0xef,0xd4,0x44,0x9d,0x9a,0xbe,0xdb,0x83,0x33,0x18,0x9e,0xbd,0x0f,0x34,0x4d,0xd9,0x34,0xe0,0x2c,0x1f,0x10,0xaa,0x06,0x5e,0x54,0x51,0x72,0xec +.byte 0xbf,0x6b,0x3e,0xb9,0xdd,0x37,0xc3,0xe1,0xbe,0xbe,0x1d,0x86,0xde,0x12,0xca,0x82,0xc5,0xe5,0x47,0xf8,0xbe,0xef,0xb6,0x79,0xd5,0x3c,0x69,0x0a,0x35,0x3e,0xd3,0xf8,0xaf,0x5b,0x8e,0x69,0xff,0xb2,0xf7,0x91,0xc2,0x70,0x22,0x97,0x1c,0x5c,0x56,0x25,0x5a,0xcf,0x31,0x7a,0x37,0xce,0xc7,0xf2,0x98,0xdc,0xb5,0x58,0x71,0x5a,0x60,0xe2 +.byte 0xfe,0x4f,0xf3,0xe2,0x2a,0xca,0x22,0x3e,0x07,0xc2,0xea,0x23,0xc8,0x04,0x97,0x7f,0xca,0xf6,0xf8,0x12,0x06,0x88,0x81,0xee,0xb7,0xdd,0x56,0x9e,0x0f,0x36,0xd3,0x09,0xa8,0x74,0x4d,0x8b,0x8f,0x31,0x64,0xbe,0x9d,0x7b,0x68,0x50,0xc8,0x64,0x40,0x3b,0x0c,0x04,0xb9,0x4b,0x9e,0xff,0x7e,0x5d,0xd8,0x57,0xa0,0xe5,0x6d,0xc2,0x37,0xe7 +.byte 0xd1,0xd9,0x96,0xaa,0x16,0x3e,0xa2,0x9d,0x32,0xe7,0x1e,0x11,0x6e,0x41,0xe2,0xa0,0xe1,0x6f,0x32,0x6d,0xd5,0x38,0x0c,0x27,0x27,0xa9,0xc2,0x04,0xc6,0xe7,0x8d,0x7d,0x7b,0x30,0xbe,0x54,0x6b,0x82,0x37,0x39,0x53,0x54,0xc9,0xac,0xcb,0xd1,0x31,0x79,0xd4,0x7b,0x85,0x07,0xf4,0xf4,0x5d,0x33,0xc7,0x91,0x4e,0xe5,0x13,0x78,0x09,0x42 +.byte 0x29,0x48,0xaf,0x82,0xb1,0x88,0xd4,0xd3,0x57,0x50,0x38,0xa7,0x66,0x41,0x63,0x34,0x2a,0x3c,0x5e,0x8f,0xc4,0xc1,0x00,0xa1,0x22,0xbe,0x5e,0x64,0xb0,0x60,0x9b,0x42,0x9d,0xc6,0x59,0x5c,0xcc,0x29,0x6f,0x64,0x5b,0x5c,0x0f,0xb2,0xae,0x21,0x0c,0x9a,0x6a,0x19,0xb9,0xa6,0x32,0xf8,0xdc,0x82,0xea,0xba,0x27,0xcf,0x42,0xd3,0xde,0x78 +.byte 0xfe,0x9c,0xa5,0x36,0xb6,0x24,0xb6,0x0d,0x5b,0x67,0x6c,0xf5,0x16,0xbf,0x67,0x54,0x4f,0xe4,0x83,0x29,0x75,0x42,0x9a,0xbb,0xd5,0xe7,0x01,0x1f,0xbd,0x80,0x1a,0x7a,0xb6,0xe1,0x2b,0x5d,0x71,0x93,0x00,0xad,0xf6,0x11,0x8d,0x67,0xdc,0x9c,0x8f,0xf0,0x09,0x3f,0xf9,0xa4,0xd6,0xe0,0xdd,0x95,0xea,0xfb,0x71,0x76,0x21,0x31,0x6d,0x48 +.byte 0x0a,0x27,0xa8,0xa6,0x3a,0x7f,0x42,0x6b,0x7e,0xd7,0x6e,0xd5,0x42,0x97,0xad,0x55,0xae,0x26,0x3c,0xde,0x3f,0xaf,0xfd,0x1d,0x6d,0xd3,0xeb,0x84,0xad,0x6d,0xd1,0x4a,0x85,0x1a,0xf7,0x99,0xa4,0xd0,0x48,0xfb,0xf6,0xfe,0xc6,0xea,0x61,0x77,0xe2,0x56,0x87,0xc1,0x36,0x44,0xb4,0xe3,0xd7,0xd9,0x6d,0x3e,0x1b,0xf4,0x72,0x3e,0xfe,0xa5 +.byte 0x47,0xf8,0x3f,0x1a,0x6e,0x43,0xf5,0x67,0xfe,0x90,0x96,0x9b,0x52,0xde,0xab,0xfb,0x45,0x7d,0x93,0xea,0xc3,0x40,0xe1,0x5f,0xcd,0xad,0x3b,0xe9,0x4e,0x36,0xc5,0x38,0xf4,0x66,0xde,0x4b,0xc8,0x2a,0xc3,0xa2,0x3a,0x2a,0xf1,0xd1,0xe8,0x01,0x07,0x37,0xca,0x42,0xbf,0x4f,0xd8,0xc5,0x50,0x93,0x1a,0x01,0x1d,0x51,0x41,0x6e,0xbf,0x68 +.byte 0x93,0x2e,0xdc,0x41,0x23,0xf3,0x13,0xe7,0x09,0xfa,0x39,0x6d,0xee,0x41,0x49,0xbb,0x78,0x04,0xcf,0xc9,0xbb,0x11,0xaa,0x57,0xb5,0x3e,0x4c,0x3a,0x77,0xb7,0x0b,0x38,0x34,0x48,0xd0,0x99,0x20,0x55,0xcd,0x43,0x2f,0x68,0x66,0xb0,0xe6,0x75,0x41,0xe4,0xae,0xfd,0x96,0xe8,0x01,0x4c,0x0b,0x5c,0xbc,0x4f,0x45,0x70,0x08,0x9e,0xf7,0x68 +.byte 
0x9e,0xbb,0xe5,0x39,0x20,0x3f,0xbe,0xd3,0xe3,0x95,0xba,0x98,0xd5,0x12,0x2e,0x87,0xd4,0xf4,0x12,0xa2,0xcb,0xd4,0x51,0x53,0x93,0x67,0x06,0xf1,0x21,0x0e,0x92,0x8f,0x9f,0x9e,0x6c,0x16,0xa4,0x2c,0x6d,0xb0,0xd0,0xe1,0x87,0x2f,0x09,0x2c,0x8f,0x4b,0x89,0x1f,0xab,0x66,0xf1,0xcd,0x6e,0x67,0xaf,0x07,0x99,0x18,0x1b,0xda,0xc8,0x65 +.byte 0x81,0xa3,0x37,0x8a,0xad,0xe4,0x1d,0xfd,0x82,0xa0,0xf1,0xe1,0x1e,0x8d,0x0b,0xf7,0x07,0x7c,0xb3,0x10,0xc8,0x5a,0xa9,0xcc,0xc8,0xd0,0x2e,0x5a,0x71,0x45,0x4c,0x30,0xf0,0x10,0xe0,0xf6,0x0d,0x0d,0x11,0xb4,0x83,0x40,0x75,0xee,0xb9,0x24,0x04,0xe3,0xba,0xb3,0xd3,0x00,0x57,0x71,0x98,0xf0,0x4b,0x35,0x8d,0xd8,0x71,0xa0,0xcc,0xaf +.byte 0x46,0x54,0x67,0x65,0x70,0x0b,0x9c,0x61,0xf8,0xd4,0xb2,0x35,0xfd,0xcf,0x2b,0x3a,0x48,0x5b,0x03,0x86,0xd8,0x13,0x48,0x8a,0x55,0xa5,0x4d,0xef,0x42,0x41,0xbb,0x6a,0x8c,0x92,0x46,0x87,0x82,0x09,0x43,0xf3,0x94,0x1d,0x23,0x36,0xfe,0x6f,0xb8,0x9f,0xfa,0xf9,0x92,0x27,0x3c,0xcc,0x47,0x89,0x5c,0x7f,0x81,0x42,0x74,0x12,0x14,0xff +.byte 0x98,0x63,0xc0,0xfb,0x70,0xff,0xc7,0x65,0x5a,0xc3,0xb9,0x74,0x1b,0x71,0x3c,0x2c,0x47,0x79,0x07,0xb9,0x3c,0xc2,0x5f,0x48,0x4f,0xbd,0xaf,0x03,0x05,0x57,0xa9,0x84,0x33,0xc8,0x0d,0xd5,0xac,0x42,0xdb,0x4b,0x57,0x46,0x41,0xf0,0xe4,0x08,0x0d,0xf3,0x43,0x41,0xa5,0x14,0xb7,0xcd,0x64,0x23,0xc9,0xfe,0xff,0x12,0x97,0xc6,0x2f,0x8d +.byte 0x9e,0xf2,0x1d,0x33,0x26,0x3c,0x57,0x17,0xe1,0x7b,0x92,0x3f,0xb6,0xf4,0xd9,0xf8,0xe0,0x37,0xe6,0x18,0x7d,0xa7,0x8a,0x1e,0xe8,0xd8,0x56,0xa6,0x63,0xdf,0xa3,0x99,0x16,0x74,0x48,0x01,0xaf,0x95,0x55,0x40,0xce,0xa8,0x0d,0x30,0x01,0x09,0x40,0xc9,0x9d,0x3d,0xdf,0x4e,0x00,0xe0,0x2a,0xe6,0xdb,0xa2,0x79,0x42,0x57,0xd0,0x3d,0x81 +.byte 0x7f,0x67,0x3a,0xa9,0x63,0xb3,0xd4,0x60,0xa7,0xab,0x54,0x46,0xb0,0xbe,0xb0,0x83,0x72,0xec,0x47,0x0f,0xc7,0xd1,0xed,0x16,0x96,0xbc,0xa5,0x62,0x38,0xdb,0x88,0x2b,0x25,0x26,0x27,0x56,0x7f,0x46,0x39,0xe8,0x4e,0xc0,0x6c,0x62,0xf8,0x80,0x68,0x56,0x8a,0x93,0x51,0x95,0x77,0xe3,0x11,0x7b,0xaf,0xc4,0xcf,0x34,0x5a,0xd5,0x26,0xfc +.byte 0xa2,0x18,0xb0,0xc0,0xa5,0x8b,0x25,0x70,0x40,0x70,0x29,0xc3,0xda,0x80,0x3d,0xe2,0x59,0x49,0x7f,0xdd,0x62,0x6e,0x5a,0xe6,0x27,0x73,0xce,0xb6,0x32,0x37,0x5f,0x73,0x12,0x2b,0x34,0x84,0xff,0x85,0xe3,0xb5,0x93,0x41,0x47,0xc5,0xf5,0x0e,0x21,0xfb,0x24,0x0f,0xdf,0x7b,0xb4,0x29,0x7f,0x67,0x2a,0x38,0x79,0xf0,0x54,0x8a,0x94,0x68 +.byte 0xe2,0x0b,0xb0,0xd4,0xb2,0xa4,0xe4,0xfb,0x3b,0xe6,0xe7,0x59,0x41,0xbd,0xed,0x62,0xce,0x50,0x1a,0x47,0x92,0x92,0x8d,0x80,0xa6,0x05,0x7a,0xb0,0xce,0x48,0x9c,0xb0,0x64,0xea,0xe0,0xa5,0x77,0xff,0xc1,0x82,0x99,0x7b,0xfb,0x74,0x53,0xfa,0x41,0x9a,0x2c,0xb4,0xbb,0xd2,0x26,0xa1,0x80,0x68,0x17,0xaa,0x8f,0x14,0x52,0xb6,0x5d,0xe0 +.byte 0x69,0x5b,0x31,0xc5,0xf5,0x32,0x0d,0xff,0xa4,0x7b,0x28,0x38,0x9b,0x61,0xfc,0xd0,0x92,0xb8,0x6e,0x23,0x8a,0xf3,0xc7,0x85,0x11,0xb8,0xd0,0x19,0xaf,0xca,0xa7,0xb4,0xcc,0xeb,0x5d,0xf6,0xa1,0x1c,0x56,0xdf,0x78,0x7a,0xe3,0x6a,0xa4,0x07,0x71,0xce,0xf1,0xb2,0xd5,0x38,0x3c,0xfa,0xf7,0x7a,0xbf,0x4b,0x43,0xa6,0xb3,0x4d,0xff,0x82 +.byte 0x96,0x46,0xb5,0xec,0xda,0xb4,0x5e,0x35,0x78,0xeb,0x4a,0x7e,0xc5,0x7b,0x05,0xd4,0xdd,0xf7,0xb7,0xf3,0xf0,0x04,0x26,0x7e,0x5e,0xc1,0x23,0xca,0x7f,0x14,0x27,0xac,0xda,0xe7,0xdb,0x31,0x05,0x9d,0xd4,0xda,0x20,0xc7,0x6d,0x9a,0x47,0x14,0x38,0xbd,0x7c,0xfe,0xbe,0x8d,0x42,0x7c,0xba,0x36,0xe2,0x2c,0x26,0xd2,0x46,0xa5,0x6b,0xbd +.byte 
0x6a,0x75,0x6b,0x52,0x8c,0x10,0xc6,0x0e,0x76,0x60,0x46,0xcc,0x93,0x54,0xc4,0x6e,0xc7,0x70,0x5b,0xb4,0x81,0x51,0x56,0x03,0x22,0x33,0x21,0xe4,0x36,0xee,0x01,0xc3,0x0d,0x17,0x23,0x15,0xae,0x79,0xbc,0xe6,0x13,0x0f,0xfc,0x77,0xa2,0x06,0xed,0x76,0x4a,0xf7,0x2d,0x99,0xc8,0x5c,0xfd,0xac,0xd0,0x11,0xe8,0xfa,0x55,0x17,0x56,0x63 +.byte 0x3e,0xd5,0x23,0x71,0xf8,0xe9,0x1f,0x62,0x95,0xae,0x7c,0x2d,0xcd,0xb8,0x6e,0xb0,0xfe,0xf3,0xd0,0xba,0x72,0x8e,0xe3,0x95,0x82,0x00,0x85,0xdb,0x25,0xe4,0xf2,0xaa,0xbc,0x8d,0xb9,0x4d,0x69,0xa4,0xcd,0x39,0x52,0x9e,0x10,0xae,0x90,0xf0,0x74,0x2f,0xc6,0x5e,0x01,0x99,0x03,0xd5,0x88,0x59,0xfd,0x1b,0x80,0x56,0x0a,0x04,0x27,0xd9 +.byte 0x04,0x51,0xb0,0xb7,0x7a,0x65,0x79,0xa8,0xe2,0x6d,0x7f,0xb2,0xba,0x37,0x40,0xa0,0xbb,0xaf,0x15,0x46,0x23,0x5f,0x22,0xd0,0x2c,0x6c,0x7a,0x58,0x76,0x6f,0xb8,0x19,0xfe,0xb5,0x3d,0xf0,0x77,0x00,0x6b,0x4c,0x83,0x36,0x90,0xe6,0x57,0x29,0x6e,0x27,0x76,0xd4,0x7d,0x9a,0x6a,0xf1,0xf6,0x1b,0x1a,0x45,0xf5,0xf6,0x2d,0xb8,0x30,0x33 +.byte 0x65,0x51,0x37,0x26,0xbc,0xf7,0xb7,0xf9,0x56,0x05,0x6b,0xd4,0xd6,0x00,0x1d,0x13,0x15,0x45,0x24,0x0d,0x28,0x69,0xc6,0x50,0xe1,0x48,0x48,0x34,0x69,0x31,0x3c,0x58,0x71,0xd6,0x4a,0xd9,0xda,0x0d,0x28,0xbd,0xe9,0x5d,0x5d,0x8a,0x6e,0x71,0xc0,0x8b,0x7a,0xba,0x17,0x8e,0x82,0xcb,0xe9,0x95,0xc4,0x43,0x37,0xd0,0x58,0xed,0xec,0x77 +.byte 0x1e,0x22,0xf0,0xf0,0x7c,0x9d,0xeb,0x64,0x30,0x7b,0xb2,0x7b,0x86,0xdb,0xef,0x92,0x79,0xd9,0x9c,0x1c,0x1a,0xf6,0x98,0x26,0x18,0xa2,0x83,0x45,0x08,0xd4,0x1d,0x84,0xd4,0x28,0x6d,0x1f,0xb5,0x1f,0xab,0x97,0xc9,0x0d,0x1f,0x83,0x34,0x18,0xa3,0x20,0x63,0x60,0x6c,0xf3,0xd8,0xb2,0x0a,0xd9,0x35,0xa6,0xce,0x44,0x50,0xc6,0xf3,0x91 +.byte 0xe3,0x95,0x89,0x49,0x99,0x32,0x1d,0xf2,0x54,0x39,0x09,0xca,0xd1,0xc4,0x7f,0xa1,0x1d,0xce,0x94,0x67,0xf1,0x88,0x04,0x29,0xcb,0x5d,0xf7,0xfa,0xcd,0x69,0x16,0x17,0x05,0xc3,0x93,0x45,0xbf,0xd3,0x74,0x63,0xdc,0xe2,0x84,0xab,0x27,0x60,0x56,0x61,0x72,0x5d,0xdf,0xb4,0xa4,0x0f,0xb0,0x21,0x82,0x9b,0x73,0x0a,0x11,0x22,0x2d,0x65 +.byte 0xa2,0xff,0x29,0x8a,0x19,0x28,0x4f,0x4f,0xdd,0x64,0x0a,0x48,0x35,0x70,0x30,0x9f,0x41,0x4d,0x0c,0x7b,0xa6,0xcb,0x63,0x83,0xd1,0x79,0xfa,0x5f,0xc9,0x9b,0x6e,0x09,0x12,0x87,0xcd,0x1e,0x39,0xd6,0x40,0x08,0x0f,0xfd,0x79,0xc8,0xcb,0x77,0x8f,0x7a,0x52,0x42,0xc0,0xb2,0xc8,0xa0,0x2a,0xff,0xbc,0x60,0x13,0xbc,0x41,0x4a,0xc6,0x8b +.byte 0x08,0xb0,0x9f,0x75,0x87,0xa1,0x75,0x42,0x4b,0x3a,0xf7,0xf7,0x84,0x39,0xa5,0x88,0x25,0x2d,0x4f,0x73,0x4e,0x30,0x27,0x92,0xea,0x93,0x70,0x5c,0xb5,0xeb,0xb0,0x10,0xda,0x0f,0xaa,0xb3,0x3f,0xb5,0x55,0x64,0x65,0xae,0xb5,0xf8,0x0a,0xe4,0x9f,0x86,0x02,0x6f,0x63,0x8a,0x0b,0x6b,0x82,0x85,0x3c,0x6a,0xdf,0x68,0x4c,0x1e,0xe9,0x5c +.byte 0xd0,0x99,0xe5,0x0c,0xfc,0x63,0xfb,0xce,0x2d,0x63,0xd5,0x7d,0x8a,0x7d,0x14,0x22,0xbd,0x71,0x5e,0x79,0x3f,0x44,0x95,0xe5,0x6c,0x58,0x94,0x84,0x41,0x65,0x52,0x94,0x50,0xec,0xd3,0x2a,0x16,0x88,0xdb,0x71,0xb9,0xe4,0xb6,0xbf,0xc5,0x3c,0x48,0x37,0x62,0x32,0x79,0xbe,0x1d,0xdb,0xc9,0x79,0x37,0x40,0x65,0x20,0x62,0x45,0xb4,0xda +.byte 0x24,0xef,0x33,0xf1,0x05,0x49,0xef,0x36,0x17,0x17,0x0f,0xdc,0x65,0xb4,0xdc,0x57,0xc3,0xc6,0x82,0x57,0x08,0xf2,0x20,0x57,0x5c,0x25,0x0e,0x46,0x75,0xa7,0x4f,0x9e,0xa4,0x00,0xf7,0x79,0xb9,0x0a,0xef,0x4f,0x50,0x79,0xf8,0x59,0x01,0xf2,0x74,0x9f,0x16,0x27,0xa5,0xc1,0x32,0xcc,0x58,0xa7,0x40,0xa1,0xa1,0x26,0x80,0x00,0xb5,0x64 +.byte 
0x0a,0xd8,0x53,0x1f,0x72,0xf7,0x60,0xf7,0x0a,0xaa,0xdf,0x31,0x95,0xff,0xfc,0xb4,0xca,0xbc,0xf8,0x2a,0x33,0x20,0x04,0x16,0x1a,0xe7,0xeb,0x22,0xd1,0x25,0xa6,0x03,0xc9,0x9e,0x9e,0xca,0x7a,0x46,0x7c,0xcb,0x8a,0x63,0x4a,0xf0,0x1b,0xd0,0x34,0xc3,0xbb,0x89,0xcf,0x16,0x38,0xcb,0xe0,0xce,0xd5,0x0b,0xfd,0x4e,0xbc,0xce,0xba,0x28 +.byte 0x68,0x00,0x2a,0x31,0x52,0xe6,0xaf,0x81,0x3c,0x12,0x09,0x2f,0x11,0x0d,0x96,0xc7,0x07,0x42,0xd6,0xa4,0x2e,0xc1,0xa5,0x82,0xa5,0xbe,0xb3,0x67,0x7a,0x38,0xf0,0x5e,0xd8,0xff,0x09,0xf6,0xab,0x6b,0x5d,0xec,0x2b,0x9f,0xf4,0xe6,0xcc,0x9b,0x71,0x72,0xd1,0xcf,0x29,0x10,0xe6,0xe3,0x27,0x1c,0x41,0xc8,0x21,0xdf,0x55,0x27,0xa6,0x73 +.byte 0xb7,0x45,0xa1,0x09,0x66,0x2f,0x08,0x26,0xf1,0x50,0xe0,0xec,0x9d,0xf2,0x08,0xf3,0x49,0x56,0x50,0xe0,0xba,0x73,0x3a,0x93,0xf5,0xab,0x64,0xb6,0x50,0xf4,0xfa,0xce,0x8d,0x79,0x0b,0xad,0x73,0xf2,0x8c,0x1e,0xe4,0xdd,0x24,0x38,0x1a,0xde,0x77,0x99,0xb8,0x92,0xca,0xc0,0xc0,0xbc,0x3d,0x01,0x6f,0x93,0x3a,0x6e,0xc5,0x28,0x6e,0x24 +.byte 0x9c,0xf9,0xd9,0xcb,0x4b,0xbe,0x9e,0xda,0x0d,0x10,0xfb,0x9d,0x15,0xfe,0x28,0xdc,0xd9,0x09,0x72,0xd3,0x9f,0x6d,0x77,0x14,0x84,0x86,0x56,0x10,0xdc,0x8e,0x6a,0xa7,0x62,0xf0,0x0b,0x65,0x2c,0xa2,0xd1,0x7f,0xae,0x32,0xfa,0x9b,0x46,0x0f,0x12,0x08,0x22,0x8c,0x87,0x15,0x4b,0xc4,0x6d,0x85,0xfb,0x69,0xfe,0xce,0xfb,0xb4,0x3e,0x7b +.byte 0xcf,0x88,0xa7,0x97,0x52,0x56,0xd0,0x9f,0xb4,0x33,0xf9,0x08,0xd2,0x28,0x46,0x5e,0xc4,0xec,0x22,0xc6,0x1e,0x7b,0x34,0x99,0x0c,0x5b,0x04,0x19,0xe2,0xca,0x09,0x11,0x50,0x45,0xcc,0xb2,0x90,0x25,0x51,0x68,0xc9,0x20,0x6c,0x99,0x2e,0xdb,0x5b,0x07,0x91,0xb2,0x69,0xbf,0x3c,0x05,0x50,0xfb,0x21,0x33,0x4f,0x6e,0x18,0x19,0xd5,0xff +.byte 0xce,0x9d,0xb5,0x7f,0xd4,0xd5,0x8f,0x41,0x26,0x1f,0xa1,0x4c,0x34,0xd3,0x98,0x08,0x5d,0xb5,0x56,0xa7,0x04,0x63,0x76,0x7d,0xae,0xee,0xea,0xbf,0x69,0x8d,0xff,0xa1,0x62,0x86,0x19,0x7b,0xe5,0x08,0x7a,0xe5,0x9e,0xe5,0x44,0xca,0x24,0xde,0x00,0x43,0xc7,0xcd,0xc8,0x5b,0x21,0x00,0xb9,0x56,0x3f,0xba,0xef,0xcd,0xc4,0xe0,0xd7,0x90 +.byte 0xa7,0xe1,0xf9,0x83,0x2c,0x1d,0x8d,0xc3,0x1b,0xa2,0xab,0xcd,0x7d,0xbc,0xd1,0x2b,0xf8,0x30,0x9e,0xb6,0x95,0xe0,0xd1,0xe6,0x81,0x89,0xa7,0xda,0xf0,0x54,0xc1,0xcb,0x3a,0x85,0x85,0xb5,0x03,0xb4,0x8c,0x7d,0x98,0x16,0xa8,0x83,0x29,0xbb,0x1c,0x1d,0xe1,0x7e,0x0e,0xb5,0x04,0xba,0xbf,0x89,0x30,0x3c,0x44,0xa2,0xc5,0xbf,0xf1,0x70 +.byte 0xdb,0xf3,0x13,0xf4,0x44,0xac,0x63,0xc4,0x9c,0x93,0xa9,0x13,0x1b,0xf1,0xcc,0x16,0x66,0xdf,0x56,0x10,0x88,0x0c,0x76,0xab,0x43,0xcb,0x75,0xf8,0x4f,0x04,0x26,0x95,0x4c,0x6d,0x55,0xc8,0xbd,0xf8,0x94,0x0f,0xca,0x29,0x2b,0xcd,0xce,0x05,0x1e,0xea,0xae,0x02,0x01,0x8b,0x60,0x6a,0x6a,0x03,0x14,0xe5,0xa7,0xdf,0x9e,0x9f,0x94,0x92 +.byte 0x41,0x2c,0xf0,0x1a,0xa7,0xc2,0xc1,0xfc,0x11,0xf3,0x00,0xe1,0xfc,0x7a,0x97,0xc0,0xe1,0x81,0x90,0x3f,0xea,0x1e,0x7f,0xf8,0xb0,0xd8,0x4c,0x2d,0xdc,0x83,0xfa,0x27,0x8b,0xf2,0xef,0x3b,0x3a,0x44,0xdc,0xa5,0xa9,0xd5,0x24,0x5f,0xb1,0xdd,0x1d,0x3f,0x03,0x76,0x3b,0x92,0x0d,0xb4,0x84,0xa4,0x5b,0xef,0x9f,0x89,0x9d,0xef,0xff,0xcf +.byte 0xc2,0x28,0x3b,0x9d,0xd2,0x28,0x75,0x3e,0xdc,0x14,0x79,0x7c,0x0c,0xaa,0x6c,0xf2,0x05,0x9d,0x27,0x01,0x15,0x19,0x60,0x48,0x5a,0x7d,0x04,0x27,0x2d,0x82,0x92,0x3e,0x0b,0x62,0xd7,0x5a,0xfb,0x72,0xfb,0xdd,0x43,0xfa,0xf4,0x6f,0x16,0xd2,0x8f,0x8f,0x21,0xdc,0x81,0x48,0x7a,0xe8,0x39,0xd5,0xdf,0x54,0x0f,0xe1,0xbe,0x65,0xc9,0x49 +.byte 
0x98,0xb1,0xff,0x8d,0x52,0x31,0x6a,0xcd,0x5e,0x83,0x17,0x41,0x93,0xcd,0x23,0x76,0x18,0xe9,0x82,0x71,0x15,0xb7,0xd8,0xde,0x0d,0x57,0x8b,0x90,0xe6,0xf4,0x57,0xc1,0xfd,0x3d,0x0d,0x6a,0xae,0xd1,0xd6,0x02,0x3e,0xb9,0x82,0xb2,0x82,0x80,0x48,0xa4,0x14,0x29,0x80,0x55,0x1d,0xaf,0x3e,0xf8,0x7e,0x36,0x5f,0x77,0x4c,0x73,0x6c,0x35 +.byte 0xd2,0x7c,0x36,0xca,0x2f,0xec,0x1e,0x3f,0x74,0xee,0xa5,0xe7,0x7d,0xce,0x81,0xf1,0xd5,0xc1,0xb3,0xaf,0x90,0x2c,0xc6,0x5b,0x81,0x37,0x85,0x98,0x78,0x3c,0x4f,0x2a,0x55,0xea,0x06,0x30,0x77,0x73,0x97,0x39,0x75,0xcf,0x4a,0x9b,0x55,0xb8,0x64,0x5c,0x86,0xfd,0x26,0x3e,0x8d,0x68,0xd2,0x70,0xe8,0xd7,0x99,0x57,0x6f,0x96,0x47,0x6d +.byte 0xa7,0x1a,0x0e,0x85,0xcd,0x00,0xa5,0x3e,0x11,0xec,0x76,0xd2,0x47,0x26,0x71,0xda,0x5c,0xf4,0xb1,0xd5,0x23,0xe1,0x62,0x71,0x43,0x30,0xa7,0x95,0xf6,0xc1,0xcf,0x8a,0x1b,0x75,0x53,0x39,0x6d,0x9d,0x18,0x7c,0xe3,0x48,0x27,0x33,0x1c,0x38,0x45,0xdf,0x75,0x22,0x05,0x6d,0x81,0x5d,0xfc,0xeb,0x0e,0x05,0x26,0x45,0x81,0x9f,0xce,0x0f +.byte 0xc9,0xdd,0x95,0x11,0x04,0x47,0x40,0xa4,0x07,0x3b,0x52,0x92,0xe0,0x91,0xdb,0xdd,0x3c,0x9f,0xd3,0xa1,0xb7,0xf9,0xeb,0xd6,0x6d,0x64,0x88,0xe9,0xf5,0x4e,0x98,0x8e,0x7b,0xd3,0xec,0xc0,0x22,0xe0,0xf2,0x14,0xf2,0x20,0xa2,0xa3,0xb3,0x0d,0x75,0x1a,0xbb,0xde,0x4a,0x41,0x04,0x43,0x0d,0xd9,0xd0,0x1d,0x73,0xc8,0x67,0x8e,0x58,0xe5 +.byte 0x4b,0x28,0x4d,0x8f,0x2f,0xab,0x1a,0x4a,0xfc,0x7c,0xd1,0x27,0x3e,0x4a,0x10,0x6a,0x5f,0x55,0x3a,0xf7,0x63,0x14,0xe9,0xad,0xb4,0x95,0xef,0x3d,0x5c,0xc3,0x7d,0xe4,0xb7,0x15,0xd7,0x0b,0x68,0xf0,0x23,0xa8,0xd4,0x8e,0x27,0xf6,0x55,0x11,0xbc,0xc0,0xff,0x3e,0x2c,0x24,0x59,0xb7,0xb7,0xb5,0x0b,0xd2,0x99,0xa5,0xd5,0xe2,0x24,0x33 +.byte 0x21,0xb8,0x96,0x48,0x18,0x94,0xb5,0xb2,0x50,0x5e,0x04,0x24,0x86,0x17,0x62,0x1e,0xc9,0xf8,0x22,0x6a,0xd0,0xec,0xc5,0xbc,0x90,0xf7,0x55,0xcf,0x3f,0x4c,0x7c,0xf7,0x51,0x19,0x95,0xa4,0x81,0x38,0x0c,0xa5,0x58,0x22,0xf3,0x10,0x05,0x05,0x44,0xbf,0x7e,0x2a,0xbd,0x5f,0x79,0x56,0x08,0xd5,0x68,0xea,0x85,0xa1,0xeb,0x0b,0xe1,0xd4 +.byte 0xfd,0x3a,0x38,0xd2,0x5a,0x49,0x17,0x9a,0x58,0x8f,0x52,0xf5,0xf4,0x7b,0x1f,0x58,0xa8,0xc0,0x1c,0x46,0x38,0xa6,0xe4,0x7d,0xcc,0x88,0x97,0x10,0x2b,0x5e,0x61,0xf5,0x73,0x7d,0x79,0x1b,0x53,0xf1,0xac,0xb4,0x3f,0xbd,0x9d,0xb6,0xc2,0x57,0xd5,0x84,0x4d,0x60,0xd6,0x45,0x56,0xa1,0x36,0x28,0xf5,0x74,0xc6,0x29,0xd7,0xc9,0x63,0x5e +.byte 0x7c,0x97,0x46,0xde,0x56,0x3f,0xd8,0x8e,0x75,0x29,0x87,0xe7,0xd1,0x24,0x78,0x26,0xdc,0x17,0x97,0xc9,0xf0,0x8e,0x95,0xbc,0xe5,0xfe,0xe3,0x3a,0x75,0x70,0x52,0xa9,0x31,0x97,0x79,0x3a,0xc2,0x53,0x6a,0x73,0xe2,0x76,0xf8,0x85,0xe6,0x0d,0x85,0x9b,0xfc,0x72,0x08,0x2a,0xa5,0x8e,0x42,0xb2,0x7c,0x8d,0x8b,0x28,0x4b,0xf5,0xcb,0x66 +.byte 0x80,0x46,0xb3,0x87,0xdf,0x38,0xa7,0x08,0xc8,0xea,0x85,0x0e,0x6f,0x13,0xe0,0x57,0x99,0xc6,0xb8,0xed,0x9c,0xb0,0xa9,0x89,0xd7,0xc5,0xa9,0x71,0xfd,0x8a,0x21,0xb1,0xec,0xc8,0x65,0x78,0x72,0xc6,0x77,0x69,0xd4,0x0b,0x47,0x4d,0x79,0x93,0xcf,0x2a,0x34,0xf1,0x1b,0x0e,0x6f,0x0d,0xd1,0xbb,0xe7,0xd7,0xb5,0x6f,0x57,0x01,0xd4,0xcd +.byte 0x56,0xbe,0xf0,0xd9,0xe2,0x8e,0x0e,0xb8,0x3d,0xdb,0xf6,0x97,0x39,0x0b,0x3e,0xe2,0xb2,0xa3,0x93,0x0b,0x74,0xe5,0x6a,0x21,0x04,0x29,0x5a,0x3e,0x07,0x9c,0x11,0x4e,0xfe,0x01,0x6e,0x96,0x1e,0x8f,0xe0,0xfe,0x24,0x24,0x7e,0x04,0x2f,0x65,0xf4,0xe2,0x1f,0x36,0x56,0x43,0x3a,0x6c,0xeb,0xd7,0x20,0x13,0x71,0x45,0x6a,0xe8,0xc6,0xfa +.byte 
0xba,0x26,0x6f,0x7d,0x9a,0x62,0x76,0x34,0x7d,0xed,0x47,0x71,0xd1,0x0e,0x5b,0x04,0x39,0xd6,0xc0,0xe5,0xa5,0xd8,0xf5,0x73,0xf9,0xf4,0xc2,0x2a,0x54,0x25,0x67,0xdf,0x83,0xa3,0xcd,0xfd,0x1e,0x46,0x87,0x06,0x17,0x6d,0x78,0x8e,0x0c,0x7b,0x08,0x06,0x1b,0xd9,0x5d,0x3d,0x03,0x40,0xbc,0xe7,0x02,0xc4,0xe0,0xe0,0x49,0xb2,0x6c,0x6f +.byte 0x97,0x76,0x0f,0xc7,0x14,0xd8,0x7c,0xc0,0xad,0x8a,0xbb,0xbc,0x2a,0x7e,0x68,0x46,0xcd,0xa7,0x26,0x16,0x77,0x1b,0x89,0x38,0xd8,0x2a,0x69,0x43,0xc4,0xaa,0x0d,0xf6,0xd1,0x65,0xda,0x41,0x75,0x77,0xcd,0xf7,0xd2,0x38,0x9c,0xdb,0x81,0x17,0x27,0x2f,0xba,0x2e,0xa5,0xb5,0xbe,0x05,0xe8,0xdd,0x5f,0xa9,0xad,0xbe,0xb2,0x0e,0x0b,0x69 +.byte 0xb6,0x8d,0xd2,0xf2,0xde,0x76,0x32,0x26,0xd9,0x06,0x1d,0x42,0x26,0x8c,0xf7,0xca,0x4c,0xe1,0x59,0x82,0x6c,0xea,0x96,0x70,0x39,0xb8,0x0d,0xf3,0x67,0x9d,0x5e,0x94,0x99,0x77,0xf2,0x0a,0x9a,0xde,0xa5,0xd2,0xe1,0xaa,0x91,0x85,0xc7,0x0f,0x92,0x35,0x04,0xd3,0x7a,0x13,0xfa,0xf2,0x86,0x5a,0x38,0xd1,0x7f,0x10,0xd8,0x30,0x0e,0x33 +.byte 0xe3,0xa0,0x8a,0xad,0x4f,0x6c,0x24,0xdd,0x9d,0x1c,0x4e,0xff,0x4c,0xfc,0x74,0x01,0xab,0x08,0x6c,0xe6,0x4c,0x78,0x75,0xc9,0x67,0x83,0x1f,0x75,0x22,0xb0,0x7c,0x44,0xa0,0xa1,0xee,0x4e,0xf6,0x3e,0xd3,0x35,0x70,0xbe,0x36,0x1e,0x90,0xa6,0xaa,0x64,0x67,0x7f,0x52,0x84,0xd9,0x27,0xab,0x37,0x30,0x68,0x46,0xcc,0x0e,0x57,0x58,0x6f +.byte 0xdb,0xb2,0x5f,0x24,0xf7,0xeb,0x97,0xea,0x64,0xec,0x6c,0x1e,0xe1,0xc4,0x72,0xfb,0x00,0xa7,0x62,0xa0,0x59,0xb9,0x17,0x8a,0x33,0x32,0x59,0xb8,0xbe,0x84,0xd4,0x62,0xb7,0xf6,0x35,0xd4,0xf1,0x1c,0xdb,0x7e,0xa6,0xbc,0x2c,0x54,0x3c,0xf5,0x63,0x4a,0x22,0x26,0x58,0xa0,0x35,0x98,0xa7,0x32,0xb2,0xa0,0x2b,0xd5,0xfa,0x2f,0x9b,0xb4 +.byte 0xea,0xd6,0x58,0x61,0xb2,0x24,0x45,0x46,0x1e,0xac,0x79,0xa4,0xf7,0xc1,0x13,0x2f,0xf5,0x6b,0xfa,0x70,0x50,0x2b,0x83,0xee,0x7c,0xc1,0x55,0x27,0x7b,0x4f,0xa6,0x0a,0x72,0x26,0x82,0xcd,0x4d,0xe2,0xe8,0x45,0xe6,0xd7,0x39,0x7e,0xed,0x35,0xdf,0x9e,0xb1,0x41,0x55,0xa2,0x5d,0x68,0x4b,0x0b,0xd1,0x73,0x5a,0x2b,0x81,0x35,0x28,0xfc +.byte 0x64,0x08,0xd7,0xc4,0x9f,0x30,0x77,0x3d,0x9d,0x80,0x15,0x67,0x9a,0x84,0xe4,0x34,0xea,0x8c,0xf7,0x73,0x9e,0x33,0xb4,0x09,0x33,0xbd,0xd8,0x82,0x43,0x7d,0xc5,0x1f,0x0e,0x7b,0xa0,0x53,0x59,0x20,0x12,0x57,0xed,0xda,0xc7,0x19,0x8e,0x62,0xe4,0x09,0xc1,0x4b,0x20,0x32,0x9e,0x18,0x11,0x1c,0x42,0x49,0x62,0x76,0xa8,0x83,0x72,0x11 +.byte 0x45,0xe7,0xb5,0x60,0xa7,0xc0,0x07,0xbd,0xb4,0x7c,0xc6,0x5c,0x03,0x34,0xa3,0x85,0x47,0x24,0x75,0xd2,0xab,0x46,0xbb,0xc7,0x0d,0xcd,0x40,0xe2,0x5e,0x5b,0xa7,0x98,0x67,0xe4,0xe2,0x02,0xe9,0xdc,0xd7,0xc2,0xaf,0x90,0x43,0x94,0xfe,0xf3,0x53,0xc1,0x10,0x28,0xa7,0x90,0xba,0x73,0x57,0x0c,0x4d,0x6d,0xbd,0xda,0x81,0xd5,0x90,0xce +.byte 0x02,0x40,0xb3,0xf0,0xec,0x50,0x82,0xc9,0xfb,0xf1,0x22,0x6d,0xc8,0xd2,0x7b,0xed,0x0b,0x43,0x7e,0x0b,0x60,0x9b,0x69,0x9e,0x58,0x26,0xc3,0x9f,0x6b,0xd0,0x31,0xeb,0xb7,0x0a,0xf3,0x9a,0x9a,0xf5,0x72,0xcf,0x29,0xc8,0x19,0x08,0x4d,0x67,0xd5,0xa1,0x8f,0x68,0x0e,0xee,0x59,0x14,0xf8,0x86,0xc0,0x08,0x5a,0x56,0xfe,0x6a,0xb7,0xac +.byte 0x78,0x8d,0x77,0x39,0x5e,0xb1,0x01,0x4d,0x31,0x81,0x56,0xdc,0x5b,0x10,0xda,0x4d,0xd2,0xfd,0xfc,0xa3,0xe3,0xaa,0x46,0x29,0x1a,0xea,0x9c,0x47,0x1b,0xd0,0xa6,0x84,0x1f,0x71,0x1a,0xd3,0x35,0x59,0x7f,0xef,0xf7,0x81,0x39,0x7a,0x9f,0x4a,0x01,0x4d,0x46,0xcf,0xa4,0x6a,0x9c,0x7e,0x07,0x8b,0x98,0x17,0x49,0x5c,0x46,0xac,0xc8,0xfd +.byte 
0x1c,0xaf,0x91,0x30,0x0c,0x36,0x63,0xef,0x69,0xd3,0x47,0xf4,0x76,0xc1,0xf7,0x40,0x03,0x98,0x9e,0xcb,0x61,0x65,0x46,0x45,0x1c,0x1b,0xfd,0x13,0x36,0xe9,0x19,0xbf,0x2b,0x59,0x51,0xe8,0x04,0x44,0xe3,0xc2,0x4b,0x66,0x78,0x69,0x66,0xa3,0x1a,0xe5,0x2a,0xad,0xf8,0xc5,0x0f,0xb7,0x3e,0xe8,0xab,0xe0,0xe4,0xd9,0xc2,0xb8,0x61,0x5b +.byte 0xef,0x6b,0x4d,0x5f,0xb8,0xdc,0x06,0xa5,0xce,0x08,0x5b,0x1f,0xf4,0x29,0x4d,0x0a,0x3e,0xb3,0x60,0xf4,0x63,0x3c,0x70,0x5d,0x02,0x9c,0x55,0x5e,0x5e,0xd1,0x9b,0xed,0x20,0x75,0x54,0xa1,0x8e,0xae,0xce,0x5a,0xb2,0x2d,0xe4,0xc3,0x9b,0x7d,0x72,0xce,0x7c,0x0c,0xa9,0x99,0xa4,0x12,0xaa,0x31,0xe9,0x61,0x47,0x8a,0x41,0x93,0xd5,0x69 +.byte 0xc5,0xf3,0x9f,0xf4,0x97,0x69,0x64,0x6f,0xf9,0x5b,0xbf,0x58,0xf6,0x3b,0x3e,0xd6,0x93,0x94,0x89,0xcc,0xc0,0x25,0x7d,0xf8,0x40,0x9e,0xb2,0xc8,0x75,0x9d,0x4d,0xf0,0x5f,0xa5,0x3d,0x38,0x67,0xea,0x8d,0x1b,0x60,0x5e,0xfe,0xa8,0x26,0xb9,0xed,0xc0,0xe9,0xc8,0xec,0xb1,0x77,0x0f,0xf2,0xaa,0x77,0x2a,0xcd,0xa8,0x70,0xb7,0xda,0x60 +.byte 0x49,0xb3,0x01,0x95,0xc8,0xac,0x71,0x6a,0xd0,0x49,0x67,0x2a,0x04,0xfc,0x55,0x38,0x08,0x37,0xd9,0x21,0x37,0xce,0x41,0xaf,0x7c,0x33,0xdd,0xcd,0xe0,0x92,0x27,0x38,0x63,0x77,0xea,0x86,0x04,0x99,0x4e,0x61,0x8b,0x8f,0xfe,0x4e,0xc1,0x16,0x6c,0x89,0xac,0x1f,0x0b,0x67,0x75,0x49,0xf4,0xdb,0x6d,0xd3,0xb8,0x1d,0x9c,0xb2,0xe6,0x98 +.byte 0x81,0xae,0x3f,0xe0,0xdd,0xda,0xfa,0x4c,0x8b,0x30,0x18,0x88,0xa1,0x1d,0xa1,0x18,0xb8,0x28,0xc2,0x04,0x6a,0x80,0x02,0x5a,0xe6,0x04,0x85,0xfa,0x54,0x38,0x45,0x64,0xe1,0x50,0x4a,0x38,0x4c,0x85,0xf7,0x00,0x0c,0xd3,0x16,0xcb,0xfa,0x38,0xb4,0x1b,0x6a,0x95,0x3d,0xc3,0x24,0x79,0x0e,0x3e,0x81,0xe6,0xc3,0xd9,0xdb,0x05,0x19,0x7c +.byte 0xb4,0x4d,0xef,0x71,0x22,0x53,0x97,0x8a,0xc9,0xe3,0x69,0x20,0x5b,0x83,0xb1,0x44,0xd7,0xd1,0x1e,0x87,0xa7,0xbf,0xe4,0x84,0x68,0x9c,0x77,0xfe,0x83,0xdb,0x7a,0x53,0xa8,0x53,0x1f,0xc7,0xd1,0x6a,0x26,0x87,0x71,0x06,0x23,0xa7,0xe0,0x18,0x5d,0xfa,0x8c,0xa7,0x24,0xee,0xf6,0x74,0xab,0x17,0xd3,0x46,0x33,0xe9,0xc3,0xcd,0xa6,0xaf +.byte 0xcf,0xa1,0x60,0x75,0x7b,0x77,0xc3,0x58,0xa2,0xe8,0x87,0x7b,0x4b,0x57,0xb1,0x96,0xc1,0x91,0x6d,0xbf,0x71,0xb3,0xbf,0xe2,0x62,0x86,0x72,0xa9,0x01,0x64,0x62,0x32,0x33,0xc8,0xa4,0x26,0x7d,0xfa,0x0d,0xd4,0xd8,0xc3,0xaa,0xc0,0xc8,0x7c,0x51,0xe8,0x10,0x08,0x6f,0xf6,0xc1,0x46,0x89,0xc4,0xd2,0x00,0x1d,0x14,0x05,0x89,0x64,0x52 +.byte 0xcd,0x1f,0x97,0x0b,0x1d,0x94,0xbe,0x9d,0xa0,0x6b,0x03,0x9b,0x83,0x87,0x38,0x0f,0x65,0xdd,0x6a,0xaf,0xf1,0x22,0x74,0x7e,0x11,0xa0,0xdf,0x1e,0x95,0xef,0x1a,0xdc,0x8b,0x29,0x4a,0xbe,0xfd,0x2f,0xc7,0x48,0x94,0x3f,0xb9,0x8c,0x8e,0xe1,0x0c,0x54,0xa6,0x2f,0xa5,0x2b,0x71,0xdd,0x16,0x68,0x91,0x35,0xd0,0x22,0x48,0x1f,0xf2,0xe2 +.byte 0xe8,0x57,0x83,0xd7,0x49,0x43,0xfd,0xf9,0x77,0xb5,0xfa,0x70,0x19,0xeb,0xae,0xf6,0x31,0xfe,0xd6,0x81,0x6c,0xcc,0x14,0x28,0xa6,0x9f,0x74,0x56,0xc5,0xf6,0x51,0xba,0xc8,0xbd,0x32,0x80,0x5f,0xdb,0x28,0x3f,0x4a,0x55,0x01,0xe1,0x39,0xf5,0x9c,0xda,0xb3,0x42,0xee,0x43,0x17,0xc3,0xc7,0xf5,0xd1,0xda,0xd2,0x2e,0x56,0xcf,0x77,0x0e +.byte 0xdd,0x72,0xcf,0xe5,0xab,0xfb,0xd6,0xa2,0x6c,0x03,0xa6,0x77,0x25,0xf8,0x2a,0x8c,0xfa,0x6f,0x45,0x79,0x59,0x84,0x92,0xd1,0x00,0x58,0xc7,0xb8,0x95,0x4d,0xc8,0x49,0xad,0xe0,0x1e,0x64,0x47,0x00,0xfb,0x93,0x7f,0x3e,0xf1,0x65,0x70,0x47,0x64,0xbb,0x36,0x63,0xe3,0x09,0xcb,0xdb,0x5a,0xd1,0x72,0x83,0xfd,0x15,0x91,0xa2,0x03,0x81 +.byte 
0x04,0x98,0x45,0x0f,0x7f,0x23,0x48,0x6c,0xb1,0x2d,0xd0,0x2c,0x61,0x52,0x1b,0x4a,0x52,0x08,0x92,0xe1,0x7a,0xf1,0x8c,0x1f,0x1f,0xdf,0x1c,0xfd,0xd9,0x46,0x99,0x71,0x05,0x58,0x71,0x82,0x5c,0x05,0xa0,0xb2,0x6a,0x50,0xd2,0x6e,0x35,0xf4,0x6c,0xfb,0x50,0x99,0xb3,0xc1,0x2b,0x05,0xaf,0x02,0xe5,0x18,0xfa,0x74,0x09,0xcc,0xa5,0x2c +.byte 0x26,0xfd,0xc5,0xe7,0x2c,0x96,0x0f,0xa4,0x7c,0x88,0xc6,0x7f,0xf9,0x74,0x9d,0x1c,0xe5,0xd2,0x27,0xf0,0xae,0x5b,0x4c,0xbf,0x0a,0x99,0x2e,0xaa,0x54,0xba,0x0d,0x75,0xd9,0x48,0x76,0xf3,0xe9,0xd9,0x01,0xbe,0xaa,0x97,0x09,0xfe,0xb2,0x4a,0xcb,0x55,0xd0,0xe1,0x58,0xec,0x31,0x0c,0xd9,0xdf,0xd9,0x01,0xf9,0x3c,0x28,0x40,0x91,0xbb +.byte 0x4d,0x2d,0x88,0x60,0x31,0xc7,0xc9,0x1d,0xaf,0x22,0x44,0x21,0x05,0x06,0xdd,0x07,0x60,0x29,0x7d,0x49,0x30,0x9d,0x35,0x1d,0x9f,0x37,0xbd,0x32,0xb2,0x21,0xa6,0x4f,0x89,0xd8,0xe6,0x85,0x44,0xcf,0x13,0x12,0x4f,0x5f,0x50,0x71,0x01,0x39,0xff,0x6e,0xa0,0x07,0xff,0xf0,0xa6,0x3b,0x39,0x59,0x17,0xae,0x93,0xb2,0x86,0xcc,0xe5,0x59 +.byte 0x5a,0xf2,0x82,0x62,0xc6,0x8d,0x13,0x2f,0x6b,0x92,0x28,0xbe,0xd1,0xc0,0xf6,0xc9,0xe1,0xd6,0x98,0x94,0x65,0xd4,0x2a,0xdb,0x37,0xb1,0xd3,0x83,0xf2,0xaa,0xa5,0x00,0xf9,0x08,0xe6,0x22,0x38,0x30,0xb6,0x49,0x8d,0x9d,0x1c,0xa4,0xf7,0xdb,0x3c,0x6f,0x75,0x08,0xa0,0xda,0xe9,0xc0,0x01,0x54,0x09,0x68,0xc6,0x7c,0x5b,0x4d,0x88,0x71 +.byte 0xa7,0x2f,0xb3,0x50,0x18,0x4a,0xfb,0x55,0x29,0xf2,0x56,0x1d,0x4c,0x12,0x22,0x1c,0x54,0xd2,0x63,0x67,0xfa,0xe9,0x5b,0x74,0x3b,0x38,0xf6,0xa0,0x85,0x63,0x1c,0x41,0x6a,0x6d,0x71,0x1d,0xb1,0x39,0x28,0x88,0x96,0x9b,0x9c,0x50,0x9e,0x57,0x4e,0xf5,0xa7,0xf4,0x17,0xc6,0xca,0x42,0x84,0x83,0xca,0xa4,0x28,0x72,0x08,0x74,0x62,0xe1 +.byte 0xf0,0x73,0xc5,0x86,0x6c,0x76,0x9d,0xd3,0xa6,0xb8,0x5d,0x73,0x1b,0x02,0xe2,0x69,0x8b,0x59,0xd6,0x6a,0x53,0xe9,0x13,0x88,0x41,0x95,0xe9,0x97,0x5f,0x07,0x62,0xa5,0x21,0x97,0x7e,0x5e,0xc2,0x2c,0xc7,0xaf,0x0a,0xdb,0x9e,0x4f,0x44,0x4b,0xd6,0x3d,0xc0,0x24,0x38,0x50,0x47,0x98,0xa3,0xfc,0xda,0xfc,0xae,0x0e,0x2b,0x9b,0x53,0x0f +.byte 0x6b,0xb1,0x2f,0xd5,0xd7,0x68,0xc9,0xab,0xb9,0xff,0x7f,0x54,0xd6,0x2f,0x88,0xbc,0x5e,0x6a,0x22,0x49,0x0f,0x98,0xbe,0x1f,0xef,0x3e,0xcc,0xa2,0x72,0x6b,0x16,0xbe,0xe8,0x5f,0x0e,0x36,0xa2,0x68,0xe0,0x65,0xd9,0x7c,0xdc,0x8c,0x6a,0x66,0xf0,0x6a,0xfc,0x2b,0x85,0x28,0x2a,0x1a,0xfc,0x92,0x64,0x3d,0x38,0x5b,0xc1,0x0c,0x68,0x45 +.byte 0x94,0x85,0x58,0x82,0x99,0xfc,0x20,0xdd,0x62,0xae,0xed,0x35,0x7c,0x02,0x16,0x9b,0x00,0x8a,0x44,0x02,0x80,0x00,0xca,0x7d,0x95,0x03,0x5d,0xa6,0xec,0xe1,0x0c,0x50,0x34,0x61,0x55,0xee,0xb5,0x11,0xff,0xc3,0xaa,0xf2,0xbc,0xa3,0xa9,0xc7,0x6b,0x16,0xab,0x56,0x7b,0x55,0x54,0x95,0x88,0x15,0x15,0x6a,0x2c,0x97,0xd7,0x7c,0x26,0x65 +.byte 0xaf,0x8d,0xd1,0x05,0x57,0xb2,0x63,0xd1,0x22,0xf7,0x7d,0x77,0x54,0x6c,0x87,0x03,0x1f,0x0e,0x2b,0xae,0xa6,0xa4,0xb5,0xd6,0x95,0x34,0xd0,0x62,0x4e,0xfb,0xcb,0xee,0x01,0xc1,0xf7,0x36,0x94,0xa6,0x54,0x94,0x90,0x0e,0x45,0x9c,0x95,0x89,0x96,0x88,0x32,0x90,0x27,0x48,0xc5,0x96,0xf0,0x7e,0x7f,0x69,0x99,0xdf,0x7b,0xfb,0x2b,0x7b +.byte 0x38,0x10,0x6b,0xd1,0x1a,0xfb,0xf2,0xcd,0x2d,0x8b,0x47,0x21,0xca,0x92,0x64,0x28,0xd1,0x53,0x1d,0xed,0xa7,0x7d,0xa4,0x88,0xab,0xd0,0xfe,0x9b,0x2b,0xf8,0x48,0x94,0x8d,0xd5,0xfa,0x5c,0xef,0x12,0x43,0xdf,0xb6,0x5b,0x83,0x43,0xf3,0xf7,0x1d,0x6f,0x3e,0x44,0xe6,0x20,0xd8,0xbc,0x4a,0x9a,0xed,0xa0,0x79,0x66,0x8d,0x23,0xca,0x35 +.byte 
0x15,0x87,0x11,0x50,0xa4,0x40,0x6e,0xfa,0xf7,0xaf,0xa2,0xb7,0x3b,0x9b,0x8b,0x44,0x19,0x90,0xb3,0x47,0x92,0x08,0x2f,0x0c,0xe2,0x95,0x5d,0x80,0xb5,0x93,0x5e,0x1c,0xb5,0xce,0x52,0x0b,0x12,0xc1,0x72,0x2e,0x66,0x8c,0xd1,0x13,0x94,0x36,0xf7,0x17,0xe3,0xad,0x69,0xc9,0x2d,0x21,0x64,0xcd,0x8f,0x2d,0x8f,0x0c,0x85,0xa5,0x23,0x8b +.byte 0x6c,0x00,0x13,0xf7,0x6a,0xb4,0x68,0x1a,0xcc,0xc4,0x03,0x5b,0xd6,0x7b,0x5b,0x34,0x90,0x34,0x3e,0x0a,0x07,0x19,0x81,0x99,0xe9,0xd2,0xa8,0x73,0x2c,0xa2,0xcf,0xdf,0x29,0x69,0xbf,0xec,0xdd,0xa5,0xd3,0x16,0xb0,0xd2,0x9c,0x2f,0xeb,0x70,0x50,0x20,0x3c,0x22,0x1a,0x5b,0x55,0x79,0x76,0x0f,0x1f,0xd0,0x34,0xa9,0x55,0xad,0x75,0x75 +.byte 0x7f,0xa7,0x9b,0xa7,0x3d,0x5d,0x73,0xce,0x91,0xf6,0x9b,0xcd,0xa5,0xee,0x48,0x44,0xba,0xd5,0xad,0xbe,0x1e,0xc6,0xd2,0x8b,0x05,0x21,0x20,0xb5,0x7d,0x78,0x88,0x10,0x20,0x85,0x90,0x8f,0x47,0x74,0x68,0xe6,0x32,0x2a,0x13,0x7a,0xb3,0x5d,0xfe,0x24,0x97,0xd1,0x65,0x55,0x60,0xb3,0x88,0xfb,0x59,0xc9,0x29,0x70,0xf1,0x45,0xbd,0xbe +.byte 0x4d,0x01,0x4e,0x5e,0x5f,0x99,0x52,0xf8,0x5f,0x38,0xcf,0xa8,0x5d,0x69,0x54,0x87,0x72,0x41,0xca,0xc4,0x63,0xc1,0x52,0x58,0x66,0x8b,0xda,0x8b,0x61,0xd1,0xab,0x7d,0x8d,0xfe,0x51,0x8d,0xf6,0xd0,0x21,0x4d,0x0b,0xc5,0xea,0x74,0xcd,0x21,0x93,0x4a,0x91,0xe5,0x3f,0xce,0x35,0x3b,0x3f,0xc0,0xab,0xa4,0x23,0x76,0xd1,0x8c,0xa7,0xbe +.byte 0x15,0xab,0x8e,0xd7,0x0d,0x86,0xac,0xc3,0x06,0xff,0x33,0xf2,0x41,0x6f,0x69,0x58,0x49,0xd1,0x73,0xcf,0x5e,0x4e,0x1e,0x46,0x12,0xfa,0x30,0x0d,0x4b,0xb1,0xfb,0xc6,0xe6,0x0d,0xcd,0x8d,0xca,0x34,0x28,0x5a,0xed,0x85,0x55,0x31,0xee,0xba,0xbf,0xa4,0x6f,0x9c,0x7d,0xeb,0x4b,0x1b,0x73,0xea,0x4e,0xb9,0x62,0x5d,0xac,0xe3,0x53,0xdf +.byte 0x27,0x87,0x2f,0x39,0xca,0x5b,0xd6,0x72,0xcf,0x95,0xc6,0x2a,0xa5,0x3f,0x57,0xfd,0xdc,0xa9,0x4a,0x86,0x0f,0xcd,0xd5,0xea,0xfe,0x85,0xeb,0x9b,0x84,0xc6,0xf7,0xba,0xc2,0x37,0xbc,0x18,0x85,0x49,0xa6,0x7f,0xd9,0x3e,0xfb,0xf0,0x0c,0x39,0xe3,0x1c,0x06,0xfe,0xb6,0x49,0xa3,0x8b,0x72,0x2b,0x39,0xa1,0x48,0xfd,0x1f,0xfe,0xa4,0xf7 +.byte 0xcc,0x7a,0xef,0x64,0xa0,0x0d,0xeb,0x78,0x71,0x8c,0xd6,0x59,0x7c,0xf4,0xaa,0x81,0x7a,0x89,0xe6,0x22,0xc9,0x57,0xe8,0x13,0x9c,0xca,0xc4,0x6f,0xb5,0xbf,0x08,0x31,0x93,0x56,0x2a,0x82,0x00,0x95,0xdc,0x4b,0xfd,0x9b,0xc7,0x8b,0x31,0x72,0xa0,0xff,0xbe,0xb4,0xd6,0x07,0x16,0x0a,0x4a,0x0a,0x96,0x02,0x83,0x53,0x2a,0x4d,0x33,0x72 +.byte 0x1f,0x20,0x20,0xc3,0x63,0xee,0x4e,0x05,0x90,0x7d,0x21,0xd0,0xf1,0xda,0xde,0x0d,0x4a,0x59,0xb9,0xca,0x81,0xe3,0x1f,0x83,0x19,0xdc,0x09,0x03,0x5f,0xaa,0xee,0xbc,0x5a,0xfa,0xc6,0x4d,0x3d,0xfe,0xfe,0xf3,0xdb,0xc3,0x77,0x31,0x74,0xb4,0x94,0xb5,0x09,0xb1,0xb5,0x13,0x47,0x2e,0x4f,0x3b,0x38,0x83,0xf5,0xfc,0xe9,0xcc,0x45,0xea +.byte 0x5b,0x88,0x21,0xba,0x53,0xc5,0xf6,0xd4,0x63,0xc5,0x37,0x1d,0xa1,0x42,0x2e,0x9c,0x9a,0x50,0x2c,0xfe,0xdb,0xf6,0x31,0x36,0x5f,0x9d,0xed,0x63,0x42,0x20,0xdd,0x27,0xe5,0x34,0x3c,0x0f,0x06,0x8b,0x8f,0x32,0xb6,0x47,0xce,0x07,0xcb,0x27,0xc1,0xb7,0xfe,0xb2,0x69,0x81,0x79,0x20,0xd7,0x47,0xbb,0xab,0x61,0x5f,0x09,0x99,0xdf,0x9f +.byte 0xde,0x59,0x33,0x75,0xd1,0xcc,0xfe,0x92,0x79,0x1f,0x2d,0x59,0x88,0xef,0x4b,0x80,0x0c,0x38,0xa3,0xb1,0xef,0xae,0x53,0x84,0x2f,0xbd,0xd3,0x0c,0xcf,0xd5,0xf7,0xb7,0x6f,0xa7,0x22,0x1f,0xf1,0x56,0x76,0x0c,0x78,0x52,0xa3,0xc0,0xd0,0x2f,0xbc,0xdf,0x29,0x0d,0xa8,0x54,0x0d,0x2b,0x65,0x1b,0x7f,0xeb,0x21,0x22,0xaf,0x10,0xc1,0xd6 +.byte 
0x30,0xa8,0x2f,0xb1,0x25,0xbf,0xdc,0xee,0xe9,0x35,0x40,0x69,0xa0,0xa0,0x27,0x85,0x2e,0x18,0xc1,0x36,0x24,0xc5,0x96,0x9a,0x85,0x3f,0xbb,0xfd,0xf5,0x02,0xa2,0xa1,0x92,0x3c,0x16,0x48,0x9f,0xc5,0x00,0x7c,0x7b,0xaf,0x31,0xba,0x68,0x0e,0x58,0x88,0xf4,0x10,0xb9,0xa6,0xe0,0x46,0x2a,0xb8,0x8d,0xc7,0x8e,0xad,0x7c,0xec,0xd2,0x74 +.byte 0x92,0xfe,0x1b,0xd0,0x73,0x79,0x0b,0x4e,0xcc,0x2d,0x5c,0xe7,0x80,0x2d,0x21,0x1c,0x97,0xfc,0x2a,0xc9,0x9c,0x07,0x10,0x64,0x8b,0xf7,0xf5,0x1c,0x54,0xb6,0x6c,0x73,0x1c,0x50,0xd3,0x1a,0x2a,0x63,0xcb,0xba,0xd3,0x95,0xe2,0xa6,0xc3,0xca,0x45,0xfd,0x5e,0x1b,0xbb,0x6b,0x4d,0xb3,0xf7,0xfd,0xaa,0xf9,0x73,0xb8,0x74,0x4d,0x36,0x7e +.byte 0xcc,0xaa,0x1e,0xf3,0x20,0x68,0xa5,0x0c,0x03,0xe3,0xbe,0xee,0x82,0x03,0x8d,0x10,0xa6,0xf6,0x6c,0x73,0xc2,0x9d,0x74,0xba,0x57,0x17,0xd7,0xfa,0x85,0xf5,0x1e,0x3d,0xf8,0xc7,0x80,0xef,0xcd,0xf0,0xf4,0x46,0xfc,0x07,0xb5,0xc4,0x5f,0xd2,0x04,0x6a,0x90,0xf5,0x76,0xb6,0xf9,0x73,0x22,0xa6,0x09,0x2f,0xbf,0xb5,0x93,0x9a,0x95,0x05 +.byte 0x95,0xaa,0xf9,0x8c,0x71,0xd6,0xc6,0xd9,0x72,0x50,0xf6,0x58,0x77,0x09,0x47,0x97,0x21,0x42,0xf0,0x30,0x5c,0x3c,0xec,0x60,0x67,0xdf,0x5e,0xd2,0xed,0x0f,0xab,0x25,0x11,0xbb,0xf8,0x34,0x1e,0xbd,0x7f,0xc6,0x52,0x19,0xf5,0x53,0x28,0x46,0x75,0x93,0xce,0xc2,0x0b,0xdf,0xfd,0xa5,0xf1,0xb0,0xa2,0x0b,0x97,0xb5,0x76,0xb4,0x8a,0x2b +.byte 0x82,0x55,0x23,0x29,0xc2,0xd3,0x32,0x94,0x2f,0xf0,0xe6,0x77,0x2c,0xe4,0x6a,0x7f,0xd7,0xee,0x84,0xfb,0xba,0xb8,0x4b,0xae,0x13,0x34,0xbd,0xa8,0x12,0x7a,0x3c,0x28,0x40,0x74,0x5d,0x9a,0x11,0x1a,0xe9,0x74,0x31,0x28,0x3d,0x3d,0x64,0xb7,0x54,0xa0,0x51,0x0d,0xed,0x97,0x94,0x56,0x7a,0x48,0x8e,0x36,0xc9,0xae,0x5f,0xc6,0x79,0x45 +.byte 0x4f,0x07,0xdd,0x13,0x52,0x8b,0xfc,0x3b,0x73,0x44,0x68,0x64,0x51,0x0d,0x95,0x6f,0x0f,0x94,0xba,0xf8,0x40,0x64,0x51,0x43,0x49,0x63,0xc1,0xbd,0xf3,0x39,0x7f,0x6e,0x6f,0x45,0xeb,0xd2,0x33,0x44,0x2d,0x10,0xb4,0x68,0xcb,0xcb,0x8c,0x84,0xc5,0xd4,0x63,0x1d,0x23,0x85,0x30,0x4d,0x6c,0xfc,0xc9,0xa4,0x8c,0xd2,0x42,0x69,0x2f,0x17 +.byte 0x86,0xf0,0x17,0xd0,0xb2,0xaa,0xfd,0x62,0xcb,0xb4,0xfd,0xba,0x29,0xf8,0x85,0x45,0x84,0x9d,0xae,0xf8,0x9c,0x8f,0x64,0xd5,0xb8,0xb6,0xa9,0x64,0xf9,0x39,0x86,0x68,0x29,0xac,0x32,0x87,0x84,0x6c,0xb0,0x09,0xd2,0xdd,0xf2,0xec,0xa1,0x3a,0xfd,0x11,0x37,0x54,0x67,0x29,0x62,0x25,0x62,0xe8,0x6a,0x4b,0x5e,0xde,0x9a,0xf0,0x97,0x73 +.byte 0x66,0x69,0x2a,0x21,0xbe,0x95,0x86,0xca,0xf9,0x17,0xe9,0x4b,0x23,0x83,0x1e,0x8c,0x37,0x47,0x91,0x03,0x3f,0x9f,0xb8,0x60,0x2c,0xdd,0x82,0xbd,0x2a,0xc3,0xe7,0x30,0x8f,0x91,0x2b,0xa4,0x23,0x01,0x03,0xb2,0x8b,0xbd,0xd2,0x1d,0x16,0xf7,0x6a,0x86,0xa8,0xe4,0x54,0x6f,0x9c,0x47,0xa5,0x0f,0xbe,0x94,0x56,0xfa,0x18,0x69,0xbe,0x92 +.byte 0xe9,0xf8,0x24,0x4d,0x65,0x42,0x81,0x1f,0x85,0x52,0xb7,0xc9,0x49,0xde,0xa5,0x4c,0x8f,0x0d,0x5f,0x12,0x68,0x68,0x35,0xce,0x29,0x22,0x5c,0x55,0x3e,0xbd,0xce,0xf2,0x2a,0xec,0x7e,0xe1,0x29,0x0a,0x88,0xf3,0x5e,0xeb,0x27,0xe5,0x52,0xee,0x72,0x37,0xba,0xff,0x82,0x97,0xa9,0x5d,0x77,0x6f,0xb9,0xc3,0xa7,0x73,0xba,0x7f,0x2f,0x7a +.byte 0x19,0x32,0x87,0x56,0xa2,0x89,0xb2,0xb4,0x48,0xbe,0x2e,0x30,0x89,0x0a,0x8f,0x75,0x25,0x25,0x5c,0x46,0xe8,0x02,0x45,0xcb,0x03,0xd1,0xa3,0xeb,0x70,0x71,0x08,0x1c,0x46,0xf1,0x2c,0x43,0xe2,0x44,0x30,0x6a,0x61,0x31,0x45,0x3e,0xbb,0x47,0x33,0x24,0x25,0x13,0xeb,0xf7,0x24,0x66,0x15,0x4c,0xf3,0x07,0x2f,0xff,0xdc,0x37,0x0f,0x71 +.byte 
0x85,0xc8,0x56,0xa7,0x2a,0x22,0x87,0x8b,0xae,0x35,0x31,0x29,0x96,0xf0,0x81,0xfb,0x2c,0xbf,0x44,0x69,0x69,0x9a,0x77,0xfd,0xc0,0x2b,0x42,0x16,0x67,0xd6,0xbd,0xd0,0xf1,0xb9,0x40,0x8f,0xd2,0x9a,0x1b,0x2c,0x64,0x78,0x6b,0xda,0x37,0x26,0xae,0x4c,0xee,0x36,0xaf,0x84,0x61,0xe4,0x93,0x22,0x64,0xaf,0xee,0x6d,0x69,0x5c,0xe5,0x85 +.byte 0xd8,0xcc,0xcf,0xf3,0xe8,0x05,0xcd,0xd2,0x09,0x66,0xaf,0xbb,0xc4,0x79,0xb2,0xa7,0xa5,0x09,0xd9,0xf5,0xa2,0x83,0x4f,0xd5,0xf5,0xf3,0x7d,0x7a,0xab,0x94,0x83,0xb3,0x15,0xfb,0x0d,0x1a,0x1d,0x77,0xc5,0x63,0x0b,0x54,0xde,0xa8,0x0d,0xc4,0x16,0xe3,0x89,0xeb,0xa3,0x1b,0xd4,0x77,0x13,0xe3,0x55,0x98,0x15,0xab,0x3b,0x32,0xc8,0xd4 +.byte 0x0c,0x91,0x80,0x57,0xf7,0x1e,0x24,0xd0,0x56,0x78,0x29,0xd2,0x03,0xe7,0xc4,0xd2,0x09,0xca,0xee,0x9b,0x60,0x5f,0xa1,0xfd,0xaa,0x85,0x4b,0x68,0x35,0xa4,0x3b,0xef,0x29,0xb8,0x49,0x85,0xee,0xbb,0x39,0xc0,0xc6,0x99,0x97,0xc6,0x86,0x6c,0x27,0xf9,0x1a,0x19,0x6e,0x7c,0xae,0x75,0x41,0x0d,0x08,0x1e,0xf0,0xb4,0xc3,0x9e,0xdb,0x40 +.byte 0x86,0x94,0x9d,0x90,0x09,0x3f,0xdc,0xb9,0xfc,0x59,0x41,0xc5,0x5b,0x89,0x97,0x49,0x4a,0x1a,0x06,0x68,0x83,0xd8,0x7e,0x09,0x51,0xe1,0x86,0xd8,0x88,0xbe,0x8a,0x36,0x48,0xb3,0x83,0x7b,0x57,0xdd,0x8f,0x18,0x67,0x4a,0x7d,0x68,0xab,0xb9,0x05,0xf0,0xe4,0x27,0x4e,0x33,0x44,0xa7,0x13,0x04,0x94,0xc5,0x57,0xaf,0x36,0x03,0xe8,0x09 +.byte 0x36,0x5b,0xe8,0x92,0xad,0x0a,0x79,0x02,0x24,0x43,0x62,0xc7,0xa5,0xce,0x7c,0xac,0x6d,0x0a,0xf2,0x83,0x33,0x05,0x3b,0x6f,0x9d,0xda,0x96,0x9f,0x8b,0x79,0x3e,0x6c,0xd6,0xba,0x7f,0xea,0x84,0xd8,0x23,0xb6,0x92,0xc3,0x9c,0x7f,0x0d,0xcb,0x7b,0x9f,0xbd,0xc2,0xf5,0x6f,0x71,0x67,0x5f,0x0b,0xd1,0x73,0xb5,0x8c,0x46,0x07,0xcd,0xd8 +.byte 0xee,0x28,0xcf,0x8f,0x8e,0x5c,0xde,0x14,0x78,0xc7,0x60,0xd5,0xf4,0x49,0x97,0x46,0x5f,0x49,0x4a,0xb4,0x8f,0xc9,0xd1,0x52,0x34,0x01,0x29,0xa1,0x46,0x55,0xf8,0x29,0x53,0xbb,0x32,0x1e,0x4b,0x89,0x96,0x53,0x0b,0xf2,0x16,0xf9,0xa7,0x70,0x93,0x59,0x78,0xc0,0x77,0x78,0x9f,0x6c,0xb3,0x0e,0x3f,0x6f,0x40,0x09,0x1d,0xd6,0x66,0x4e +.byte 0xe8,0xb0,0xa1,0x14,0x65,0xc8,0xc7,0x3f,0xd2,0xf0,0x1f,0xfd,0x51,0xe0,0x29,0xd6,0x39,0x26,0x60,0xfe,0x62,0xc2,0xe4,0x45,0x6d,0x01,0xdb,0xd3,0x7c,0xdf,0x48,0x10,0x2f,0xf2,0x8e,0x6c,0xc6,0x58,0xc3,0x7d,0x26,0xb1,0x9d,0x52,0x02,0x2a,0x5f,0x2b,0x57,0xca,0x84,0x9d,0x74,0x31,0x01,0x0f,0xda,0x3d,0x7c,0xbb,0xdc,0x71,0x82,0x8b +.byte 0x42,0xaf,0x49,0x9e,0x2c,0xe8,0xdc,0xa1,0xfb,0x23,0x6d,0xdb,0xdc,0x36,0x01,0xc9,0xb3,0x93,0xd4,0x2e,0x8b,0xd1,0xe4,0xed,0x1b,0xd0,0x4c,0xeb,0xaf,0x96,0x57,0xde,0xee,0x90,0xf4,0xa7,0x58,0x46,0x8a,0xd4,0xa9,0x44,0xe0,0xb3,0x13,0x96,0xb2,0x8a,0xb0,0xd3,0xbe,0x71,0x38,0xb7,0x35,0xa9,0xa8,0x48,0x37,0xa3,0x11,0x0e,0x61,0x36 +.byte 0x6c,0xaf,0x6c,0xf2,0x3f,0xd6,0x55,0xb3,0xa5,0xe0,0xaf,0x18,0x6a,0xf5,0x78,0xb5,0x7c,0xc7,0x48,0x24,0x6c,0xea,0x1e,0x7f,0x52,0xb4,0xe8,0x72,0x46,0xd2,0xbd,0x1c,0x9e,0xe6,0x5b,0x3e,0x9c,0x6c,0x6c,0x6b,0x45,0x0c,0x3a,0xb7,0x67,0x3c,0x8e,0x77,0x77,0xbf,0x50,0xb6,0x30,0x6e,0xe1,0x28,0x0d,0x2a,0x85,0x44,0xf8,0xbb,0xf1,0x14 +.byte 0x89,0xaa,0xc2,0x27,0xf5,0x8e,0xa1,0xd3,0x07,0xba,0xe8,0x03,0xcf,0x27,0x1c,0xa6,0xc4,0x63,0x70,0x40,0xe7,0xca,0x1e,0x05,0xb7,0xb7,0xdc,0xc0,0x07,0x4c,0x0d,0x21,0x12,0x60,0x02,0xe3,0x86,0x65,0xe7,0x1c,0x42,0x86,0xdd,0xdb,0x7f,0x26,0x60,0x01,0x3d,0xd8,0x18,0xcd,0x7a,0x9f,0xf8,0xb2,0xf6,0x6d,0xd3,0xe0,0x57,0x1f,0x80,0x30 +.byte 
0x2d,0x5e,0x71,0xdf,0x4d,0x7f,0xcd,0x63,0x77,0x19,0x5e,0x2d,0xd5,0xb5,0xfa,0xa9,0x26,0x02,0xb9,0x62,0x2b,0x57,0x80,0x0a,0xe9,0xbc,0xa4,0x3b,0xa7,0xf1,0xf3,0x77,0x2b,0x6b,0x41,0x5e,0xf7,0xe8,0x66,0x23,0x63,0xac,0xcd,0x58,0xfc,0xa9,0x97,0x6b,0x5a,0x1e,0xe5,0x7d,0xfd,0xb1,0x42,0x7f,0x99,0xdd,0x60,0xaf,0x39,0x46,0x36,0xdd +.byte 0xc2,0x70,0x83,0x53,0xd1,0xc3,0x69,0xc8,0x90,0x0e,0x2b,0x34,0xb2,0x0c,0xb9,0x7a,0xb8,0x6b,0x7c,0xc2,0xf3,0xae,0x41,0x24,0xb8,0x94,0x5f,0xdd,0xce,0xda,0x95,0xda,0x49,0x81,0xb6,0xf8,0xa9,0x8e,0xb3,0x79,0xf8,0x55,0xf9,0xcf,0x8c,0x24,0x99,0xfc,0x6b,0x15,0x0f,0x39,0xac,0xd0,0x3e,0x89,0x9d,0xc2,0x46,0x8c,0x99,0x45,0xfd,0xce +.byte 0x13,0x4c,0x9c,0xc8,0x80,0x87,0x8f,0x7b,0x28,0xe3,0x5e,0x2b,0xe3,0x89,0x7e,0x13,0x52,0x52,0xe9,0x3a,0xed,0x33,0xe7,0x28,0xc7,0x7a,0x48,0x8d,0x0e,0xee,0x24,0xc4,0x61,0x04,0x3c,0xd4,0x7e,0xf3,0x30,0x22,0x07,0x58,0xae,0x02,0xc5,0xd1,0x7d,0x04,0x18,0xca,0xd6,0x04,0xd4,0xc5,0xa4,0xff,0x8d,0x0d,0x68,0xd4,0x1a,0x3a,0x72,0x6f +.byte 0x41,0x1e,0xda,0xc0,0x97,0x7c,0x55,0x2c,0x13,0x20,0x9a,0x07,0x35,0xcc,0xc5,0x83,0xee,0x41,0x77,0x51,0x28,0x07,0xe0,0x81,0xe3,0x9b,0x1f,0xdb,0x73,0x5c,0x8d,0x82,0xa2,0x8b,0xf4,0x92,0x4f,0x70,0xa8,0x6a,0xcf,0xbf,0xcf,0x0b,0x71,0xbc,0xeb,0x81,0xb4,0xc9,0x65,0xe7,0x43,0xef,0x25,0x45,0x27,0xea,0xcd,0x60,0x68,0xcd,0x2d,0x7a +.byte 0xfd,0x88,0x6d,0x06,0xd5,0x92,0x32,0xc3,0x18,0x88,0x64,0xa7,0xde,0x39,0xeb,0x0b,0x5c,0x9c,0xf6,0xf6,0x93,0x90,0x24,0x0c,0x9e,0x0b,0x89,0x1c,0xcb,0xc8,0x96,0x72,0x17,0xae,0x46,0x61,0x69,0x6e,0xbe,0x6c,0xf1,0xa4,0xa4,0x50,0xa9,0x2a,0x47,0xd7,0x80,0xe4,0x72,0xd2,0x3f,0x1a,0xdd,0x82,0xdc,0x12,0x66,0x10,0x26,0x15,0x80,0x56 +.byte 0x4d,0xbe,0x02,0xae,0xe1,0x24,0x8a,0x41,0x52,0xc8,0x5d,0x8d,0x62,0x85,0xbe,0x7c,0x35,0xdd,0x88,0xd3,0xf5,0xf7,0x9b,0xf1,0x5a,0x4e,0x70,0x48,0x31,0x5a,0xaa,0x96,0x1e,0xf8,0x73,0xb4,0x0f,0xb2,0x82,0xf4,0x13,0xac,0xba,0x3b,0x12,0x36,0x1e,0x23,0xbf,0x09,0x8a,0x1c,0x96,0x47,0x56,0x2d,0x16,0x24,0xc3,0x23,0x65,0xe2,0x99,0xd0 +.byte 0xf0,0xa0,0x2c,0x64,0x35,0xad,0x16,0x34,0x67,0x52,0xbc,0x8f,0x17,0x90,0xf9,0xc7,0x4f,0x64,0x6c,0x75,0x3f,0xd7,0x48,0xa4,0x6b,0x43,0xe6,0x2e,0x7a,0xe3,0x79,0xe8,0x47,0x51,0xe9,0x52,0x36,0x30,0xa4,0x24,0x89,0x00,0xd5,0x77,0xbd,0x34,0x2e,0xa9,0x74,0x02,0x25,0xc0,0x0c,0x10,0x31,0xf0,0xa7,0xcb,0x01,0xed,0x43,0x70,0x15,0xe6 +.byte 0xda,0x01,0xb4,0x7a,0x13,0xbc,0xf1,0x57,0x34,0xb1,0xb7,0xb3,0x26,0x18,0x5f,0x42,0x6b,0xcb,0x78,0x25,0x48,0xe9,0xe6,0xe8,0xf5,0x45,0xa2,0x61,0x97,0x10,0xa5,0x7e,0x7a,0x48,0xf3,0x23,0xa5,0x88,0xc0,0xc4,0xc7,0x3b,0x5c,0x0c,0xfc,0xe0,0xf4,0x68,0x64,0xc6,0x9f,0xd9,0x17,0xcb,0xe5,0xba,0x4a,0xa4,0xe0,0x27,0xf8,0x2b,0x4e,0x67 +.byte 0x13,0xab,0xd2,0xce,0xbc,0x8d,0xdf,0x6e,0x49,0xaf,0x72,0x8a,0x51,0xa1,0x78,0x38,0x0a,0x58,0x2e,0x72,0xec,0x94,0x70,0x8d,0xdf,0x0b,0x5a,0x52,0x81,0xb1,0x9b,0xda,0x2c,0xd2,0x85,0xbb,0x8f,0xb0,0x99,0x64,0x24,0xbe,0x03,0xd9,0x92,0x8d,0x29,0xf3,0x41,0x9c,0xd6,0xef,0xef,0xb2,0x5c,0x22,0x90,0xff,0x27,0x4d,0xb3,0x91,0x72,0x9f +.byte 0x42,0xca,0x66,0xc5,0x66,0xb7,0x50,0x3e,0x83,0x6f,0x2d,0xe3,0x7b,0x2a,0xc4,0x5a,0x93,0x92,0x80,0xdb,0x1a,0xdd,0xef,0xfd,0x96,0xcb,0x6a,0xd8,0x4a,0xc5,0x6e,0x36,0x4a,0xe4,0x10,0x15,0xb3,0x12,0xb4,0xd9,0x9e,0x37,0x48,0x96,0xcb,0xe5,0x3a,0x4f,0x57,0xa6,0x46,0x2f,0xd3,0x06,0xb8,0x61,0x1c,0x17,0x3a,0xb8,0xad,0x40,0x50,0x57 +.byte 
0x10,0xd9,0xd0,0xe9,0x1b,0xe3,0x18,0x8c,0xc4,0xfa,0x08,0x8d,0x82,0x3c,0x22,0x22,0x1b,0x97,0x64,0xa6,0x8b,0x7c,0x70,0x2b,0xa0,0xd8,0x4c,0x64,0xcf,0xbc,0x49,0x78,0xcb,0x92,0x0f,0xe1,0x60,0x12,0x4e,0x92,0x0d,0xaf,0xa4,0x1f,0xe0,0x2a,0xa5,0x69,0xc6,0xa1,0x91,0x5c,0xdd,0xb8,0xae,0xfa,0xc5,0xb9,0x18,0x31,0x81,0x32,0x6e,0x97 +.byte 0x44,0x2a,0xda,0x58,0xcd,0x9e,0x0d,0x57,0xe0,0xe3,0x5f,0x7b,0x04,0xd8,0xc8,0x68,0xf5,0xa2,0xac,0x0c,0x29,0xf0,0x7e,0xff,0x32,0xfb,0x53,0x1a,0xc2,0xe3,0xae,0xa5,0xe4,0x9c,0x50,0xaf,0xf4,0xde,0x0b,0xdd,0x4d,0xfa,0x65,0x3c,0xbe,0x3c,0xb8,0xda,0x88,0xd9,0x6c,0x55,0x58,0xe1,0x4d,0x00,0xa8,0x1e,0xe2,0x3a,0x9c,0x53,0x9b,0xca +.byte 0xb7,0x5d,0x3a,0x83,0xe0,0xbb,0x95,0xc4,0xd5,0x45,0x48,0xdc,0x12,0xab,0x24,0xfc,0x5d,0x91,0xe1,0xc8,0x0a,0x5c,0x10,0xc4,0xc9,0xaf,0xb6,0x54,0x80,0xfd,0xa0,0x70,0xb9,0xab,0xdf,0x34,0x9f,0x5c,0xff,0xde,0x8e,0xa0,0x0b,0x21,0xcf,0x28,0xc4,0xdf,0x67,0xb5,0xc0,0x20,0x49,0x0c,0x7e,0xe6,0xf7,0x41,0x6b,0x75,0xd9,0x1d,0x3b,0x49 +.byte 0xb7,0x4f,0x01,0xd1,0x20,0x62,0x15,0x1e,0x9f,0x16,0xb0,0xbd,0x30,0x09,0x05,0x00,0x0f,0x25,0x5a,0x37,0xe9,0xa6,0xc6,0xef,0xe5,0x39,0x2b,0xd7,0x6b,0xc5,0x96,0xd2,0xad,0x46,0xaf,0xd3,0xc0,0xfd,0xea,0xff,0x4c,0xaa,0x44,0x48,0x9a,0xdb,0x99,0x44,0x3f,0x4a,0xf0,0x3f,0x81,0x75,0xf2,0x79,0x31,0x3c,0xed,0x56,0xc6,0xf0,0xf1,0x8c +.byte 0xdb,0x1d,0x6c,0x6c,0xcc,0xfb,0xc2,0x30,0xf6,0x24,0x14,0x69,0xc4,0x89,0x4d,0xd0,0x10,0x77,0x37,0x00,0xe8,0xc9,0xf2,0x32,0xf1,0x43,0x8b,0xe1,0x09,0xc4,0x59,0x17,0xf9,0x20,0x2b,0x01,0x76,0x20,0xb8,0x03,0x84,0xf6,0xd7,0x2e,0xef,0x20,0xa6,0xfa,0x8b,0x74,0x7f,0x4a,0x14,0x33,0xad,0xac,0x45,0x66,0x18,0x2b,0x6b,0xd2,0xb8,0x20 +.byte 0x1a,0xff,0xca,0x25,0x69,0xfd,0xba,0x4b,0x5b,0x9c,0x38,0x35,0x4c,0x30,0xa2,0x24,0x3d,0xbb,0xd4,0xf3,0x67,0x24,0xa5,0x93,0xc6,0xf5,0xb2,0xb4,0xa5,0x04,0x53,0xb6,0xe4,0xc7,0xdc,0xf1,0xe5,0x43,0xb7,0x73,0xaa,0xab,0x5c,0xea,0xcb,0xf1,0xeb,0x5b,0x04,0x7a,0xff,0x0f,0x5e,0xb4,0xd3,0x2a,0x39,0x50,0x1b,0x54,0x1f,0x32,0xd7,0x7c +.byte 0xea,0x3f,0xee,0xa5,0xc8,0x46,0x48,0x7e,0x75,0x60,0x7a,0x42,0x42,0xd3,0x15,0x07,0x69,0x46,0x1c,0xe2,0x21,0x31,0x94,0x31,0x24,0x9e,0x39,0xab,0x7a,0xf9,0xc2,0x0b,0x2d,0x6b,0x55,0xa3,0x36,0xb2,0x65,0xf2,0x17,0x08,0xde,0x15,0x83,0x07,0x36,0x12,0x54,0x8f,0x0b,0x23,0xa8,0x7e,0xb5,0x57,0x1c,0x9e,0x29,0xd7,0xd4,0x9b,0xc1,0xf6 +.byte 0x94,0x23,0xf3,0x92,0xbf,0xba,0xc8,0xf5,0x78,0x3e,0x67,0x48,0x14,0x3b,0xd4,0xe9,0x8f,0x78,0xc1,0x4b,0x9a,0x59,0x08,0xaa,0x50,0xf4,0x9d,0xc4,0xc3,0x2c,0xbc,0x56,0x2c,0x13,0x30,0x75,0xfb,0xed,0x48,0xab,0x90,0xec,0x64,0x18,0xb5,0xd5,0xb5,0x7f,0xc1,0x7f,0x83,0xf2,0xdb,0xae,0xde,0xf5,0xb5,0x29,0x03,0xbe,0x80,0xb1,0x5d,0x97 +.byte 0xd3,0x7a,0xa4,0xd0,0xe0,0xce,0x04,0xda,0xaa,0x82,0x19,0xc9,0x02,0xb7,0x1c,0xe1,0x66,0xd9,0x3e,0x86,0x6d,0xb5,0xd1,0x35,0x63,0x8e,0x4b,0xc6,0x58,0x41,0xf9,0xb7,0xba,0xf3,0x06,0x91,0xb7,0xa2,0xfb,0xb5,0x5f,0x53,0xf3,0xe0,0xc1,0xf6,0x91,0x66,0xc7,0x93,0x3a,0x0a,0x72,0xb1,0xed,0x36,0x9d,0xde,0x21,0xdd,0x7d,0x0a,0x7b,0x35 +.byte 0x1f,0xc3,0x56,0xde,0xbb,0xcb,0xb2,0x0a,0xb6,0x84,0xce,0xa1,0xc6,0x1a,0x46,0x2f,0x9f,0x48,0xd5,0x98,0x73,0xa4,0xbd,0xbd,0xa3,0xe9,0xc9,0xc4,0x64,0x89,0xb7,0x9c,0x97,0x7c,0x2f,0x88,0x22,0xe4,0x4b,0x71,0x3d,0x2a,0x47,0xee,0xf8,0xfe,0xe0,0xf7,0x03,0x14,0xe6,0x7c,0x9e,0x57,0xbb,0x8e,0xf5,0xea,0x63,0xfc,0x5b,0x18,0x3b,0xa2 +.byte 
0xa1,0x4a,0x28,0x82,0x37,0x77,0x5b,0xc4,0xd3,0xc1,0xf2,0x87,0x13,0x2b,0x2a,0xc8,0xac,0x70,0xe1,0x82,0x38,0x9c,0x12,0xa0,0xc4,0x9e,0x6b,0xac,0x33,0x8a,0xe9,0x31,0x6f,0xa1,0x76,0x94,0x48,0xcf,0xbc,0x78,0x22,0x82,0x6a,0xb0,0xb9,0x49,0x71,0xdb,0xde,0x8b,0x90,0x09,0x82,0x4d,0x79,0x17,0xe8,0xcf,0xd8,0x50,0xc3,0x08,0x07,0x81 +.byte 0x5f,0x9a,0x72,0xce,0x0a,0xe4,0x29,0xc9,0xdd,0x95,0x67,0x58,0xa1,0x14,0xec,0xcf,0x2f,0x29,0xcf,0xce,0xb3,0x35,0x54,0x77,0x67,0x56,0xec,0x95,0x68,0xee,0xbf,0x9c,0x9f,0x74,0x78,0x12,0xd5,0x30,0x83,0x28,0xd5,0x36,0x96,0x57,0xa0,0x8d,0x1c,0x99,0x19,0x04,0xaf,0x25,0xe5,0x71,0x83,0x88,0xb0,0x74,0x38,0xdd,0x8a,0xff,0x39,0x7a +.byte 0xfd,0x34,0x8f,0x9c,0x67,0xa8,0xc8,0x6f,0x13,0x5d,0xf2,0x5b,0x22,0xd3,0x8e,0x63,0x51,0x58,0x9b,0xfc,0xaa,0x89,0x65,0x4e,0x36,0xc4,0xa7,0xef,0x98,0xf9,0xaf,0xcd,0x35,0x8c,0x16,0xbc,0x70,0x4f,0xcd,0x71,0x2a,0xf4,0x13,0xb3,0x3d,0xa3,0x92,0x71,0x45,0xe5,0x9a,0x45,0xbd,0xc5,0x1d,0x82,0x60,0x3a,0x97,0xf3,0x0f,0x96,0x21,0x3d +.byte 0xe5,0x6e,0xfb,0x9d,0x9b,0xeb,0x15,0xc2,0xa6,0x73,0x76,0xf2,0xcd,0xec,0xfd,0x0f,0xf4,0x3f,0x46,0xc9,0x9c,0x73,0xa1,0x21,0x08,0xdc,0x31,0x00,0xaa,0x95,0x07,0xf0,0x3d,0x51,0x57,0xfa,0x6b,0xc3,0x8e,0xe9,0xa4,0x65,0xdc,0xff,0x57,0xb9,0x1f,0x4f,0xc6,0x6d,0x03,0x00,0xa7,0x19,0xb8,0x24,0xb5,0x3d,0x87,0xcb,0x84,0xb7,0xf5,0xfe +.byte 0x51,0x16,0x5b,0xc7,0xed,0x4b,0xff,0xa3,0x66,0x17,0x93,0x60,0x69,0x84,0x8c,0x95,0x74,0xa7,0x30,0x2d,0x09,0xf7,0x4e,0x0e,0x2f,0x99,0xda,0x46,0x34,0x0f,0x93,0x90,0x97,0x4c,0xa6,0x25,0x15,0xb8,0x6f,0x1d,0xd5,0xe1,0xc1,0x39,0x50,0xfd,0xd5,0x79,0x4f,0x04,0x2f,0x76,0x50,0x3f,0x67,0x56,0xad,0x02,0x82,0x30,0x1a,0xaa,0x6e,0xe2 +.byte 0x05,0x6a,0x93,0xb7,0xbe,0xde,0x84,0xce,0xd8,0x53,0xed,0xad,0x95,0xab,0x45,0x1f,0x4c,0x3b,0x22,0x36,0x27,0x45,0x19,0xa4,0x7f,0x12,0x20,0x6c,0x9d,0xeb,0xd2,0xfe,0xd6,0x7d,0x25,0xf9,0xe3,0x64,0x77,0x56,0x89,0x12,0x57,0x80,0xd5,0x40,0xbb,0x2a,0xcc,0xac,0x34,0x8e,0x87,0xfd,0x58,0xc3,0xbd,0x92,0x48,0xd8,0x7f,0xc4,0x39,0x6a +.byte 0x4e,0x1c,0x50,0x93,0xef,0xae,0x81,0x93,0x50,0x95,0x6e,0x46,0x7c,0xf5,0x27,0x44,0x6c,0x21,0x06,0x49,0x89,0x7e,0xf4,0xfa,0x08,0xa5,0xbc,0x0a,0xbd,0xb6,0x7b,0x55,0xac,0x87,0x19,0x33,0xfa,0xab,0xf3,0x15,0xc9,0x1b,0x83,0xf2,0x41,0xf1,0x26,0x6f,0xdf,0x15,0x60,0xdb,0xa6,0x03,0x43,0x3e,0x34,0x7a,0xa9,0xb1,0x38,0x57,0xe4,0x09 +.byte 0x1a,0x4a,0xd8,0x6e,0x28,0xee,0x7d,0x74,0x54,0x03,0xb3,0x29,0x24,0xb3,0xf0,0xc6,0x20,0x7c,0x47,0x01,0x66,0x36,0x7a,0x14,0x18,0x09,0xd6,0xaa,0xa6,0x82,0x5b,0xe4,0x0a,0xf9,0x41,0x52,0x3b,0x56,0xa2,0xf8,0xa2,0xa1,0x2b,0xe0,0x0d,0x1f,0x5b,0xe4,0x0e,0xe1,0x94,0x84,0x6f,0xed,0x2e,0x11,0xfa,0x4a,0xbd,0x41,0xf4,0x3c,0x8c,0x7e +.byte 0x94,0x46,0xec,0x79,0x81,0xb0,0x36,0xfd,0x9c,0x73,0x0f,0x84,0x1a,0x59,0x4e,0x1b,0xd5,0xd1,0x0d,0xff,0xfd,0xb7,0xfb,0x73,0x35,0x8a,0x66,0xed,0xf3,0xee,0x6d,0xf7,0x86,0x0a,0xb9,0xc0,0xf1,0xa3,0xb7,0x32,0x49,0x01,0xe8,0xcd,0xfe,0x82,0x7b,0xf6,0x46,0xd8,0x73,0x47,0x8b,0x7b,0x6e,0x31,0x92,0x0f,0x4b,0x16,0x11,0x86,0x1d,0x02 +.byte 0x5d,0x12,0x79,0x59,0xdc,0x8c,0xaa,0x1b,0xc1,0x75,0x63,0xb2,0xd6,0xbf,0x19,0xb0,0x81,0x70,0x34,0x12,0xd2,0x09,0xbe,0x6d,0xa1,0x31,0x77,0xd2,0x9b,0x59,0xdc,0xcb,0x67,0xb5,0x14,0xcd,0x37,0x31,0x2c,0xa6,0x17,0x58,0x2b,0x24,0xfc,0x2a,0x9e,0x8f,0x38,0x38,0x7a,0x80,0xda,0x8b,0x54,0x1d,0xc9,0x99,0xc7,0x1f,0x98,0x7a,0x1f,0x32 +.byte 
0x23,0x1c,0xb5,0x6e,0x53,0xd3,0x61,0xe7,0x78,0x19,0x6c,0xd5,0x2f,0x85,0xde,0xd1,0x67,0x6b,0x9b,0xa1,0x09,0x87,0x5e,0x89,0x5e,0x89,0x21,0x36,0xf2,0x94,0xc1,0xfd,0x6c,0x4e,0xd9,0x6b,0xd2,0xb1,0x1b,0x48,0x37,0x9a,0x7b,0xc9,0x52,0xfd,0xe2,0x6d,0x07,0x19,0xf2,0xa5,0x69,0xdc,0x0b,0x52,0x8f,0xb3,0x87,0x03,0x1a,0xd8,0x43,0x20 +.byte 0x68,0xcf,0x08,0xcc,0xce,0x37,0xf6,0x96,0x7f,0x03,0x62,0xb2,0xce,0x6a,0xfb,0x22,0x54,0xd6,0xfc,0x84,0x5c,0xf5,0x55,0x32,0x36,0x77,0x1d,0x15,0x6a,0x2c,0x3a,0x01,0x34,0xff,0x5b,0x7f,0x3f,0xab,0x97,0x8f,0xbd,0x1d,0x07,0xb9,0x47,0xb1,0xcc,0xc0,0xdf,0x17,0x38,0x54,0x07,0xc0,0x1b,0xb9,0xa2,0x29,0xa6,0x25,0x73,0x32,0x4d,0x5e +.byte 0x51,0x60,0xb3,0x27,0xe5,0xb6,0xdb,0x56,0x81,0x95,0x03,0x7e,0xca,0xc6,0x15,0x8f,0x48,0xd4,0xac,0x71,0x41,0xdc,0x9c,0x86,0x5d,0xd8,0x90,0x90,0x54,0xdd,0x3d,0xf3,0xa8,0xbb,0xe5,0x55,0x69,0x26,0xdf,0xd1,0x8e,0x75,0x2a,0xe4,0xfe,0xe0,0x80,0x1d,0x6b,0xd2,0x8a,0x06,0x49,0x4e,0x60,0xf8,0xbd,0x3d,0x99,0x27,0x80,0x27,0x42,0x66 +.byte 0x01,0x32,0xe1,0x9e,0xa6,0xde,0x7b,0x14,0xa4,0x49,0x68,0x70,0xbe,0xa4,0xe1,0x44,0x2e,0xce,0xa3,0xe9,0x1d,0x7a,0xbd,0xf1,0xe4,0x25,0x11,0x47,0xd8,0xaa,0x32,0x34,0xf8,0xca,0x3d,0xec,0xf3,0x5d,0x8a,0x55,0xe7,0xd4,0x7c,0xfb,0xcf,0xe7,0xa6,0x13,0xaa,0x16,0x5f,0xaa,0x02,0x19,0xdd,0xf1,0xf8,0x5c,0xb2,0x1e,0x68,0x9a,0x21,0x93 +.byte 0xd1,0x38,0x31,0xbb,0x26,0x76,0x44,0xf8,0x84,0x3b,0xf5,0xd1,0x52,0xbe,0x1b,0x8e,0x4d,0xa0,0xb4,0x4a,0x5a,0x7e,0x89,0xe5,0x36,0xb0,0x76,0x77,0xc5,0xc2,0x22,0x73,0xc2,0x19,0x12,0x7f,0xdf,0x9c,0xb8,0xc0,0xf5,0x0e,0xd5,0xa3,0x55,0xae,0x61,0xf8,0xf1,0x6b,0x79,0xc8,0x2e,0xbc,0xa5,0xef,0xd4,0xb1,0x84,0x0c,0x15,0xc4,0xed,0xb3 +.byte 0x18,0x29,0xd6,0x31,0x83,0x79,0x30,0x1a,0x8f,0xf0,0x3b,0xe9,0xd1,0xf2,0x1d,0xec,0xcb,0xe8,0xc5,0x1c,0xb5,0xcb,0x8e,0x01,0xd1,0xb2,0x86,0x43,0x33,0x95,0x70,0x7e,0x75,0xa9,0xa1,0xe7,0xcb,0xd9,0xf4,0xd3,0xe1,0xe2,0xe9,0x46,0x21,0x20,0x3b,0xe9,0x48,0x1c,0x3f,0x93,0x57,0x31,0xeb,0x15,0x9c,0xa7,0xa6,0xcb,0xb5,0xb7,0xa7,0x24 +.byte 0xbe,0x66,0x4c,0x92,0x7c,0xe8,0x8e,0x3f,0x9c,0xa9,0xd7,0xad,0x73,0x68,0x19,0x19,0xd4,0xb5,0x57,0x82,0xdc,0x67,0x3c,0xec,0xac,0x06,0xec,0x86,0x9b,0x65,0xff,0xbb,0xc3,0x90,0x48,0xdb,0x52,0xcc,0xa4,0xf5,0xdf,0x2c,0xc5,0x5a,0xe3,0x30,0xed,0xad,0x37,0x40,0x8c,0xaa,0x32,0x4f,0x94,0x1e,0x14,0x59,0x48,0x1d,0xd3,0xaf,0x80,0xe7 +.byte 0xcf,0x6b,0xa7,0x70,0xe7,0x98,0x22,0x4b,0x40,0x02,0x0c,0x29,0x09,0x0a,0x53,0xf7,0xd4,0xeb,0xbb,0x75,0xb4,0x30,0x1c,0x67,0xea,0xd2,0xb5,0x40,0xfe,0x57,0x2c,0x3c,0x44,0x8d,0x8d,0x02,0x78,0xf0,0x76,0x8f,0x92,0xab,0xb4,0xc9,0xc0,0x2f,0xf5,0xde,0xa7,0x09,0x14,0xf1,0xe5,0x34,0xeb,0x86,0xfa,0xcf,0xcc,0x85,0x1c,0x9c,0xa6,0xe1 +.byte 0x72,0x9e,0xc1,0xe4,0x74,0xc4,0x96,0x5d,0xf4,0x4b,0x23,0x4f,0xa5,0x32,0xff,0x38,0x21,0x8f,0x43,0xe5,0x96,0x20,0x3c,0x78,0xb8,0xb4,0xcd,0x29,0x62,0x84,0x59,0xb5,0xb4,0x57,0x07,0xa8,0x79,0x77,0x21,0xf4,0x82,0xa7,0xb1,0x36,0xee,0x16,0x8e,0xb5,0x9a,0xf7,0x03,0xac,0x64,0x03,0x20,0x48,0x24,0xbc,0xbb,0xec,0x50,0xed,0xa1,0xf3 +.byte 0x67,0xd9,0x34,0xe1,0x0c,0x0b,0xc3,0xd0,0x46,0x0b,0x55,0x85,0x59,0x3c,0xb4,0x7d,0xd0,0xc2,0xe7,0x95,0x24,0x1f,0x53,0x76,0xf1,0x81,0x4a,0x61,0x6a,0x2e,0x3b,0x3f,0x92,0x14,0x7c,0xe0,0x33,0x7f,0xb4,0x85,0x92,0x78,0x0c,0x0b,0xe7,0xbd,0x7a,0x08,0x31,0x7d,0x47,0x3b,0xfa,0xdd,0x90,0x9e,0xf0,0xa9,0xd1,0xa7,0x7c,0x2a,0x37,0xb1 +.byte 
0x23,0x71,0x34,0xa0,0x63,0xfb,0x9e,0x8f,0x39,0x00,0xa0,0x09,0xd4,0x1f,0xf4,0xba,0x2d,0xc1,0xac,0x6c,0x94,0x18,0x56,0x3e,0x89,0x92,0x63,0x10,0x5e,0xfe,0x76,0xec,0x4e,0xb6,0x5d,0x59,0xf9,0x94,0x46,0x4f,0xda,0xd5,0x3e,0x6c,0x48,0x49,0x7e,0x7c,0x77,0xe7,0x7e,0x22,0x31,0xb5,0x9d,0x15,0xd3,0x08,0x24,0xdb,0x67,0x98,0x6b,0xfc +.byte 0x45,0x54,0x85,0x29,0x9a,0x47,0xa5,0x60,0xe2,0x46,0x36,0x45,0x16,0x54,0xd6,0xb1,0x5c,0x38,0x45,0xf8,0x43,0x28,0x58,0x81,0xc9,0x57,0x10,0xda,0x3b,0xfc,0x3e,0xe4,0xf4,0xb2,0x16,0xb6,0x16,0x1d,0xa4,0x68,0xa6,0xe0,0x36,0xdb,0xe2,0x19,0x1c,0xce,0x9f,0x94,0xa9,0x94,0xad,0x20,0xcb,0x17,0xd0,0x92,0x37,0x75,0x88,0x0d,0xaf,0xdf +.byte 0x98,0x6d,0x19,0x9e,0x8e,0x61,0xe4,0x8c,0xfc,0x27,0x27,0x6a,0xa7,0xa4,0x66,0x7f,0x08,0x03,0xef,0x5c,0x4a,0xb7,0x89,0xa1,0xae,0xe8,0x70,0x3f,0x13,0x27,0x0a,0x7d,0x5d,0x5e,0x2b,0x69,0xb5,0x98,0x1f,0x25,0x1e,0x41,0xff,0x46,0x5a,0x25,0x1f,0xb4,0x90,0x8e,0x81,0x91,0x19,0x63,0x10,0xd4,0xa9,0xdf,0x3b,0xae,0xe6,0x63,0x1a,0xdc +.byte 0x09,0x5f,0xac,0xaa,0xb8,0x6b,0xbd,0x6a,0x90,0x70,0xce,0x2c,0x63,0x6d,0x48,0x78,0xca,0xc1,0x59,0x94,0xe2,0xc7,0x89,0x17,0x73,0xfa,0x73,0x34,0xb7,0xd3,0x9c,0x4e,0xd8,0xac,0x18,0x80,0x25,0xbf,0xbe,0x75,0x0a,0x9a,0x05,0x5e,0x54,0xcb,0xba,0xab,0xca,0x7f,0x96,0xf7,0x26,0x8c,0x82,0xe0,0x23,0xa5,0x86,0xb5,0xdf,0x31,0xd0,0x2f +.byte 0xe3,0x66,0x96,0x83,0xd2,0x04,0x43,0x8a,0x28,0x59,0x49,0xdc,0x11,0x38,0xd9,0x5f,0xc2,0x31,0xaa,0xa8,0x1a,0xff,0x57,0xf1,0x84,0x18,0x28,0xe8,0x04,0xae,0x98,0xa4,0x17,0xc4,0x35,0x75,0xf5,0x37,0xf5,0x27,0x3e,0x7e,0x32,0xa4,0xcb,0xd4,0x43,0x59,0x02,0x63,0x7b,0x7c,0x9d,0xa7,0x61,0x12,0xf7,0xdc,0x12,0xe0,0x07,0xac,0x96,0xf3 +.byte 0x71,0x43,0xe5,0x30,0xe0,0x4c,0x51,0x2a,0x19,0xf5,0x79,0x59,0x5a,0xc5,0x74,0xfa,0x54,0x18,0xb4,0xb1,0xfb,0x4b,0x9b,0xf8,0xe4,0xa4,0x63,0x25,0xc3,0x84,0xeb,0x2e,0xa1,0xf8,0xf8,0x7b,0x25,0x6a,0x7d,0x14,0x38,0x06,0xeb,0xae,0x9f,0xa5,0x80,0x9a,0x8a,0xb6,0x46,0x95,0xdf,0x52,0x11,0xd4,0x30,0xcc,0x11,0x8f,0x4a,0x5e,0x56,0x26 +.byte 0x60,0x3d,0x5f,0x0b,0x04,0x94,0xcd,0xca,0x1d,0x6b,0x83,0x51,0x83,0x8d,0xf8,0x33,0x4a,0x91,0x00,0xa4,0xf5,0x44,0x5b,0xad,0xa0,0x4a,0x72,0xaf,0xe6,0x4a,0x0d,0x1e,0x9f,0x18,0x6b,0xb4,0xdf,0x85,0x61,0x2a,0x3b,0xe1,0x4c,0xaa,0xc3,0x17,0xef,0x51,0x9f,0xae,0xb5,0xca,0xaa,0x6c,0xd9,0xa1,0xf5,0xa3,0x6f,0x1c,0xca,0xb3,0x37,0xda +.byte 0x27,0xea,0xcb,0xb7,0x36,0xb2,0x11,0xda,0x9f,0x07,0x78,0xaa,0x6c,0xad,0x63,0x9b,0x49,0x6b,0xfe,0x1f,0x93,0x82,0x73,0xc9,0xc8,0xf6,0x68,0x54,0x50,0x77,0xba,0x78,0xc7,0x82,0xee,0xbd,0x97,0x66,0xb9,0x22,0x49,0x0d,0x7a,0x1f,0x0f,0x4e,0xe5,0x02,0x8b,0xa6,0x1b,0x11,0xfc,0xa6,0x37,0x2a,0x5c,0x66,0xaf,0xac,0xa5,0x9f,0xbf,0x26 +.byte 0x98,0x9b,0x25,0x44,0x48,0x09,0xe6,0x76,0xb9,0x08,0xf1,0x37,0xcf,0x86,0xc9,0xdf,0xa8,0xf3,0x88,0x2f,0xc1,0x33,0x15,0x95,0x59,0xf7,0x9b,0xf2,0x48,0x76,0xcb,0xd0,0x31,0xe4,0x27,0x74,0x2d,0x6e,0xd2,0xc3,0x29,0xea,0xef,0xff,0x4e,0x3d,0xda,0x3e,0xef,0x94,0x94,0x40,0xcd,0x93,0xcf,0xb8,0x56,0x29,0xf8,0x20,0x20,0xa3,0x66,0x83 +.byte 0xba,0xc8,0x4f,0xe6,0x22,0x96,0xb5,0xb2,0x44,0x75,0x55,0x98,0xed,0x11,0xd0,0x58,0x50,0x26,0xf1,0x4a,0xf6,0x80,0x5c,0x17,0x92,0xba,0xc2,0xd6,0x68,0xd4,0x7a,0x4f,0xdf,0x16,0x97,0xbd,0xad,0xd7,0x1b,0x0c,0xe5,0x23,0xa9,0xaa,0xf4,0x1c,0x8d,0xec,0xbf,0xf0,0xb5,0xaa,0x49,0xfd,0xf1,0x31,0x9b,0xf9,0xe9,0x21,0xa1,0x20,0xab,0xbe +.byte 
0x56,0x8c,0xf2,0x85,0xdc,0x1f,0xea,0x25,0xce,0xf5,0x6c,0x18,0x7d,0xc4,0x1a,0x01,0x08,0x01,0xed,0x02,0xa8,0xac,0x7f,0x74,0x2c,0xd7,0x28,0x25,0x6e,0x68,0x19,0x38,0x8d,0x20,0x51,0x8f,0x38,0x8b,0x03,0x36,0xae,0x50,0x35,0x28,0x65,0x7e,0x15,0x2a,0x80,0x2c,0xae,0xcd,0xb3,0xb6,0x91,0xf1,0x8c,0xf2,0x8c,0xc5,0xce,0x3e,0x3a,0x97 +.byte 0x5a,0xff,0xe1,0x37,0x13,0xf7,0x6b,0x07,0xb2,0xaa,0xaa,0x57,0x18,0xb7,0xb2,0x19,0x52,0xbf,0x59,0x0b,0x6f,0xba,0x56,0x54,0x14,0xac,0x21,0xfd,0x7d,0x03,0x4b,0x0b,0x39,0x54,0xba,0xf9,0xba,0x73,0xcd,0x67,0x13,0x30,0xca,0x19,0x80,0x4f,0x18,0xb4,0x75,0x2a,0xec,0x78,0xa7,0xd0,0x5c,0x53,0xe2,0x43,0x2c,0x08,0x5f,0x5c,0xe6,0x60 +.byte 0xde,0x04,0xf6,0x75,0xca,0x35,0x3b,0xf6,0x68,0x53,0x60,0xc0,0xed,0xb0,0x15,0xa1,0xa4,0x89,0x23,0x34,0x49,0x35,0xd2,0x78,0x4b,0x8f,0x7c,0x8d,0x59,0x22,0x9f,0xad,0x72,0x47,0x5b,0xde,0xf2,0x09,0x08,0xa0,0x8d,0x5f,0x4d,0xc3,0xd1,0x83,0x17,0xbc,0x39,0x8e,0xa5,0x53,0xaa,0xe3,0x31,0x03,0x93,0x14,0xb4,0x57,0xf0,0xdf,0x54,0x1d +.byte 0x79,0x4d,0x21,0x1a,0x8f,0x3f,0x6e,0x07,0x41,0xcc,0x2d,0x94,0x55,0x4e,0x50,0xfd,0xac,0xe3,0xef,0xa7,0x50,0x3b,0x3c,0xda,0x32,0x25,0xee,0xd9,0x01,0x37,0x8e,0xb3,0x23,0xc5,0x5e,0x12,0x88,0x6d,0xd5,0x41,0xfd,0x3f,0xfa,0x75,0xb8,0xcb,0x82,0x10,0x81,0x38,0x1b,0x10,0x2d,0x2c,0x6b,0x62,0xa1,0x7c,0xd1,0x75,0xd8,0x8c,0x0c,0x2f +.byte 0xe8,0x97,0xff,0x18,0xb3,0x12,0xa2,0xef,0x6c,0xc5,0x79,0x9f,0x64,0xf3,0xc7,0xdc,0xdb,0x54,0xa4,0x25,0xc7,0x30,0xfb,0x6c,0x5a,0x50,0x24,0xf9,0xb6,0xc9,0xe7,0xda,0x78,0xcc,0x1b,0x5e,0xf3,0xe7,0x32,0xd8,0x36,0x47,0x10,0xe5,0x2c,0xeb,0xea,0xf7,0x25,0x30,0x93,0x64,0x88,0xc8,0x59,0xf8,0x5c,0x02,0x43,0x4c,0x23,0x8e,0x1c,0x42 +.byte 0xe4,0x36,0x39,0xbf,0xba,0x8b,0xe3,0x53,0x01,0x32,0x0d,0x89,0xc2,0xea,0x35,0x94,0xf1,0x0d,0x29,0x45,0x08,0x07,0x15,0xcb,0xd7,0x3e,0x4d,0x9f,0x04,0xd8,0x18,0x8a,0x56,0xa3,0xb1,0x1c,0x46,0x19,0x8b,0xd0,0x51,0x30,0xf3,0xca,0x52,0x2a,0x16,0xc4,0x90,0xc1,0x00,0x50,0x87,0x8b,0x4c,0x71,0x61,0x48,0x69,0xb2,0xf1,0x33,0xaa,0x79 +.byte 0x81,0x8b,0x36,0x33,0x19,0x41,0x6b,0xc1,0x91,0x40,0xf2,0xcc,0x1d,0x83,0x09,0xab,0xcc,0x6f,0x6c,0x54,0x91,0x62,0x80,0xac,0xe6,0x1f,0xcd,0x5d,0x05,0x2b,0xe5,0xac,0xbc,0xd6,0x1b,0x8b,0xef,0x95,0xa0,0xf3,0xfe,0x8e,0x4d,0x32,0x77,0xe8,0x02,0x8f,0x44,0xad,0xc4,0x40,0xc3,0x99,0x68,0x81,0x47,0x15,0xbd,0x3b,0x8f,0x0b,0x9b,0x3a +.byte 0xb3,0x9d,0x8f,0x3d,0x86,0xd1,0x89,0x5f,0x67,0x19,0x33,0x2d,0x18,0x64,0x0e,0x3a,0x13,0xa4,0xe9,0xb4,0xc9,0x90,0x09,0x6a,0xcb,0x5d,0x0d,0x83,0x13,0x04,0x29,0xe5,0xa5,0xf4,0x00,0x56,0xf4,0x80,0x96,0x33,0x93,0xe4,0x9b,0xc4,0x6e,0x38,0xbf,0x0a,0xe0,0xee,0x8c,0x89,0x5d,0x60,0x36,0x7e,0x69,0xc2,0xc7,0x28,0x6f,0x2b,0x97,0xfb +.byte 0xb3,0x5b,0x82,0xe8,0x9a,0x36,0x44,0xd7,0x1f,0x9b,0x1b,0xd0,0x14,0xe4,0xd4,0x0d,0x35,0xcd,0xee,0x88,0x50,0x37,0x5c,0x88,0x09,0xa5,0x16,0x4d,0xe1,0xbc,0xe8,0x79,0x8f,0xa9,0x18,0xb8,0x43,0xb4,0xd7,0x32,0xcd,0x26,0xdd,0x78,0x29,0x59,0xad,0x29,0xe3,0xe0,0xe7,0xcf,0x16,0x03,0xc6,0x8a,0xb6,0xa2,0x09,0x9a,0x6e,0x90,0x7b,0x0c +.byte 0x9d,0x20,0xb6,0xc4,0x28,0x3f,0x44,0x06,0xa9,0x45,0x72,0x27,0xa7,0x56,0x3f,0x07,0xff,0x13,0xd9,0x80,0xda,0xbd,0x25,0xad,0xd3,0x74,0x2c,0xd8,0xd2,0x93,0xa5,0xda,0xbc,0x5f,0xa5,0xde,0xb7,0x3a,0xf0,0xd2,0x17,0xb1,0xc3,0x70,0x2a,0x85,0xde,0xf0,0x97,0x7b,0x96,0xb2,0x0e,0x45,0x7f,0x63,0xd4,0x94,0xd8,0x78,0x05,0xcf,0xea,0xb3 +.byte 
0xfb,0x7a,0x79,0xb5,0x91,0x53,0xb8,0x8c,0xa2,0x03,0xf4,0xc3,0xed,0xf0,0xab,0x33,0x5c,0x6e,0xcd,0xbd,0x73,0xe3,0xe9,0xd0,0x83,0x2a,0x2a,0x68,0x32,0xf1,0x69,0x4f,0xd0,0x8b,0xe8,0xa1,0x7d,0x5b,0x0f,0x69,0xc2,0x33,0xbf,0xc1,0x54,0x29,0x47,0xed,0x9f,0xdb,0x35,0x0a,0x3d,0x2b,0x9d,0x8b,0x91,0xb6,0xe0,0xbc,0x53,0xba,0xb7,0xcd +.byte 0x2c,0xd9,0xeb,0x81,0xa0,0x2e,0x14,0x6e,0xdc,0xe1,0x90,0x36,0x14,0x9d,0xa8,0x8b,0x6b,0x1b,0xac,0x4c,0x09,0x8b,0x1a,0x87,0xf4,0x66,0xf6,0xfb,0x62,0x92,0x13,0xcf,0xb2,0x96,0xf0,0xc9,0x8b,0x12,0x99,0xf1,0x16,0xae,0x5c,0x27,0x24,0xa8,0xfd,0xb3,0x4c,0xc2,0xe6,0x3f,0xd2,0xc6,0x0c,0xf2,0x65,0x4e,0xdf,0xf1,0x06,0xb8,0x99,0xc4 +.byte 0x3a,0x35,0xba,0xed,0x18,0x3e,0xfa,0x03,0x51,0x8d,0x45,0x68,0x12,0x7b,0xb6,0xac,0x63,0x99,0x47,0xee,0x6f,0x8b,0xcb,0xc1,0x0a,0xf9,0x23,0xf0,0x05,0xe1,0x03,0x4a,0xb5,0xe0,0x65,0x71,0xc8,0x64,0x7e,0x0d,0x39,0xe7,0x96,0xdb,0x34,0x63,0x2e,0x1a,0x27,0x85,0x52,0x63,0x8e,0x44,0xfb,0x61,0xca,0x79,0xe5,0x91,0x99,0x83,0x2d,0xe0 +.byte 0x26,0x04,0xad,0x43,0x26,0xf2,0x7e,0x56,0xae,0x35,0x6a,0xfb,0xec,0xc6,0x27,0xe4,0x3a,0xa3,0x6b,0x63,0x72,0xba,0x98,0x03,0x9f,0x2a,0x4c,0xb1,0x33,0x22,0x9d,0x53,0xf6,0x00,0xa3,0x1e,0x32,0xcb,0xbe,0xe0,0xc2,0xf8,0x71,0xcd,0x3f,0xe3,0x4d,0x83,0xf2,0x9f,0x1c,0x91,0x35,0x97,0x52,0x95,0xba,0x24,0x04,0x04,0xca,0x32,0x6d,0xd7 +.byte 0x4b,0xd4,0x9e,0x8b,0x73,0x42,0xfb,0x9f,0xfc,0x93,0xea,0xc2,0x41,0x56,0xa9,0xe5,0xdd,0xd0,0x37,0x8a,0xe2,0x92,0x9f,0x45,0x4f,0xd8,0xef,0xe6,0x6f,0x58,0x41,0x5f,0x7b,0xe7,0x0f,0x32,0xce,0x06,0x02,0x7f,0xe2,0x37,0x87,0xb7,0x35,0x72,0x68,0x87,0xc9,0x35,0xa8,0x51,0xce,0xd8,0xde,0xc3,0x8c,0xb4,0xab,0xf4,0xa7,0x3b,0xcd,0xc8 +.byte 0x0a,0x56,0x5b,0x48,0xb1,0xa4,0x27,0xa8,0x9e,0x3e,0x04,0xbc,0xb3,0x63,0x3e,0xd5,0xf7,0xae,0xec,0x0c,0x6e,0x4a,0x73,0xb6,0xed,0x66,0xea,0xc1,0x7a,0xc4,0xaa,0x21,0x27,0x62,0xef,0x3d,0x1d,0x51,0x8b,0x63,0xe6,0xe2,0x8a,0xed,0x7a,0x4b,0x90,0xc3,0x9f,0x91,0xb4,0x8f,0x78,0x65,0x9c,0xdd,0x0a,0x7a,0x50,0x36,0x33,0x30,0x3b,0xb4 +.byte 0xdf,0x67,0xbd,0xfd,0x71,0xfc,0x40,0x49,0xaa,0x01,0xdf,0x68,0x67,0x73,0x31,0x2c,0x98,0x2f,0x8c,0x9e,0x2d,0xce,0x4a,0x71,0xbc,0x6f,0x90,0x1d,0xc0,0x37,0x07,0x30,0x0c,0xa3,0x04,0xfb,0xd1,0xd0,0x0e,0xcb,0xdc,0x94,0x06,0x7f,0x83,0xe5,0x45,0x47,0xd0,0x71,0x06,0x94,0x23,0x7c,0x03,0x80,0x46,0xa5,0x10,0x08,0xd1,0xdb,0xfb,0x9d +.byte 0xd4,0x05,0x01,0x5e,0x66,0x4d,0xf9,0x32,0x9b,0x5b,0xfe,0x7a,0x60,0x63,0x77,0x9a,0x31,0x34,0xe5,0x9a,0x82,0x2d,0x2b,0xb7,0xe0,0x04,0x8f,0x86,0xf3,0xb2,0x16,0x86,0x50,0x37,0x9d,0x80,0xe7,0x62,0xdf,0x77,0xda,0xf4,0xfc,0xb7,0x42,0x9d,0xac,0xcb,0x11,0xff,0x0c,0x6f,0x4e,0x16,0x0c,0x59,0x04,0x05,0x8f,0x88,0x64,0x37,0xe6,0x6c +.byte 0xee,0x64,0x58,0x79,0x60,0xd4,0x2f,0xb7,0x90,0x59,0xfb,0x82,0x3b,0x20,0x2e,0x2b,0xba,0x15,0xfb,0xf7,0x5b,0x1d,0x81,0x8a,0x8a,0x8f,0xe3,0x39,0x92,0x34,0xfc,0x3a,0x67,0xce,0xb6,0xa0,0x9b,0x56,0x78,0x96,0x4d,0x32,0xbf,0x9c,0x83,0x9e,0x19,0x66,0x20,0x42,0xb2,0x78,0x62,0x42,0xdd,0xdf,0x98,0xab,0x0c,0x3d,0x41,0xb5,0x74,0xc1 +.byte 0x2d,0xf0,0x02,0x58,0x6e,0xb3,0x4d,0x7b,0x41,0x1c,0xf1,0x09,0xc1,0xbb,0x84,0x67,0xf8,0x24,0x77,0x32,0xcd,0x7a,0x63,0x87,0x0d,0xf2,0xc5,0xaf,0xe4,0xb5,0xc6,0x3b,0xad,0x66,0x5e,0xae,0x90,0xc2,0x24,0x27,0x7a,0x0b,0xed,0x1b,0x86,0x5d,0x02,0x19,0x85,0x78,0xc8,0xb1,0xce,0xe7,0xc9,0x5c,0xce,0x43,0x58,0xac,0x1c,0x4e,0xcd,0xb8 +.byte 
0x3a,0xb8,0x7a,0xf3,0x79,0x4b,0x97,0xcf,0xbe,0x88,0x24,0xd0,0x9a,0x5a,0x55,0x43,0x0c,0x48,0xa2,0x7f,0xaf,0x4b,0xd8,0x16,0x02,0xfb,0xe6,0x0c,0x6b,0x85,0xb4,0xb8,0x5e,0x40,0x60,0x5d,0x93,0x51,0xc6,0x32,0xb9,0x4a,0x23,0x96,0x71,0xeb,0xe8,0xe8,0x01,0x1e,0x85,0xb0,0x47,0xde,0x86,0x15,0x52,0x3a,0xb2,0xd3,0x86,0x4b,0x78,0x09 +.byte 0x9c,0x6e,0x9d,0xd9,0xef,0xe8,0x64,0x2d,0x2a,0xec,0x21,0x5a,0x60,0xa5,0xe4,0x26,0xbb,0x79,0x0c,0xdb,0x48,0xd6,0x4b,0x5c,0x5b,0xe3,0x34,0xc9,0x96,0xf0,0xcb,0x68,0x8a,0x2d,0xee,0xa3,0x37,0x34,0x5f,0x3e,0x65,0x40,0xce,0xe1,0xc8,0x2e,0x11,0xca,0x42,0x51,0x53,0x72,0x3d,0xa9,0x68,0x54,0xb4,0xd8,0xd7,0x72,0x84,0x8d,0xcd,0x6d +.byte 0x1f,0x0e,0x0c,0x0f,0x32,0x3a,0x7d,0xdd,0xc1,0xd3,0xe7,0x2d,0x1f,0x52,0x8b,0x73,0x86,0x70,0x2a,0xcb,0x71,0x37,0xa1,0xab,0xe3,0x94,0x5a,0xd7,0x9d,0x68,0xc1,0x6e,0x5d,0x72,0x25,0x81,0xe8,0x45,0xad,0x6c,0xf8,0xdb,0x9b,0x70,0x31,0xb9,0xf0,0x4f,0x23,0xd7,0x03,0xc8,0x87,0x43,0x51,0x7a,0x55,0xfe,0x6f,0x2d,0x40,0xbc,0xfe,0xdf +.byte 0xe6,0x21,0x4b,0x4d,0xc6,0x02,0x48,0xe7,0x7a,0x2a,0xef,0x91,0xdf,0xbc,0x98,0x91,0x6f,0x59,0xc4,0x47,0x77,0x2e,0x45,0x45,0x23,0x47,0x5d,0xf8,0x50,0x41,0x84,0x75,0x8a,0xe7,0x4d,0xfb,0xeb,0x58,0x00,0xcf,0x42,0xca,0x02,0x05,0xc7,0xfa,0x11,0xfb,0x6e,0x90,0x7d,0x53,0xa0,0x19,0x23,0x24,0x8f,0x89,0x17,0x40,0xbe,0x11,0xfb,0xd9 +.byte 0x04,0xf8,0x84,0xeb,0x90,0x7c,0x84,0x45,0x9c,0x53,0x45,0x5e,0x45,0x51,0x55,0xfc,0xf1,0x6b,0x02,0x24,0xfd,0x95,0x4a,0x40,0x80,0xdc,0xa6,0x94,0x15,0x2c,0x1d,0x85,0xa0,0x07,0x8d,0xf8,0xf2,0x95,0x0c,0xa0,0x4e,0x5a,0x5b,0x29,0x09,0xcc,0xf3,0x4e,0x8e,0xea,0xe8,0x26,0xb8,0xbe,0xb2,0x6f,0x76,0x6f,0xa4,0xe5,0x6a,0x50,0xcf,0xc8 +.byte 0x7d,0xb6,0x1e,0x9d,0x90,0x6b,0xde,0xe2,0x55,0x49,0x97,0x00,0xa5,0xc5,0x1f,0x1c,0x41,0x66,0xe7,0x6b,0x20,0xb2,0x1e,0xc7,0xb3,0xd4,0xa9,0x75,0xbb,0x83,0x24,0xd0,0xdf,0xbd,0xba,0x2c,0x2f,0xa4,0x03,0x1d,0x17,0xc5,0x74,0xc2,0x6a,0x20,0x71,0x18,0xd1,0xc5,0xb0,0x78,0xfe,0xda,0x55,0xd2,0x43,0x2a,0xd8,0x88,0x74,0x75,0x86,0x07 +.byte 0xe9,0x8b,0x0d,0x0f,0xe5,0x8d,0xe8,0x3d,0xf4,0x93,0xde,0x4c,0x97,0x98,0xe2,0x9b,0x22,0xde,0x13,0x18,0x8b,0xc5,0xe1,0x6f,0x6d,0xb4,0x19,0x46,0xff,0xbd,0xa6,0x2e,0xe6,0x48,0xcd,0x66,0x22,0x7d,0xf4,0x0e,0xeb,0x74,0x25,0x5c,0x90,0x0e,0x26,0xce,0x17,0xe9,0xdb,0x30,0xb9,0x25,0x99,0x96,0x46,0x3a,0x78,0xa3,0x76,0x2d,0x9e,0x42 +.byte 0x06,0x8a,0x1e,0x62,0x46,0xa4,0xd0,0x1d,0xe2,0x4c,0x3c,0xb4,0x4c,0xc0,0xd1,0xf7,0x05,0x5b,0xe4,0xd4,0x71,0x73,0x31,0xfc,0x98,0x2a,0x55,0xb0,0x78,0x92,0x59,0x8b,0x25,0x97,0x15,0xf2,0xf9,0x57,0x8b,0x7c,0xd4,0xc4,0x47,0x2f,0x10,0x3b,0x76,0xde,0x5f,0xb1,0xdf,0xdc,0xb0,0x15,0xd5,0x4a,0xd2,0x54,0xad,0x5e,0x32,0xf4,0x5a,0x1a +.byte 0x8d,0xe8,0xa0,0x4a,0x4e,0x04,0xdc,0xdd,0xd2,0x57,0xe5,0x24,0x4b,0x93,0x51,0xef,0xd4,0xba,0x3f,0x77,0xfc,0x0a,0x5c,0x7d,0x6e,0xa7,0x86,0xe5,0x88,0xd1,0xac,0x74,0x46,0x9a,0x39,0xb6,0x98,0x3d,0xae,0x89,0x4e,0xea,0x8d,0xdc,0xc7,0xb9,0x0c,0xd7,0xa6,0x06,0x4d,0x28,0x2b,0x51,0x2b,0xdb,0x30,0x4a,0x91,0x1c,0x40,0x89,0xe4,0xba +.byte 0x72,0xd5,0xed,0x16,0x66,0xb8,0xef,0x81,0xd9,0x51,0xf8,0x1b,0xff,0xab,0x8b,0x52,0xb8,0xf3,0x11,0xb3,0xe5,0x04,0x5a,0xb0,0x60,0xa3,0x35,0x12,0x6a,0xa0,0x75,0x5c,0x21,0xa9,0x5a,0xe8,0xd3,0xd7,0x8a,0x1f,0xe0,0x9b,0xb7,0x1e,0x7d,0xbe,0x81,0xaa,0x56,0x5a,0xd8,0x2d,0x7e,0x0c,0x60,0xb2,0x68,0x26,0x6d,0xaa,0x8b,0xcc,0x11,0x40 +.byte 
0x25,0xea,0xc9,0x94,0xfb,0x3b,0x9b,0xa7,0x3a,0xde,0xd9,0xfe,0x6b,0x4b,0xfc,0x3f,0xbf,0xdd,0x51,0x9b,0xa1,0xca,0x2f,0xed,0x33,0xd8,0x3d,0x92,0xa4,0x1d,0xee,0xb2,0x47,0xd0,0x72,0x6a,0x96,0x33,0x0f,0xdd,0x0a,0xd9,0xbd,0x86,0xdb,0x25,0x53,0x0e,0x3c,0x31,0xad,0x05,0xb9,0x24,0x13,0x00,0xdf,0xc2,0x7c,0x3d,0x03,0x9b,0xf6,0x6d +.byte 0x93,0xd9,0xdf,0x73,0xf8,0x1c,0x98,0xe2,0x77,0x46,0x46,0xdc,0x07,0xe6,0xbb,0xc1,0xa7,0xb6,0xbe,0x21,0x07,0xae,0xdb,0xca,0x69,0x2d,0x8a,0x2b,0x59,0x27,0xe0,0x7c,0xf0,0xf1,0x34,0x69,0x97,0x44,0xba,0xbb,0x48,0x9f,0xd9,0xd8,0x16,0x1a,0xef,0x11,0x68,0xb6,0xaf,0x3a,0x10,0xc6,0x7c,0xd1,0x12,0xc7,0x89,0x47,0xe3,0xd1,0x24,0xc6 +.byte 0x44,0x9f,0x7e,0x6a,0x66,0x43,0x48,0xd6,0x9f,0x7b,0xf0,0x1f,0xd2,0x5f,0x2b,0xa7,0x13,0x6a,0x7c,0x70,0x08,0x38,0xb0,0x00,0xbc,0x7c,0xd3,0x01,0x9b,0xf6,0x29,0xd3,0x9c,0xa4,0x11,0x90,0xe4,0x9f,0x04,0xd6,0x21,0xec,0xfd,0xcb,0xb8,0xe6,0xb6,0x49,0x2b,0xfa,0x4b,0x90,0x9e,0xc6,0x0c,0x87,0xff,0x5e,0x2e,0xcc,0xf8,0x09,0x70,0x52 +.byte 0x42,0xec,0x88,0xac,0x1e,0x76,0x2b,0xeb,0xfc,0xb3,0x65,0x81,0x34,0xb1,0x06,0x90,0xde,0xb2,0xc4,0xd3,0xfd,0xd4,0x9c,0x78,0x1a,0x5c,0x8f,0x65,0x0a,0xbd,0x88,0xe5,0x95,0x06,0xb5,0x94,0xe5,0xbf,0x90,0x31,0xbb,0xcb,0xce,0x19,0x51,0x25,0x4a,0x47,0x35,0x26,0x93,0xdb,0xe2,0x93,0x36,0x47,0x7d,0xdd,0x4e,0xd5,0xeb,0xdd,0x63,0x1c +.byte 0xbc,0x2d,0x75,0xdb,0xd4,0xfa,0x60,0x4b,0x51,0x45,0x32,0x0f,0x01,0xf9,0x73,0x9b,0xd8,0xbc,0xee,0xaa,0x7d,0x2e,0xfe,0xbf,0x9d,0x45,0xae,0xe2,0x01,0xe3,0xbf,0x58,0xdc,0xc0,0xb8,0xe8,0x44,0x16,0x3b,0xd8,0xaa,0x3b,0x13,0xca,0xfb,0x5f,0x8d,0xb3,0x2a,0x83,0x66,0x49,0xae,0x54,0x02,0x4e,0xd8,0x68,0xee,0x21,0x1a,0xbb,0xf4,0xf7 +.byte 0xdf,0xf1,0x51,0x7b,0x62,0xa8,0xb2,0xdc,0x4b,0xd4,0x04,0xd2,0x05,0x49,0xdd,0xa4,0x75,0xe6,0x64,0x82,0xe7,0x25,0x55,0x60,0x2c,0x9f,0x8a,0x7a,0x11,0xe9,0xf2,0x72,0xfe,0x89,0xe1,0xaf,0xca,0x0c,0xb9,0xf5,0xcc,0xcf,0x07,0xef,0x8f,0xbb,0xef,0x53,0x1e,0xe2,0xfb,0x98,0xe8,0x05,0xab,0x4e,0x7e,0x38,0x56,0x24,0xd5,0x74,0x1c,0x95 +.byte 0x1a,0x0e,0x62,0x92,0x80,0x16,0x45,0x78,0x2f,0xb1,0xe1,0x83,0x24,0x2b,0x16,0x5c,0x05,0x52,0x17,0xe9,0xe8,0x9e,0x5d,0x63,0x8f,0x77,0xc4,0x89,0x22,0x76,0x43,0x31,0xfd,0x09,0xc0,0x51,0x70,0x57,0x2d,0x51,0x91,0xe5,0x61,0x3f,0x77,0xff,0x17,0xfc,0xa6,0x19,0x9d,0x82,0x46,0x11,0x0c,0x77,0x19,0x2a,0xf5,0x19,0xb4,0x3d,0xa6,0xd4 +.byte 0x8b,0x07,0x4b,0xc6,0xa3,0x1e,0x8c,0xf5,0xe8,0x2d,0xe7,0xcc,0xa1,0x38,0x57,0x66,0x76,0x1d,0xdd,0xe3,0xb9,0x0a,0x1e,0x2c,0xad,0x09,0x07,0x26,0xff,0x7a,0xc0,0xb0,0x51,0x71,0x44,0x6d,0x2c,0x39,0x3d,0xa6,0x14,0x4e,0x74,0x2c,0x54,0x3d,0xfa,0xdc,0x2e,0x0c,0xc4,0x88,0x32,0xda,0xb0,0x9d,0xf4,0x2c,0x0a,0x1b,0xb7,0xb4,0x78,0x6f +.byte 0x1b,0x6a,0x21,0x03,0x4e,0xe0,0x87,0xa0,0x1c,0xd8,0xe6,0x0c,0x97,0x47,0xde,0x98,0x81,0x3d,0x39,0x93,0x3d,0xcb,0x29,0xa3,0x93,0x8d,0x27,0x5d,0x29,0xb5,0x85,0xc4,0x32,0xd8,0xdc,0x19,0xb1,0x63,0xdc,0x76,0x32,0xc3,0x52,0x9a,0xfd,0x3d,0xff,0xf9,0x94,0x55,0x72,0xbb,0x4d,0xe2,0x42,0xd2,0xf7,0xb2,0xac,0xac,0x5d,0x50,0x95,0xda +.byte 0x3a,0x87,0xb6,0x0f,0x27,0x72,0x34,0xe7,0xe8,0x9f,0xc7,0xba,0xca,0x8d,0xf3,0xb9,0xa1,0xdd,0xd7,0xa5,0x70,0x3b,0xcc,0x72,0x0e,0x9d,0x85,0x75,0x01,0x11,0xe1,0xc2,0xca,0xcb,0x40,0x3a,0x31,0xf2,0x5d,0x0c,0x63,0xc8,0xbf,0x38,0xde,0x09,0x3b,0x32,0xaa,0x6c,0x07,0xd2,0x2b,0x3b,0x94,0x37,0xd0,0xd9,0xe0,0x4c,0x25,0xa3,0x22,0x64 +.byte 
0x05,0xcc,0x69,0x9e,0x73,0xd4,0x46,0x2c,0x73,0x23,0xd0,0x6f,0x09,0xff,0x8b,0xef,0x7a,0x08,0x3e,0xa2,0xa7,0x9d,0xf5,0xc9,0x40,0xd1,0x06,0xd6,0xe3,0x89,0xa5,0xcc,0x9f,0x40,0x67,0x80,0x11,0xec,0x5d,0x23,0x19,0xf3,0x66,0xaf,0x06,0xcc,0xe4,0xb6,0x5e,0x20,0xf7,0x19,0xce,0x1a,0xb6,0x86,0x0d,0x39,0x1d,0xc8,0x0a,0xdb,0x50,0x52 +.byte 0x7e,0x3b,0x96,0x9f,0x05,0xdd,0xd8,0xdf,0x40,0xdf,0xe4,0x66,0x14,0x4d,0x4e,0xb3,0x9f,0x86,0x7b,0xc2,0x99,0xc3,0x8f,0xb9,0xe7,0xc3,0x50,0xa4,0xab,0xb8,0x8e,0xc5,0x28,0xce,0x8b,0x51,0xcb,0xad,0xd8,0x1a,0x23,0x7d,0x12,0xc2,0xaf,0x1a,0x93,0x4c,0x57,0xe9,0x59,0x6a,0x03,0x65,0x81,0x07,0x40,0x84,0x92,0x9d,0x22,0x8a,0x3d,0x27 +.byte 0x39,0x05,0xdd,0xf7,0x20,0xad,0xc2,0x03,0x27,0x87,0x8e,0xc1,0x23,0xad,0xe5,0x59,0x16,0xe7,0xde,0xe4,0x44,0x6b,0x06,0xb5,0x1d,0xaf,0xda,0x08,0x4a,0xfa,0x75,0x1a,0x0b,0x35,0xe8,0x6e,0x29,0xd3,0x79,0x19,0x80,0xb9,0x5f,0x36,0xec,0x43,0x25,0x3c,0xbc,0xcf,0x70,0x0c,0xc7,0x2c,0xbc,0x2e,0x72,0x40,0x73,0x98,0x11,0xc9,0x72,0x9f +.byte 0xd9,0x95,0x9f,0x8d,0x4a,0x52,0xbb,0x89,0x30,0x5b,0xa2,0x7e,0x0c,0x21,0x11,0xda,0x4e,0xa1,0x7c,0xc1,0x0f,0x95,0x1b,0x5b,0x2e,0xbd,0xae,0x8a,0x56,0x82,0x8f,0x84,0x43,0xdf,0x24,0xac,0x99,0xaa,0x8a,0xaf,0x82,0x33,0xf7,0x0a,0xbf,0x5e,0xfd,0xf2,0x91,0xf0,0xe1,0x5d,0x4e,0xa5,0x16,0x6e,0xb4,0x39,0x8b,0x99,0x32,0x6b,0xc8,0x16 +.byte 0xc1,0x84,0x10,0xc2,0x74,0x54,0xfc,0x02,0x71,0x44,0xfc,0x52,0xfa,0xc2,0x3c,0x8d,0xf7,0x8b,0x1e,0xcc,0x5e,0x43,0x66,0x29,0x29,0x93,0xe7,0xf6,0x9f,0xa8,0xa3,0x35,0xc9,0xde,0xb0,0xbe,0x4d,0xdf,0x8c,0x61,0x5a,0x6b,0x16,0x88,0x33,0x65,0x47,0x98,0xd2,0xf8,0x71,0x09,0x9f,0x00,0xb6,0x9e,0x21,0x37,0x2a,0x0b,0xb4,0x74,0x6b,0x0e +.byte 0x6e,0x4d,0x14,0x45,0x6c,0x1b,0xa8,0x4c,0xa7,0xc6,0xc3,0x36,0x6e,0x9e,0x63,0x5a,0x36,0x76,0x04,0x06,0x7f,0xdd,0x74,0x24,0x19,0xd8,0xb7,0xbc,0x6c,0x52,0x82,0x67,0x6b,0xd5,0xcb,0x81,0xdf,0xd7,0xe4,0xdd,0x14,0x33,0x71,0xcf,0x6b,0x7f,0xaf,0x66,0x27,0x8a,0x70,0xb8,0x45,0xae,0x8c,0x1a,0x65,0xd3,0x16,0x5c,0x05,0x65,0xd0,0xfb +.byte 0x07,0xe3,0x98,0xa9,0x94,0x27,0x6c,0xac,0xfc,0xee,0x1b,0x35,0x43,0xd6,0x3b,0x41,0x1c,0x86,0xc0,0x4f,0xf3,0x63,0xf4,0xba,0x4d,0xdf,0x6a,0xda,0xcf,0xb5,0x9f,0x69,0x3f,0x3d,0x0c,0x80,0x79,0x02,0x34,0x4a,0x9a,0xfd,0xb6,0xea,0x0b,0x61,0x32,0x67,0x2d,0x6a,0x6b,0xcb,0xcf,0xa6,0xee,0x6a,0x93,0x11,0x00,0xb8,0x6e,0x27,0x88,0x62 +.byte 0xf7,0x4c,0x7b,0xe1,0x13,0xe1,0x47,0xaf,0x96,0x24,0x3b,0x46,0x8c,0xf4,0xbe,0x13,0xed,0x65,0xe1,0xf2,0x36,0x2d,0xa4,0x6d,0x5e,0xa6,0x93,0xfb,0x64,0x0e,0xbd,0x50,0xdc,0x29,0x4f,0x90,0x8e,0xe1,0x7f,0x5e,0x47,0x08,0x9b,0x1c,0xb7,0xce,0x06,0x80,0x52,0xc0,0xb5,0x82,0x77,0x49,0x3c,0xe0,0x70,0x1f,0x84,0x75,0x9e,0x19,0xb2,0x83 +.byte 0xda,0x40,0xf8,0xd7,0x27,0x1e,0xbc,0x39,0xb5,0x1d,0x25,0x75,0x63,0x7d,0x85,0x2f,0x09,0x07,0xe9,0x73,0x8e,0x2b,0xb8,0x9a,0xbe,0xd6,0x90,0x91,0x6e,0xdb,0x7c,0x9d,0x9b,0x43,0x1d,0x21,0x88,0x76,0xb0,0xaa,0x7b,0x68,0xe4,0xa7,0x92,0x64,0xe4,0x1f,0xff,0x53,0x1d,0xf7,0xc0,0x44,0x5c,0x0a,0x1e,0xcd,0xa7,0x6e,0x41,0x1c,0x8c,0x7d +.byte 0x66,0xa7,0xf6,0xfc,0xa9,0x0d,0x3f,0x9c,0xfb,0x15,0x87,0x14,0x20,0x43,0x1b,0x05,0xf5,0xea,0x5c,0x07,0x61,0xb3,0x0e,0x7c,0x52,0x57,0x1c,0x09,0x33,0xb4,0xd8,0x3d,0x9d,0x17,0xee,0x86,0x25,0xdc,0x6b,0xcd,0x58,0xb7,0x18,0xbd,0x85,0x39,0x0b,0xb9,0xb8,0x35,0x3a,0x86,0xbb,0x88,0xb5,0x5e,0x4b,0x0a,0x7e,0x9c,0x02,0xb5,0x45,0xe5 +.byte 
0xc7,0x38,0x56,0x1e,0xe4,0xe7,0xf7,0x88,0xac,0x75,0x9a,0x97,0xa8,0x15,0xb6,0x2d,0xcf,0x2a,0x59,0x65,0x0e,0x00,0x9f,0x8e,0xa9,0x94,0x23,0x1c,0x40,0xe4,0xb9,0x6b,0xcf,0xf0,0x53,0x7f,0x98,0xd1,0xa7,0x72,0xd7,0xe3,0x22,0xfd,0x5f,0x3d,0x3f,0xd6,0x21,0xb4,0x84,0x0c,0x1b,0x1d,0x00,0x2d,0x8f,0x72,0x22,0x2d,0x2c,0x8c,0x54,0x46 +.byte 0xe5,0x53,0xca,0x66,0x67,0x5e,0xb3,0x62,0x6f,0xaf,0x33,0x81,0xc1,0xf6,0x77,0x92,0x3e,0xdb,0x74,0x68,0x93,0xca,0x38,0xf8,0x18,0x50,0xef,0xe4,0xc9,0x45,0x40,0xc9,0xf0,0xc5,0x7a,0x4b,0xf2,0xd8,0xca,0x72,0x62,0x5f,0x67,0x10,0x10,0xcc,0xff,0x1a,0xc7,0x9c,0x3a,0x7f,0xca,0x11,0x67,0x3e,0xca,0xa6,0x9c,0x48,0x15,0xaf,0x68,0xb7 +.byte 0x2b,0xa7,0xa2,0x68,0x7b,0x40,0xb2,0xe3,0x27,0x18,0x7e,0x94,0x4c,0xca,0x0e,0x5b,0x3a,0x30,0xcb,0xc3,0x72,0x31,0x6b,0xe6,0x3e,0xa7,0x09,0x3e,0xf2,0x53,0xda,0x7d,0x6f,0x55,0x08,0xd2,0x26,0xc3,0x07,0x52,0x38,0x90,0x04,0xc6,0x3c,0xb6,0xb5,0x2a,0x7b,0x38,0x07,0x9e,0xb4,0xa5,0x48,0x36,0xf5,0x5e,0xac,0xa8,0x97,0x4e,0x37,0xc2 +.byte 0xee,0x12,0x88,0x28,0xd0,0x7d,0xd1,0xae,0xc0,0xc7,0x84,0x69,0x25,0x79,0x9a,0x8a,0x16,0x49,0x50,0x72,0x69,0x1a,0x02,0xc9,0xfe,0xd5,0x2c,0x40,0xc6,0xc8,0x8b,0x7d,0xe3,0xab,0x89,0xe3,0x78,0xf1,0xe9,0xbd,0x3c,0xbd,0x02,0x96,0xfe,0x0c,0x5c,0xc4,0x9e,0x89,0x3a,0x4b,0xe9,0xcd,0x41,0x1c,0x59,0x71,0x52,0xb0,0xc9,0x36,0xf1,0x80 +.byte 0xab,0x5e,0xbc,0xf1,0x20,0x99,0xc0,0xab,0x0c,0x59,0x43,0xc2,0xcd,0x09,0xa6,0x30,0x91,0xfa,0x12,0x23,0xbe,0x18,0x24,0xa6,0xbf,0x55,0x4c,0xe8,0x22,0xff,0x01,0xbd,0xde,0x2c,0x72,0x3c,0x0a,0x36,0xd5,0x7e,0xed,0x6a,0xe3,0x63,0x14,0x60,0xa3,0x0a,0x6f,0x04,0x90,0x64,0xc1,0xd1,0x78,0x54,0xae,0x19,0x74,0xe2,0xea,0xec,0x86,0x22 +.byte 0xc7,0xdb,0xf6,0x48,0x0e,0x75,0x43,0x04,0xf7,0x62,0xe6,0xa9,0x46,0x65,0xcc,0xa5,0xa4,0x1a,0xb2,0x94,0x7b,0x7a,0x8c,0x9a,0x80,0x62,0x32,0x17,0x80,0xc3,0xc6,0x54,0x0e,0x4e,0xe3,0x46,0x74,0xa8,0xae,0xcd,0xd0,0xc1,0x19,0x84,0x61,0xb4,0x1d,0x18,0x4d,0x80,0xf1,0x70,0x40,0xbe,0xa2,0xa3,0x38,0xcc,0x21,0x1c,0x2f,0x72,0x85,0x72 +.byte 0x0a,0xa1,0x0d,0xa3,0xdc,0xa2,0xf4,0x64,0x84,0x3c,0x43,0x6d,0xfb,0x45,0x11,0xf9,0x40,0xdc,0x25,0x85,0x80,0x41,0x84,0xa7,0x06,0x2e,0x79,0xbf,0x0c,0xa7,0x8f,0x17,0xea,0xa2,0xc4,0x6f,0xd8,0xc6,0x9e,0xab,0xdc,0x45,0x6f,0xaa,0xda,0xe9,0xe6,0x84,0xf0,0x5f,0x8a,0x90,0x99,0x33,0x9b,0xcf,0x03,0xe6,0xce,0x19,0x0c,0xad,0x2f,0xad +.byte 0x81,0xb8,0x17,0xff,0x6b,0xff,0xc8,0x14,0xa6,0xf4,0x37,0x55,0xdc,0xbb,0x09,0x3c,0x3c,0xe7,0x29,0x95,0x23,0x5c,0x58,0x92,0x2e,0x95,0xe8,0x3b,0x8b,0x81,0x2d,0xfd,0x58,0x8a,0x1f,0xdf,0xf1,0x54,0xa3,0xd0,0x01,0xaa,0x3d,0x32,0x61,0xe5,0x8e,0x62,0xa7,0xf6,0x3b,0x2d,0x0e,0xff,0xf4,0xe9,0x08,0xe7,0xef,0x3a,0x63,0x10,0x34,0x49 +.byte 0x14,0xe1,0x88,0xd0,0xb2,0x1d,0xb7,0x31,0xc9,0xa4,0x48,0xa8,0xaf,0x64,0x29,0xab,0x1f,0x14,0x13,0xa7,0xb8,0xb8,0xa4,0x24,0x1d,0xf9,0xb6,0x3e,0x62,0xa6,0x5e,0x10,0xcb,0x44,0x5c,0x9d,0x2c,0x58,0x3a,0x36,0xa3,0x81,0x9f,0xa9,0xa4,0xa1,0x06,0x1d,0xbf,0x97,0x03,0x88,0xf2,0xf4,0x81,0x3e,0x1b,0x35,0xea,0xd0,0xb6,0x96,0xa1,0xf7 +.byte 0x1e,0x49,0xb7,0xe8,0x23,0x6f,0x05,0x7c,0x9f,0xc4,0x53,0xb1,0x63,0xdc,0x07,0xbb,0xd6,0x57,0x85,0x4d,0x77,0x33,0x21,0xbf,0x77,0xfe,0xfe,0x34,0x52,0x02,0xe7,0xe4,0x87,0x11,0xa0,0xfd,0x11,0x4a,0x34,0x36,0x88,0x69,0xdf,0x77,0xfd,0x83,0x71,0xa8,0x68,0xed,0x49,0x39,0xb4,0x06,0x32,0x48,0xf1,0xd2,0x4e,0x61,0x47,0x65,0x26,0x87 +.byte 
0xba,0x2b,0x2e,0xf4,0x12,0xfc,0xd0,0x84,0x81,0xa1,0x59,0xdc,0xe3,0x13,0x51,0x9e,0xea,0x57,0x56,0x3b,0x7c,0x71,0x6b,0xff,0xe9,0xf8,0xec,0x3e,0xe7,0xbe,0x65,0x47,0xe1,0x6f,0x8f,0x7c,0x3a,0x77,0xdb,0x75,0x4a,0x43,0x43,0x39,0x37,0xb2,0x68,0x16,0x72,0xdb,0x49,0xf7,0x13,0x3c,0x09,0x93,0xef,0xc1,0x2a,0x99,0xff,0xc7,0xdb,0xd9 +.byte 0x80,0xd2,0xfe,0x7c,0x39,0x50,0x21,0xdc,0x1d,0xae,0x9b,0xfc,0xd4,0x5f,0x56,0xae,0x6a,0xd9,0x35,0xa1,0x2b,0xd6,0x53,0x90,0xe8,0x8c,0x31,0x73,0x0f,0xa3,0x9e,0xa1,0x2f,0x76,0xa8,0x72,0x4d,0x5e,0x58,0xca,0x9f,0x8f,0xdf,0xf0,0xf9,0x6a,0x54,0xb1,0x5f,0x39,0x03,0x7a,0x26,0x06,0x71,0x74,0x6f,0x42,0xee,0x63,0x76,0x13,0xb9,0xed +.byte 0x74,0xad,0xf9,0xe0,0xa7,0x35,0x9c,0x18,0xe0,0xf7,0xc5,0xb2,0x27,0x14,0x0f,0xd7,0xaa,0x17,0x1c,0x8f,0x50,0xc8,0xb0,0xc2,0x63,0xff,0x38,0x65,0x87,0x69,0xb3,0xd5,0x3f,0xb4,0xf2,0xe8,0x8b,0x7b,0x24,0xdc,0x1f,0x62,0x2f,0x0a,0xd7,0x2d,0x0f,0x6f,0x48,0x1d,0xf0,0x3c,0xb1,0xb4,0x10,0x8d,0xc6,0x5c,0x79,0x30,0xde,0x20,0x9e,0x7b +.byte 0xf1,0xa5,0x73,0x38,0x05,0x1b,0x13,0x78,0xb1,0x02,0x2f,0x32,0x2a,0x07,0x59,0xa4,0xfc,0x88,0x08,0x0c,0xff,0x42,0x72,0x6a,0xb0,0x8a,0xc9,0x3d,0xdb,0x04,0x90,0xdd,0x0b,0xbc,0x3a,0x4e,0xfa,0xd4,0x57,0xd8,0x2f,0x7b,0xcb,0xd9,0x6a,0xe7,0xfd,0x32,0x17,0x99,0x20,0x64,0x1e,0x76,0x07,0xb9,0xa3,0x58,0x7f,0x79,0xda,0x0c,0xe0,0xec +.byte 0x30,0xbf,0xa4,0x85,0x0a,0x39,0xc0,0xe9,0xf7,0xbe,0xd1,0xa7,0x94,0x1f,0xa6,0x6d,0xe8,0xc5,0x1b,0x04,0x27,0xf4,0xdc,0xc2,0x4d,0x9a,0x0e,0x9b,0xe8,0xec,0x56,0x99,0x90,0x5f,0x8b,0x28,0x0a,0x92,0xaf,0x0b,0xa1,0xd2,0x85,0x86,0x26,0xc7,0x8a,0x01,0xa4,0x08,0x29,0x32,0x7d,0x3d,0xa5,0x74,0x9c,0x90,0x63,0x83,0x1f,0xd4,0xee,0x98 +.byte 0xf5,0x14,0xff,0x39,0xeb,0xbf,0x40,0xa4,0xc9,0x70,0x4f,0x81,0x03,0x19,0xef,0xf5,0xdf,0xf7,0x00,0x75,0xcb,0x2e,0x81,0x41,0xc5,0xda,0xfb,0x67,0x6a,0xf0,0xa3,0xd3,0x5a,0x60,0xaf,0x72,0x27,0x3e,0xad,0x37,0x3e,0x3d,0xe6,0x85,0x4c,0xa1,0xb0,0xe9,0xab,0xc5,0xd3,0x8b,0x04,0x0d,0x64,0x7f,0xa2,0xb9,0x6d,0x6d,0x28,0xf8,0x4b,0x43 +.byte 0x78,0x51,0xf4,0x84,0xf1,0x3c,0x67,0xd8,0xdd,0xd7,0x0b,0x67,0xc3,0xd9,0x95,0x7b,0xfc,0x7d,0xc4,0x33,0x05,0x90,0xec,0x0a,0x98,0xfb,0x6b,0x0d,0xe9,0x8c,0x74,0x94,0x20,0xf8,0xcb,0xca,0xb6,0x72,0x07,0x7c,0xef,0xfa,0xd0,0x3f,0x51,0xc5,0x6e,0xf8,0x3f,0x37,0xe3,0xfe,0xb9,0x9a,0x9c,0xb3,0xf6,0x96,0x4e,0x65,0x77,0x21,0xcf,0xaf +.byte 0xe7,0x20,0x06,0xc2,0x93,0xc5,0x2e,0xc0,0x7f,0xe5,0x0a,0x42,0xad,0x89,0x64,0x6e,0x95,0xbf,0x95,0x1d,0x24,0x47,0xf8,0xd5,0xec,0x7c,0x1f,0x98,0x67,0x9c,0x5f,0x6e,0xaf,0x74,0x95,0x65,0x4c,0xb6,0xe0,0xd3,0xb7,0x5b,0xc7,0x76,0xe6,0x87,0x19,0xf5,0xc7,0xb0,0x2d,0xe0,0x8b,0xaf,0x6d,0x3c,0x31,0x6e,0x84,0xc8,0x86,0x51,0xff,0x29 +.byte 0x2a,0x1f,0xea,0xd4,0x2d,0x1a,0x8f,0x04,0xb4,0xc0,0x6a,0x93,0xc2,0xc5,0xe7,0x98,0x8c,0xc7,0xff,0xbf,0xb8,0x8e,0x5b,0x29,0x5b,0xa6,0x87,0xc7,0x02,0x88,0x51,0x29,0x66,0xd8,0xf3,0x68,0x38,0xd4,0xa6,0xbd,0xa2,0x5c,0x1b,0xb7,0x13,0xd7,0x64,0xed,0x68,0x21,0x88,0x2b,0x59,0xba,0x95,0x84,0xda,0xce,0x61,0x3b,0x51,0x04,0x3e,0xc2 +.byte 0xdd,0xec,0x0c,0x6b,0xbe,0x35,0x51,0x63,0x29,0x40,0xcb,0xa5,0x62,0xe4,0x27,0x35,0x15,0x1f,0x7c,0x8b,0xe5,0xd0,0x2e,0xde,0x8c,0x3d,0xa0,0xd2,0xbe,0x51,0x3d,0x65,0xed,0x94,0x8b,0x8c,0x00,0xda,0x0e,0x78,0x4d,0x25,0xef,0x8e,0x3c,0x55,0x77,0xeb,0x58,0x06,0x7d,0xd1,0xfc,0x73,0xad,0x76,0x0a,0x81,0xbe,0xda,0x50,0x30,0xf3,0xfd +.byte 
0x58,0x25,0x0a,0x4b,0x1b,0x1e,0x0b,0xd0,0x9b,0xbc,0xb9,0x31,0x26,0xbc,0x4c,0x7b,0x05,0xd7,0x5c,0xe4,0x7a,0xdd,0xff,0x04,0xac,0x5d,0xcb,0xfd,0x91,0x34,0x68,0x26,0x1e,0xb4,0x86,0xcc,0xe3,0x90,0xaf,0x6a,0x65,0xda,0x6b,0x3e,0xec,0x44,0x90,0x72,0x7a,0x34,0xfc,0x7b,0x65,0x83,0x34,0x93,0xbc,0x85,0x50,0xdf,0x03,0x89,0x35,0xb8 +.byte 0x6a,0x39,0xd3,0xb6,0x38,0x66,0x5b,0xa7,0x9e,0x93,0xa2,0x3b,0xb6,0xe7,0xee,0x1e,0x5c,0xd6,0xa8,0xd9,0x1f,0xf7,0xd1,0x0a,0x2f,0x87,0x63,0xf4,0xf9,0x8c,0xd4,0x7c,0x02,0xaf,0x7e,0xb6,0xc7,0xfc,0xc9,0x4d,0x35,0x0c,0x8c,0x3c,0x13,0x9d,0xe6,0xd7,0x2e,0x4b,0x91,0xcc,0x88,0xdb,0xfc,0x68,0x3a,0xd1,0x15,0x07,0x16,0x66,0x11,0x9b +.byte 0x66,0x9f,0x3f,0x37,0xae,0x11,0xba,0x5f,0xc7,0x3a,0x1a,0x49,0xbc,0x14,0x21,0x75,0xdc,0xcc,0xbb,0x5c,0xed,0xdc,0x8b,0x21,0x9a,0x8f,0x5f,0x91,0x6a,0x9b,0x26,0x33,0x64,0x45,0xa0,0xdf,0xc4,0xa1,0x32,0xc4,0x4c,0xc2,0x42,0x1b,0x59,0x37,0x1f,0xdb,0x01,0x6d,0xed,0xd8,0x05,0x5b,0x90,0x59,0x32,0x45,0x50,0x5d,0xf1,0x34,0xc4,0xb7 +.byte 0x52,0x97,0xbb,0x42,0x12,0xf1,0xa5,0x76,0xe4,0x1a,0xbc,0x4a,0x64,0xd3,0x08,0xac,0xe1,0x49,0x70,0x61,0xc8,0xcf,0xb1,0xd3,0xc4,0x7f,0x38,0x31,0x6b,0xd3,0xe1,0xe1,0xe9,0x5b,0xaa,0x7a,0xec,0x26,0x81,0x44,0xd3,0xb9,0x63,0xea,0x37,0x98,0x15,0x41,0xf1,0xa1,0x72,0x87,0xcc,0x3b,0x6a,0x27,0x9b,0x85,0xa8,0x7b,0xb6,0x25,0xf9,0xd4 +.byte 0x84,0x3e,0x66,0x12,0xce,0x24,0xee,0x22,0x51,0x73,0x7e,0xba,0x1e,0x95,0x64,0xc5,0xbf,0x4e,0x4f,0x73,0xc1,0xc3,0x98,0xb9,0x6b,0x90,0x1f,0x39,0xfc,0x03,0x55,0x76,0x8c,0x57,0xea,0xe8,0xc1,0x25,0x09,0x69,0xc0,0xe8,0x54,0x91,0xc1,0x7c,0x52,0x8e,0x82,0x6d,0xf2,0x0e,0x3f,0xa9,0x98,0x04,0x40,0xda,0x1c,0xc0,0xbb,0x42,0xf0,0x7d +.byte 0xed,0x78,0xb0,0x4f,0x94,0xba,0x0d,0xbf,0x60,0xbe,0x09,0x67,0x42,0xc5,0x41,0x4c,0x80,0x8d,0x30,0x10,0xa9,0xd2,0x07,0x8c,0xa8,0x40,0xc6,0xe2,0x08,0x42,0x7f,0x99,0xad,0xc5,0x66,0x1f,0xfd,0xd2,0xc5,0x79,0x77,0x9b,0x60,0x7d,0x25,0x2d,0x69,0x14,0x94,0xa5,0xf0,0x0a,0x14,0xb6,0xf9,0xbe,0x3a,0x4a,0x3d,0xc6,0x45,0x2e,0x27,0x4a +.byte 0xd1,0x1d,0xcf,0x08,0xee,0x93,0x3c,0xb5,0x8a,0xee,0xdd,0xf3,0x33,0xa6,0x35,0x9d,0xd8,0xb4,0x68,0xc5,0x98,0x09,0x78,0xcc,0xb3,0xeb,0x0f,0xcd,0x25,0xf8,0x17,0x9c,0x45,0x77,0xc7,0x06,0x40,0x44,0x90,0xec,0x6a,0xd9,0xf5,0x05,0xd4,0x88,0x17,0x47,0xeb,0x29,0x85,0x32,0x76,0x7b,0xa4,0xe3,0x65,0x30,0x50,0x9a,0x99,0x26,0x91,0x60 +.byte 0xb0,0xb8,0xe5,0x8d,0x35,0x9e,0x9a,0x13,0x65,0x82,0xb2,0x4b,0xf1,0xed,0x1f,0xb7,0xb4,0xc0,0x03,0xe6,0x1d,0x2b,0xaa,0x1e,0x01,0x92,0x0b,0xcb,0x34,0x77,0x80,0x94,0xc2,0x4e,0x3b,0x73,0xd8,0x2e,0xd8,0x95,0x33,0x05,0x65,0xa2,0x99,0x29,0x7a,0xd1,0xb3,0xed,0x5a,0x8d,0x4d,0x6a,0x6d,0x69,0x2b,0x5a,0xa1,0x3a,0xc0,0x81,0x96,0xf1 +.byte 0xc2,0xa7,0x4e,0x07,0x90,0x04,0x99,0x70,0xea,0x1a,0x3a,0x26,0xb5,0xed,0x92,0xbd,0x57,0x80,0x11,0x06,0xf2,0xb4,0x05,0x69,0x7a,0xbf,0x27,0xa1,0xbd,0xdb,0x09,0xe5,0xb3,0x2d,0x86,0x41,0xcc,0x5d,0x68,0x37,0x9e,0x98,0xa5,0x4a,0x20,0x8a,0x5f,0x54,0xae,0x4f,0x73,0xd0,0x22,0x18,0x8d,0x2b,0x91,0xcb,0xbb,0x83,0x1e,0x04,0x93,0xc8 +.byte 0xc3,0x89,0x35,0xfd,0xda,0xeb,0x52,0x53,0x9f,0xdc,0x33,0xf0,0xe0,0x99,0x19,0x11,0xeb,0x55,0xd3,0x3c,0x5f,0xca,0x29,0x52,0xe7,0x6b,0xd1,0xad,0xeb,0xed,0x8e,0x68,0x82,0x91,0x85,0x81,0x68,0x70,0x78,0x61,0x1e,0x0c,0x09,0x3a,0x82,0xdc,0xdb,0x26,0x66,0x1c,0xa3,0x80,0x99,0x23,0x8a,0x45,0xd7,0xb8,0x10,0x97,0x80,0x70,0x49,0x78 +.byte 
0xa9,0x4c,0xf0,0xec,0xcc,0x05,0xd0,0x6a,0x6a,0x1a,0xa0,0xf7,0xde,0x78,0xc6,0x42,0xbe,0xbd,0xa0,0x24,0x1d,0x3f,0xdd,0xfb,0x92,0xc2,0xbd,0xd6,0x5c,0x25,0x74,0x3d,0x2b,0xb8,0x60,0x67,0xdb,0x70,0x1e,0xe8,0x9f,0xcd,0xb4,0x82,0x90,0x9e,0x2a,0x94,0xa5,0xa2,0xd4,0xd2,0x24,0xa7,0xca,0xbf,0xe1,0x8b,0xab,0xf3,0xd2,0x7c,0xa6,0xc8 +.byte 0xe6,0xaf,0xef,0xe3,0x86,0xb1,0x42,0x1d,0xc6,0xa2,0x37,0x9b,0x26,0x46,0x0b,0xfd,0xee,0x88,0xa4,0xf1,0xa8,0x72,0xaf,0xda,0x30,0x56,0x22,0xd3,0x1b,0x31,0x76,0xd7,0x03,0xef,0xf3,0x98,0x16,0x4d,0x36,0x57,0x1b,0xd5,0x90,0xb8,0x67,0x50,0x7f,0x22,0xa8,0xdc,0x9c,0xf1,0x6e,0xa4,0x65,0x45,0xf0,0x73,0xd8,0x7e,0x41,0xb0,0x68,0x52 +.byte 0x00,0x0a,0xda,0x99,0x6c,0x84,0xce,0xf0,0x73,0x65,0x93,0x52,0xc8,0x4b,0xb4,0x72,0xda,0x2c,0xa1,0x47,0xb5,0xe3,0x00,0x63,0xc0,0x4e,0x84,0x16,0x00,0xe6,0x1f,0xbd,0xba,0x49,0xcb,0xd3,0x7d,0xd2,0xeb,0x4a,0xb2,0xd5,0xb2,0x53,0x96,0xfb,0x04,0x73,0xc0,0x09,0x31,0xf3,0xf2,0xc0,0xd3,0xa6,0xe1,0xea,0xe1,0x58,0xbe,0x90,0xc9,0xfb +.byte 0x6e,0x13,0x69,0xbe,0x17,0xd4,0x16,0x5b,0xcb,0xf4,0x93,0x0a,0x38,0x46,0xea,0x64,0xad,0xb0,0x0d,0xc0,0x3b,0xfc,0xe3,0xd4,0x20,0x75,0x0c,0x3e,0x71,0x1b,0x5f,0xde,0xff,0xd6,0xfa,0x6f,0xe4,0x10,0xb0,0x14,0x05,0xaa,0x05,0x70,0x5e,0xbd,0x58,0x9f,0x3c,0x9d,0x4f,0xa7,0x5a,0x65,0x57,0x02,0x05,0x44,0xe0,0x95,0x9d,0xa2,0x60,0x06 +.byte 0xcb,0xfd,0x91,0x8e,0x7f,0xce,0xa1,0x80,0x94,0xbb,0x88,0xf2,0xa6,0xe7,0x83,0xf9,0x38,0x8f,0x09,0x8e,0xe4,0xa9,0xc2,0xc7,0x84,0x9d,0x25,0x09,0x52,0x8b,0x32,0xaa,0x3b,0xde,0xb6,0x82,0x9f,0x6d,0xc4,0xdf,0x11,0xf7,0x72,0x1a,0xe4,0x00,0x51,0x41,0x01,0xba,0x21,0xea,0x0a,0xda,0xf2,0xbb,0x66,0xae,0x51,0x2b,0xb0,0x6d,0x1d,0xe8 +.byte 0x4b,0x1e,0x42,0x68,0x3a,0xed,0xe6,0x59,0x13,0x42,0x07,0x54,0xae,0x2e,0x15,0x93,0xd7,0xff,0xad,0x49,0x09,0x41,0x52,0x6b,0x3b,0x9c,0x41,0x43,0x0d,0xed,0xed,0x6f,0xb8,0xe9,0x0d,0xcc,0xde,0x0d,0xaa,0x91,0xef,0x89,0x2f,0x2d,0x94,0xd0,0x03,0x2b,0x51,0x7f,0x85,0x9b,0x7b,0x08,0xc8,0xb6,0xe2,0x82,0x22,0xa9,0x57,0x71,0xf2,0xae +.byte 0x08,0xfa,0x6c,0xd8,0xca,0x78,0x42,0x98,0x23,0xfd,0x38,0x4b,0x6c,0xd3,0x9f,0xc6,0xa3,0xb2,0xc1,0x8c,0x4a,0xa3,0xcd,0x9f,0x56,0xe7,0xc2,0x06,0xd7,0xc5,0xc2,0xd9,0x98,0x57,0xc8,0x5a,0xaa,0xf4,0xaa,0x44,0x02,0x83,0x11,0x1e,0xf6,0x64,0x8d,0xf7,0x3b,0x86,0x3c,0x04,0x53,0x5f,0x62,0xc8,0x7a,0x0e,0x1c,0x4f,0xa8,0xe3,0x5c,0xe8 +.byte 0x64,0xf7,0xe3,0x5d,0xea,0xb5,0x2d,0xdb,0x7b,0x0e,0xdb,0x91,0x34,0xd5,0x87,0x4f,0xe6,0x73,0xee,0x3d,0x79,0x7c,0x67,0x48,0xb5,0xbb,0x42,0x96,0x0d,0x9d,0xbd,0x68,0x98,0xe5,0x59,0x51,0x16,0x45,0x15,0xac,0x80,0x41,0xae,0x45,0xdb,0xe4,0x2a,0x44,0x0d,0xe4,0x25,0xc7,0xd3,0x06,0xf7,0x98,0x15,0xe1,0xc5,0x9b,0x34,0x0e,0x87,0xb8 +.byte 0x90,0x1b,0x24,0x84,0x06,0x24,0xb0,0x80,0xbe,0x03,0xa0,0x95,0x10,0x1e,0x72,0xde,0x0f,0xd4,0x15,0x7b,0xa0,0xf5,0x42,0xc3,0x6f,0x10,0xe9,0x76,0x44,0xe3,0xa9,0xb7,0xef,0xf6,0xc2,0x80,0xe2,0x0c,0x2d,0xad,0xe0,0xb9,0x45,0xca,0x67,0x6f,0xb6,0xc5,0xc0,0x8d,0x25,0xee,0x50,0xeb,0x51,0xc6,0x87,0x87,0x61,0x3a,0x75,0x95,0x41,0x47 +.byte 0x26,0xfd,0x35,0xf6,0x46,0xf4,0xe9,0x42,0xc6,0xef,0x37,0x97,0xb3,0x0a,0x1d,0xc8,0xdf,0x07,0x24,0xb1,0x0d,0x07,0x43,0x67,0x7d,0x81,0x09,0x58,0xdd,0xf6,0xcf,0xf1,0x47,0x42,0xbd,0x3c,0xa3,0xd7,0xe8,0x73,0xf9,0x5b,0xff,0x2c,0xcd,0xe6,0xd1,0xe9,0x47,0x6d,0x19,0x9b,0x6a,0x63,0x69,0xf4,0x4a,0xdf,0x69,0xab,0xa9,0xb7,0xe5,0x8d +.byte 
0x1c,0x44,0x52,0x0c,0x7e,0xa1,0xfe,0x9d,0xd5,0xa4,0x71,0x62,0x0b,0x3c,0xf6,0xd2,0xd3,0xe9,0x70,0x09,0x68,0xf7,0xd6,0x0a,0x00,0x61,0xf1,0xf3,0xd0,0x41,0x4a,0x14,0xc6,0xf5,0x49,0xb1,0xde,0x10,0xd3,0x20,0x8b,0xfe,0x78,0x6a,0x87,0x79,0x15,0xd3,0x43,0x00,0xbe,0x71,0x40,0xaa,0xca,0x1a,0x64,0xe3,0x96,0x34,0x2f,0xea,0x0c,0x11 +.byte 0x41,0x21,0xf8,0xa7,0x65,0x9b,0x75,0xe2,0x1e,0x6f,0x5e,0xe0,0x68,0x42,0xca,0xd3,0x19,0x35,0xe8,0x88,0x0f,0x05,0xa3,0xb1,0x73,0xea,0x53,0x79,0x40,0x24,0x00,0x86,0x20,0xbb,0x25,0x58,0x89,0x6b,0xde,0xd6,0xd0,0x36,0xbb,0x33,0x30,0x59,0x4b,0x30,0x92,0xac,0xe5,0x95,0x94,0x22,0xab,0xc1,0x10,0x35,0x9c,0xa1,0x20,0x11,0x5d,0x4f +.byte 0x57,0x5c,0x9c,0xb8,0x3a,0xdc,0x97,0xa5,0xf3,0x0b,0xf5,0x96,0xe7,0xef,0x90,0x72,0x01,0x52,0x70,0x5a,0xf0,0xd9,0x7e,0x59,0x05,0x8c,0xd1,0x45,0x47,0xbf,0x16,0x15,0xa2,0xc9,0xdd,0xe7,0x5f,0x4b,0x94,0x5f,0xe6,0xf9,0x78,0xbb,0x8f,0xf9,0x79,0x9f,0x5e,0xd7,0x1f,0x0b,0xef,0x8d,0xfe,0x75,0xd4,0x8a,0x12,0x28,0xa5,0xf9,0x6e,0x14 +.byte 0x3c,0x52,0x80,0x57,0xc6,0x96,0xae,0x67,0x27,0xc1,0x1c,0xb6,0xd6,0x1c,0x74,0x8c,0x6f,0xc7,0x71,0x3e,0xd5,0x73,0xf2,0x3e,0x02,0x15,0x67,0x18,0xb8,0x5b,0x61,0x9e,0xfa,0x7e,0xba,0x00,0xe9,0xd9,0x51,0x91,0x63,0x7e,0xf7,0xab,0xc0,0xc6,0xee,0x66,0xdd,0x66,0x88,0x7a,0x8a,0xc5,0xc2,0x08,0x45,0x62,0xde,0xe1,0xfb,0x35,0x65,0x34 +.byte 0x00,0x9e,0x1d,0x25,0xdf,0x69,0xb6,0xe3,0xfe,0xbb,0x13,0xac,0xd3,0x13,0xb2,0x64,0x5a,0xf3,0x47,0xf1,0x36,0x55,0x5f,0x1b,0x87,0xea,0x5d,0x5c,0xfd,0x8a,0x68,0x69,0x8a,0x00,0x9f,0x83,0xbe,0x79,0x7d,0x01,0x9e,0xf2,0xb2,0x5d,0x56,0xe0,0xe6,0x49,0xe5,0xe1,0x76,0x57,0x7a,0x85,0xac,0x94,0x16,0xe3,0x68,0x05,0x14,0xb5,0x33,0x54 +.byte 0x64,0x5a,0xbe,0xa3,0x04,0x90,0x5c,0x1c,0xf8,0x97,0x16,0x36,0xce,0x76,0xe7,0xf0,0xaf,0x8a,0xea,0x65,0xa8,0x15,0x5b,0x1e,0x0a,0x91,0xad,0x62,0x62,0x67,0xb4,0xf0,0x94,0x1f,0x64,0x50,0xa8,0xc0,0x6b,0x38,0x80,0xd7,0x53,0xbb,0x70,0xbd,0x54,0x01,0xb0,0xa5,0xbc,0x00,0xe0,0xd6,0x23,0x37,0xe6,0x9f,0x0f,0x2f,0x96,0x21,0xc2,0x90 +.byte 0x55,0x26,0x55,0xa4,0xcd,0x3e,0x54,0x6b,0xa6,0xb0,0x2c,0xf2,0xd4,0xcc,0x6a,0x44,0xea,0x18,0x61,0xc5,0x1a,0x8e,0x60,0x64,0xf4,0x5f,0x21,0x36,0x01,0x5d,0x9f,0xc4,0x2c,0x67,0x1c,0x48,0x94,0x16,0xae,0xa8,0x13,0x5c,0xee,0x18,0x88,0x61,0xe4,0x54,0x6b,0xa2,0xe8,0x7f,0xf0,0x15,0xc3,0xce,0xbc,0x5b,0x91,0x25,0x7b,0x1d,0xd3,0x9f +.byte 0x13,0x1b,0x01,0x5d,0x43,0xe8,0xa1,0x77,0x5a,0x87,0x79,0x8b,0xd5,0x69,0xf7,0xdf,0x66,0xa2,0x84,0x0c,0x66,0xac,0x15,0x65,0xbf,0x74,0xc0,0xd2,0x78,0x6a,0x3a,0x9c,0x98,0x62,0x04,0x41,0x95,0xb2,0x23,0x59,0xc6,0xb0,0xc5,0x22,0xc0,0xfa,0xaa,0xc8,0x94,0x73,0x91,0x5b,0x64,0x1b,0x74,0xbe,0xcb,0xa1,0x81,0xb1,0xc1,0x26,0xa1,0x94 +.byte 0x55,0x04,0xb3,0x9c,0x80,0xb7,0x00,0x6f,0x36,0xc7,0x7f,0x6d,0x97,0xea,0xf3,0xf5,0x55,0xc5,0xfe,0x61,0xd9,0xb1,0x6d,0x8c,0xa1,0x02,0x08,0xb3,0x41,0xe6,0xe6,0x57,0xc6,0xff,0x6e,0x47,0xa4,0x22,0x2e,0x2d,0x21,0x53,0xbe,0xe3,0xbe,0x15,0xec,0x23,0x9d,0x87,0xe0,0x2e,0xcc,0x6c,0xd0,0xc7,0xb7,0x3d,0xa4,0x07,0x5f,0x69,0x4e,0x2b +.byte 0x07,0x69,0x4f,0xc5,0xa3,0x66,0x52,0x91,0x8f,0xa4,0x48,0xb9,0x40,0x76,0xd9,0xcb,0x6e,0x1a,0x35,0x9e,0x50,0x9f,0xd1,0x78,0xb2,0xb8,0x0d,0xa8,0xf8,0x6e,0x07,0xa5,0x3a,0xdf,0x3c,0x32,0xa6,0x10,0xbd,0x73,0x2f,0x07,0x45,0x66,0x0f,0x61,0xce,0xc2,0x08,0x19,0x98,0x33,0x4b,0x59,0x81,0xb5,0x78,0x4f,0x46,0x88,0xae,0x29,0xf8,0xf5 +.byte 
0xc2,0x29,0x6f,0x8f,0xe5,0x8f,0xb0,0x53,0xc8,0x7a,0x48,0xda,0x6f,0x7e,0x8a,0x69,0x68,0xab,0xba,0xd9,0x20,0x0f,0x96,0x69,0x41,0xa6,0x92,0x94,0x8e,0x0f,0x86,0xdf,0x8d,0x70,0xaf,0xfe,0xf1,0x20,0x50,0x01,0xff,0xca,0x30,0x24,0x67,0x4a,0x04,0xa2,0xde,0x06,0xdc,0x26,0x1e,0x17,0xbc,0x52,0x9a,0x62,0x72,0xc1,0xd8,0xd7,0xe0,0xed +.byte 0xcf,0x4b,0x13,0x80,0x9a,0xbf,0x72,0x4f,0xf4,0x24,0x26,0xcd,0xe0,0x21,0x99,0x7b,0x5c,0x4f,0xbf,0x5c,0x41,0x08,0x8b,0x17,0x69,0x62,0x60,0x2c,0x74,0xb0,0x2d,0x22,0x7e,0x25,0x95,0x6a,0x84,0x0f,0x45,0x8f,0x9a,0x92,0xa1,0xcd,0xa5,0x50,0xf0,0x52,0x7f,0x60,0xd8,0x91,0xe1,0x17,0xe1,0x66,0x8f,0xd3,0x1f,0x41,0x7f,0x6f,0xf1,0x72 +.byte 0xa3,0xb6,0x12,0x62,0x46,0x16,0xea,0x26,0x9e,0xda,0x61,0x13,0x0b,0x17,0xf7,0xe1,0xec,0xc0,0x38,0xfe,0x40,0x31,0x6b,0x38,0x2a,0x4b,0xa5,0x8e,0xfb,0x99,0x60,0xd6,0x4a,0xbd,0xfb,0x75,0x2b,0x41,0xd4,0x33,0x5d,0x35,0xfe,0x2d,0xfc,0x1a,0xac,0x02,0xb3,0xf0,0xa2,0x6d,0xfa,0x8b,0x12,0x99,0xdd,0x54,0xf2,0x1c,0x35,0xd3,0x60,0x5a +.byte 0xdb,0x65,0xa7,0x58,0x1b,0x82,0xb4,0xf6,0x49,0x77,0xf2,0xea,0xa3,0xa9,0x57,0x94,0xb7,0x6e,0x19,0xda,0x7e,0xa5,0x70,0xb8,0xff,0x39,0x81,0x7d,0xfa,0xea,0xd6,0xc6,0x12,0x84,0x0a,0x8a,0x16,0xde,0x99,0xa6,0xe7,0xe0,0x77,0x76,0xb8,0xa3,0x6f,0xfb,0xb4,0x8f,0xc3,0xbd,0x90,0xd8,0x2a,0x04,0xed,0x42,0x91,0x9b,0x84,0x40,0x2d,0x01 +.byte 0x94,0xdb,0xbb,0x58,0x25,0xed,0xa3,0xdd,0xaa,0x0c,0xce,0x25,0x12,0xcd,0x11,0xbf,0xd0,0x57,0xe9,0x51,0x74,0xa7,0x45,0x6c,0x58,0xe7,0x4d,0x43,0xc6,0xd0,0x09,0x93,0x2d,0xe0,0xe3,0xae,0x7b,0x8f,0x53,0xa0,0x80,0xa1,0xef,0xcb,0xf5,0xfe,0x38,0x4d,0x31,0xa2,0x5c,0xd3,0x4a,0x66,0x1a,0x5c,0x07,0xbe,0x25,0xba,0x30,0xb6,0x00,0x27 +.byte 0x52,0xb9,0x1f,0xa3,0xed,0xd7,0x31,0x33,0x4a,0xf6,0x3f,0xed,0x75,0xe7,0xa4,0xf4,0xdf,0x97,0xc1,0x78,0x90,0x9b,0x4b,0xbd,0x06,0xc6,0x72,0x5c,0xdf,0x57,0x60,0xbe,0xbc,0x88,0x02,0xb6,0x5a,0x65,0xea,0x3a,0x3a,0x74,0x03,0xc8,0x66,0xef,0xf0,0x63,0xc7,0x9d,0x58,0x8e,0xa1,0xb2,0x25,0x4f,0xc4,0x14,0x5f,0x80,0x78,0x08,0x06,0x21 +.byte 0x50,0x34,0x01,0x2b,0x15,0xf4,0x7d,0x1f,0x1f,0x32,0x36,0x0a,0x52,0x1f,0x50,0xa2,0x50,0xbc,0x9a,0xdf,0x4e,0x84,0x49,0x2d,0x08,0xaa,0x46,0xc0,0x0e,0xcf,0x27,0x17,0x91,0x78,0x8c,0xb9,0x72,0xc5,0x8e,0x25,0x85,0x11,0xff,0x2f,0x4a,0x71,0x7c,0x14,0xfe,0x86,0xfe,0xb4,0x3a,0xd0,0x67,0xfd,0xaa,0x9b,0xee,0x89,0x66,0x03,0x59,0x4e +.byte 0x1c,0x96,0xaf,0x2b,0x8d,0x4d,0x6f,0xf6,0x72,0xc6,0x13,0xc7,0x14,0xce,0x19,0x0c,0x0b,0xa3,0x01,0x12,0x7c,0x8e,0x10,0xb8,0x63,0x41,0x57,0xb9,0xfe,0x6e,0x3e,0xda,0x20,0xfb,0x92,0x08,0x7d,0x66,0x31,0x9d,0x4f,0xdb,0x14,0xf4,0xb6,0xb8,0xea,0xee,0x54,0x0f,0xaf,0xc1,0x99,0xf0,0x8f,0x55,0x44,0x20,0x44,0xd0,0xa6,0x98,0xa3,0xa8 +.byte 0x8b,0x8e,0x26,0x03,0xec,0x2d,0x50,0x4f,0xb0,0x8d,0xd0,0xf2,0x96,0xcc,0x18,0xa9,0xb1,0x0f,0x79,0xe3,0x9f,0x08,0xb3,0x53,0x0b,0x9c,0x9f,0x22,0xdb,0x45,0x57,0xd6,0xaa,0x3b,0x6a,0xcb,0xdc,0xc9,0xda,0x57,0x75,0x65,0x0a,0xc1,0x17,0xb3,0x97,0xa9,0x07,0x40,0x20,0xfb,0x72,0x2d,0xc6,0x37,0x1e,0x44,0xb7,0x7e,0x0b,0x38,0xcc,0xfc +.byte 0xa0,0xed,0x48,0xa9,0x9b,0x87,0xbc,0x71,0x0f,0x8b,0xda,0x4f,0x09,0x27,0x1e,0x3d,0x9c,0x03,0x62,0x81,0xa8,0x7c,0x7b,0x8a,0x14,0xa7,0x22,0x69,0xa8,0xba,0x0e,0xcc,0x1f,0x2b,0xb3,0x0f,0x7d,0xce,0x3f,0xec,0xb5,0x9d,0xe0,0x3a,0x67,0x56,0x08,0x5d,0x03,0x8b,0x71,0x01,0x44,0x11,0x1b,0x7b,0xcf,0xcc,0x2e,0xfc,0xa5,0x52,0x9b,0xeb +.byte 
0x1e,0x8a,0xa1,0x86,0x64,0xcf,0x32,0x03,0x6b,0x3e,0x29,0xe7,0x9a,0x16,0x7e,0xe2,0x21,0x2f,0x5f,0xe2,0x86,0x7f,0xf8,0x22,0x36,0x10,0x99,0xc8,0x27,0x43,0xa1,0xb9,0xf4,0xb4,0xb8,0xe1,0xa3,0x1d,0x80,0x9c,0x81,0x92,0xef,0x1f,0x28,0x54,0x51,0xf3,0x62,0x9c,0x7a,0x24,0xd4,0x5a,0xdc,0x38,0x4f,0xa5,0x57,0xdd,0x4d,0xa1,0x52,0xf3 +.byte 0xd3,0x9d,0xa1,0x93,0x5e,0xbe,0x9b,0xd1,0x2a,0x52,0xf1,0xbb,0xa5,0x3f,0x3a,0x94,0x7c,0x7d,0x41,0x61,0x36,0x14,0x25,0x5f,0xab,0xef,0x32,0xf3,0x0f,0x6c,0xc5,0xf5,0x5f,0xe5,0x88,0x51,0x17,0x60,0x8b,0xd5,0xa6,0xea,0x8b,0x21,0xec,0x1a,0xa7,0x69,0xa0,0x59,0xf9,0xeb,0x51,0x94,0x70,0x2b,0x96,0x2e,0x71,0xa9,0x8c,0x12,0x15,0xce +.byte 0x7d,0x59,0x6b,0xf2,0xca,0x2c,0xbd,0x85,0xfb,0x23,0xab,0xcb,0x89,0x89,0xda,0x28,0x49,0x7e,0xfc,0x90,0x2a,0x9a,0x3d,0x6d,0x24,0x57,0xba,0xd9,0x30,0xe0,0x10,0x04,0xb1,0x7f,0x8a,0xcf,0xc8,0x27,0x63,0xd6,0xbd,0xea,0xef,0x90,0x6f,0xc2,0xfc,0x78,0xfd,0xc4,0x5b,0x45,0x0c,0x41,0x8a,0x53,0x5b,0xbc,0x62,0x32,0x86,0x7f,0x19,0xb7 +.byte 0x8b,0x03,0x50,0xed,0xca,0x8e,0x8b,0xa0,0xe3,0xc2,0x0e,0x81,0xe5,0x8a,0xe8,0xf1,0x6a,0x0b,0x1a,0xa7,0xb6,0xed,0x74,0x23,0x34,0xad,0x5b,0xd8,0xf7,0x17,0x8d,0xa5,0x05,0xf3,0x00,0x4a,0xad,0x7e,0x91,0xc9,0x6b,0x13,0xff,0x76,0x78,0xf0,0xd1,0xf4,0x99,0x43,0x73,0xd9,0xba,0x59,0xbe,0xb5,0xa3,0xbd,0x5e,0xc5,0xd3,0x88,0x06,0x9c +.byte 0x86,0x32,0xb4,0xd5,0x30,0x77,0x78,0x8e,0xd5,0x6a,0x1d,0xeb,0xfd,0x6b,0xe6,0xf8,0x4b,0xe8,0xf3,0xba,0xbb,0x86,0x8e,0xe6,0x63,0x83,0x92,0x23,0x05,0x58,0x2e,0x61,0xdd,0x38,0xad,0x8d,0x19,0x7d,0xfa,0x7c,0x3e,0xc8,0x9f,0xae,0xea,0x6d,0x12,0xf0,0xa4,0x08,0xed,0x12,0x0c,0x97,0x87,0x58,0xd8,0xbc,0x3f,0xde,0x7c,0xee,0x0c,0xc0 +.byte 0xa2,0x2e,0xf0,0x25,0x6d,0xf3,0x30,0x23,0xa7,0xc2,0xc8,0x09,0x67,0x01,0xe1,0x25,0x26,0x46,0x38,0xf5,0x5e,0x55,0x8b,0xd6,0x43,0x6a,0xb8,0xe4,0xdf,0x0f,0x5d,0x6c,0xc3,0xb2,0x56,0x38,0xda,0xbc,0xbf,0x5e,0x85,0x8c,0xd5,0x2a,0x6a,0xe2,0xff,0x4f,0x36,0xf7,0x52,0x2c,0xe2,0xae,0x65,0x65,0xd1,0xfc,0xd3,0xc6,0xf7,0x26,0xa6,0xd0 +.byte 0x0b,0xc8,0xf0,0x68,0x5d,0x07,0x89,0x06,0xb3,0xfb,0x39,0x1d,0xd8,0xd8,0xd7,0x53,0xd0,0xc9,0x76,0x56,0xc0,0xd3,0xf5,0x66,0x80,0x5b,0xff,0x4a,0xdf,0xae,0x52,0x86,0x54,0x24,0x53,0xcf,0xcf,0xd2,0x89,0xde,0x71,0x62,0x9c,0x31,0xa5,0x3d,0x62,0x07,0xa1,0x33,0x49,0xbb,0x06,0x88,0xd8,0xa1,0xdd,0x0e,0x47,0x8d,0x72,0x00,0x2d,0x51 +.byte 0xa3,0x35,0x6e,0xb6,0x1f,0xbf,0xe5,0x42,0x68,0x6f,0x62,0xfa,0xf3,0x12,0xa9,0x1a,0xbd,0xe8,0xa4,0xf1,0x6d,0x07,0xe7,0x70,0x87,0x44,0xb7,0x3d,0xea,0xdc,0x3a,0x24,0xbd,0xa0,0x9b,0xb8,0xc5,0xa8,0xd9,0x06,0xde,0x02,0x68,0x7e,0xd5,0x2d,0x3b,0x5f,0x12,0x31,0x72,0x35,0x77,0xf6,0x10,0x6e,0x81,0x7d,0x3c,0xac,0x95,0x5b,0xbe,0x90 +.byte 0x74,0xf3,0x3e,0x9b,0x07,0x54,0x97,0xe3,0x1d,0xcf,0xe2,0xc5,0x80,0x6b,0x5f,0x0b,0x96,0x00,0x0f,0x0e,0x53,0x36,0x76,0x6e,0x99,0x0c,0x32,0xa2,0xc9,0xaa,0xa0,0xa1,0xb7,0xee,0x9d,0xd6,0x46,0xe7,0x2d,0x10,0x7a,0xf2,0x22,0x50,0x52,0xbf,0xec,0xcc,0xbc,0x0d,0x81,0x55,0x2d,0xac,0x2e,0xf7,0x99,0xbe,0x68,0x09,0xb0,0x11,0xc3,0xc8 +.byte 0xca,0x63,0xa7,0xc2,0x0f,0x37,0x2a,0x9e,0x85,0x79,0x6b,0x44,0xc1,0x4f,0xb9,0xd6,0x6c,0x56,0x0e,0x59,0x33,0xc3,0x00,0x53,0xe2,0xf4,0x30,0x90,0x4e,0x4b,0x09,0x4d,0x6f,0x9a,0x9e,0xb9,0x8d,0x0b,0xa1,0x80,0xfd,0xfb,0xde,0x74,0x49,0x53,0x04,0x3a,0x35,0xcb,0x45,0xe2,0x67,0x2c,0x4d,0x6e,0x39,0x7b,0xbd,0x68,0xaa,0x93,0x1e,0xee +.byte 
0x1e,0x35,0xae,0x1e,0xf2,0xe7,0xb1,0x80,0x92,0x45,0x27,0x85,0xd0,0xc7,0x26,0x17,0x54,0x30,0xba,0x0c,0x8e,0x48,0xf3,0x08,0x51,0xa6,0x41,0x70,0xba,0x5b,0x90,0x69,0x7c,0x64,0x1d,0x61,0xb5,0x23,0x4a,0xef,0x97,0xe4,0x9a,0xd0,0xff,0x47,0x7a,0x93,0x1a,0x28,0xb3,0x8a,0x32,0x29,0xf8,0xe9,0x08,0xc3,0xf3,0x24,0xd7,0x2e,0x18,0x6d +.byte 0x99,0x40,0x77,0x43,0x9f,0x98,0xe4,0xe5,0x3a,0x34,0x9d,0x46,0x52,0x9f,0x84,0x79,0x8c,0x70,0xbc,0x88,0x30,0xaf,0x87,0x69,0x57,0x6e,0xde,0x2e,0xfe,0x0f,0x3b,0x8d,0xc8,0x95,0xcf,0x69,0x78,0xff,0xa1,0xb1,0x81,0x49,0x1e,0x45,0xc0,0x83,0x1b,0xa3,0x5a,0xee,0x3e,0x9a,0x15,0x7c,0xf0,0xa2,0xfd,0x04,0x22,0x55,0x2d,0x74,0x61,0x29 +.byte 0x0e,0x4f,0x31,0xdb,0x35,0x99,0x37,0xb7,0x7d,0x11,0xde,0x87,0x4f,0x84,0xeb,0x6c,0x14,0xcc,0xbb,0x71,0x47,0xab,0x5b,0x61,0x51,0xeb,0xa1,0xc1,0x5f,0xe4,0x5c,0x3c,0xab,0x04,0xf1,0x60,0x50,0xe1,0xd0,0x58,0xdf,0x42,0xed,0x73,0x5f,0x31,0xdf,0x8d,0xb8,0xb8,0xdc,0x4e,0x2f,0xe3,0x7f,0x89,0x9e,0x62,0xc9,0xef,0xfd,0x60,0xae,0x58 +.byte 0xa9,0xa5,0x8b,0xa8,0x3b,0xd8,0x5f,0xd4,0x09,0xff,0x61,0x8c,0x25,0xde,0x84,0x7f,0x35,0xc9,0x5c,0x2b,0xe8,0x46,0xe4,0x1c,0xbd,0x77,0x51,0x31,0x55,0x3d,0xb4,0x35,0xf3,0xdc,0xa5,0x55,0xd3,0xe3,0x24,0xf9,0x41,0xe2,0xf0,0xbd,0xf5,0xff,0x81,0x87,0x64,0xc9,0xe7,0x69,0x29,0x86,0xaf,0x98,0x33,0x33,0x62,0x9c,0x7b,0x16,0xbb,0xfe +.byte 0x0b,0xa7,0x92,0xa5,0x7b,0x81,0xbc,0x50,0x88,0xf6,0xe7,0xfc,0x73,0xd6,0x37,0x43,0x09,0xa5,0xc6,0xd6,0x4d,0x28,0xb5,0xaa,0x53,0x52,0x8c,0x2c,0x06,0x64,0x6c,0x21,0x6b,0xe7,0x67,0x4a,0xa5,0xcc,0xa1,0x32,0xf0,0xd9,0x78,0xb9,0xc3,0xdb,0x41,0xee,0x10,0x11,0x81,0x04,0x03,0x73,0x48,0xc6,0x3e,0x60,0x6d,0x82,0xef,0xe2,0xa8,0xe8 +.byte 0xd7,0xda,0xd9,0xb5,0x34,0x42,0xc8,0x1c,0xa7,0xa4,0x8e,0x88,0x2e,0xbc,0x96,0x0a,0xfc,0x40,0x36,0x80,0xdf,0x60,0xe9,0x03,0x02,0x0c,0x51,0xf7,0x7d,0x01,0xd2,0x21,0x38,0x44,0x4b,0x34,0x80,0xbf,0x5e,0xc1,0x86,0xf2,0x35,0xeb,0xa8,0x21,0x15,0x74,0x7c,0x99,0x55,0x64,0xf4,0x48,0xd6,0xd1,0x47,0x1f,0x4d,0xbf,0x0c,0x20,0x5d,0x86 +.byte 0xb9,0xab,0x4e,0xc8,0x86,0x08,0x71,0x1d,0x13,0xf6,0xd3,0x17,0xac,0x61,0x10,0x5d,0x2a,0xb4,0x48,0xa1,0xb9,0x79,0x5a,0x09,0x3a,0x65,0x4c,0xbd,0x97,0xbe,0x48,0xc6,0x66,0xd8,0xce,0x0c,0x19,0xb5,0x44,0x02,0xfa,0xb7,0xa8,0x3f,0x9b,0x86,0xec,0xd1,0xef,0x1d,0x7d,0xb3,0x82,0x5c,0x92,0x48,0x02,0x2c,0x56,0x0f,0xff,0xf7,0x19,0x74 +.byte 0xc2,0x38,0x24,0x8d,0xb2,0x87,0xb6,0xeb,0x49,0x50,0x6a,0x33,0x74,0x4e,0x2a,0xcb,0xf4,0x13,0x2c,0xfa,0x3b,0x0e,0x3d,0x98,0x3e,0x33,0xd9,0x55,0xfa,0xb9,0x74,0xb8,0x6f,0xc1,0xd8,0xfd,0x8f,0xff,0xb9,0x1a,0x17,0xf8,0xb6,0x21,0xc4,0x9d,0x47,0x5e,0x84,0xf6,0xe5,0xbf,0x93,0x98,0xac,0x8f,0x68,0x85,0xf8,0xe8,0x79,0x7f,0x6f,0x0d +.byte 0x62,0x2c,0xaa,0x1e,0xe4,0xab,0x73,0xf8,0x6f,0x02,0xda,0x6b,0x3c,0x14,0x2e,0xc9,0xdb,0xb0,0x4e,0x39,0xb5,0xcf,0x05,0xae,0x9c,0x63,0x2f,0x6a,0x25,0x61,0x9d,0x40,0xeb,0x7e,0xd8,0x97,0x97,0x33,0x67,0x5c,0x78,0x84,0x68,0xc2,0x7a,0x26,0x58,0xe3,0x6c,0x0a,0x2e,0x6a,0x82,0xd6,0x43,0xed,0x79,0xa5,0x8d,0x4e,0x7c,0xf7,0x80,0x01 +.byte 0xe7,0x02,0x5e,0x3a,0xf7,0x8a,0x4a,0x85,0xe9,0x98,0x1e,0x69,0x33,0xf3,0x54,0x96,0x79,0xc8,0x03,0x0a,0x9f,0x0c,0x5d,0x66,0x44,0x88,0x3c,0xd7,0x9e,0xd1,0xde,0x01,0xfd,0x5e,0xa5,0x6a,0x82,0x00,0x36,0xe6,0x12,0xe3,0x62,0x46,0x45,0x69,0xfb,0x4f,0x44,0x8e,0xe5,0x8d,0x21,0x57,0x6a,0x61,0x8e,0x56,0xcb,0x5b,0x2c,0x5f,0x65,0x41 +.byte 
0x2c,0xad,0xf2,0x98,0x34,0xbb,0x06,0x0d,0x8a,0x3c,0x34,0x0d,0xa3,0xe2,0x6e,0x86,0xfa,0xa9,0xfb,0x6f,0xbb,0x32,0xd6,0x0d,0x76,0x6b,0x77,0xf3,0x83,0x41,0xc0,0x80,0x63,0x55,0x47,0xb8,0x13,0x6b,0x99,0x96,0x08,0x9b,0xc0,0x82,0xae,0x49,0x4a,0x51,0x63,0x74,0xf2,0xec,0xfa,0x0d,0xbc,0x3a,0xde,0xf5,0x4b,0x4f,0x08,0x41,0x23,0x88 +.byte 0x14,0x88,0x6a,0x3a,0xf0,0x5f,0x0c,0x45,0x7f,0x65,0x7a,0x67,0xd8,0x17,0xed,0x04,0x47,0x60,0x0e,0x74,0x8f,0xfd,0x48,0xda,0xcd,0xe9,0xfe,0xf5,0x6f,0x43,0xcd,0xa5,0x05,0xa2,0x2e,0x78,0x5b,0xff,0xb8,0x6f,0x2e,0xfd,0x3e,0x4b,0xef,0xcf,0xe0,0x06,0x57,0x28,0xf4,0x2e,0x3b,0xb5,0x9e,0x3c,0xbd,0x63,0xa6,0x78,0x8e,0xd5,0xb8,0x81 +.byte 0x4e,0xf0,0xbf,0x14,0x65,0xc8,0x00,0x9f,0x0e,0x25,0x6a,0x7a,0x63,0x58,0xe4,0xe7,0xa9,0x82,0x16,0xc9,0x86,0x20,0x94,0x71,0x5b,0x9f,0x9b,0xc3,0xc5,0x32,0xb0,0x6c,0x2b,0x8c,0x54,0x67,0x36,0x94,0xb1,0x47,0x33,0xfd,0x9f,0x7c,0x7f,0x7e,0x08,0x51,0x1f,0x7e,0xbf,0x09,0x57,0xf3,0xaa,0x77,0x94,0xf3,0x20,0x1b,0x95,0xf6,0x04,0xb2 +.byte 0x09,0x9d,0xe2,0xbb,0x4d,0xfe,0x6b,0x99,0x06,0x58,0x40,0x84,0x90,0xfa,0x0e,0x9b,0x58,0x6d,0x02,0xbe,0x53,0x73,0xd1,0xc9,0xc7,0x31,0x2a,0x4a,0x12,0x2c,0xb6,0x1c,0xfb,0x49,0xc6,0x1a,0x93,0x33,0x1f,0x29,0x8b,0x94,0xe9,0x20,0xa7,0xe6,0x20,0xe6,0xbf,0xcd,0x5c,0xb6,0x52,0x42,0xf0,0x9c,0x6c,0x21,0x61,0x10,0xe7,0x0e,0x9f,0x33 +.byte 0x5f,0xc8,0xd0,0x20,0xe0,0x3e,0xc5,0x7a,0x10,0xf1,0xe5,0x19,0x52,0xcd,0xe1,0xa8,0x62,0x43,0x20,0x79,0xc3,0xac,0x93,0x27,0x02,0x8e,0x21,0x06,0xb9,0x66,0xd9,0xc8,0x40,0xe0,0xd1,0xf0,0x64,0x81,0xa6,0xc4,0x87,0x85,0x2b,0x92,0x1c,0xd6,0x48,0x85,0xb1,0xbe,0x78,0xf3,0x89,0xa2,0xf0,0xe5,0x39,0xac,0xbf,0x59,0x5d,0xf8,0x4f,0x74 +.byte 0x44,0x85,0x98,0x03,0x81,0x4b,0x7e,0x6f,0x5c,0xa1,0x11,0xd2,0xfd,0x30,0x7f,0xcd,0xd0,0xe2,0xcc,0xd4,0x80,0x16,0x46,0xa6,0x64,0x8b,0x9e,0xfc,0x2a,0x1a,0x65,0x5c,0x90,0x82,0xf9,0x23,0x48,0x11,0xf6,0xf2,0x50,0x3f,0xed,0x44,0xf2,0x9a,0x5a,0xca,0x1c,0x9a,0xd2,0x71,0x1b,0xd6,0x4c,0x51,0xf6,0x89,0x6f,0x65,0xe4,0x97,0x41,0x47 +.byte 0x1b,0x86,0xbd,0x83,0xa0,0xfe,0xac,0x16,0xe8,0xab,0x28,0x96,0x2f,0xa2,0x12,0x5f,0x7c,0xb3,0x18,0x2b,0x05,0x51,0x49,0xba,0xb4,0x1f,0x1e,0xe6,0x8a,0x82,0xca,0x33,0x7d,0xe6,0x8c,0x95,0xba,0x08,0x60,0x47,0x6d,0x79,0xac,0x0f,0xba,0x46,0xff,0xed,0xe0,0x34,0x03,0xfe,0xa7,0x85,0xe5,0x61,0xe3,0xe4,0x6c,0x5c,0x1b,0x9d,0x8a,0x54 +.byte 0x17,0xaf,0x08,0x4c,0x44,0x7f,0xb7,0xb0,0x6a,0x3a,0xff,0xb7,0xf6,0x10,0xc4,0x8f,0x31,0xd6,0x1a,0x25,0x27,0x35,0xca,0x87,0xa9,0x61,0x0b,0x35,0x96,0x89,0x0f,0x1a,0xbd,0x1e,0xf6,0xee,0xaa,0x95,0x16,0xe4,0x38,0x7b,0xb2,0xbe,0xea,0xc9,0x5a,0xcd,0x3b,0xb8,0x9e,0xd7,0x20,0xcd,0x3f,0x90,0xaa,0x8b,0x2a,0x42,0xed,0xab,0xc1,0x53 +.byte 0x83,0xc7,0xb8,0x3f,0xa1,0xb9,0xf4,0xf4,0xb0,0xe0,0x1f,0xb0,0xeb,0xa9,0x81,0x9f,0x31,0x67,0x1e,0x6c,0x96,0x9f,0x09,0xea,0x04,0xfe,0x37,0x22,0x87,0x60,0xb9,0x91,0x8f,0xa9,0x11,0xa3,0x68,0x5e,0x29,0x21,0x41,0xa3,0x02,0x08,0x82,0xd0,0x2b,0x66,0x6d,0x3c,0x46,0xc7,0x23,0x09,0x86,0x7f,0x53,0x11,0x3e,0x83,0x52,0x0a,0x4a,0xe4 +.byte 0x93,0xc6,0xc1,0x96,0x17,0x94,0x51,0x17,0x69,0xea,0x72,0xb8,0x85,0xde,0x7e,0x13,0x4a,0x08,0x26,0xae,0x31,0x19,0x0f,0x6f,0x48,0xa1,0xf2,0x57,0xa2,0x01,0x8e,0x84,0xee,0x63,0x23,0xc0,0x97,0x84,0xa2,0xf5,0x3f,0xeb,0x30,0x9e,0xdd,0xd2,0x43,0x24,0xa2,0x57,0xb7,0x57,0x86,0x26,0xa3,0xe6,0x6e,0xf2,0xcd,0xfb,0x7b,0x34,0x74,0x53 +.byte 
0x07,0x95,0x51,0xb7,0xfd,0xf3,0xd1,0x83,0xbd,0x25,0xd6,0x2c,0x69,0x73,0x02,0x8e,0x76,0x19,0xea,0xb0,0x83,0x60,0x8c,0x53,0x9d,0x77,0x86,0x1e,0x65,0xc7,0x57,0x31,0x29,0xd9,0xa9,0x3a,0xb2,0x0d,0xd8,0xf4,0xf9,0x48,0x49,0xfb,0x3c,0x40,0x3d,0x1b,0xc4,0x8b,0x94,0x0e,0x50,0x7f,0xd5,0x39,0x5e,0x57,0x86,0xd1,0xba,0x0c,0x38,0x10 +.byte 0x01,0x5f,0x44,0xf3,0xe5,0xb0,0xf8,0xae,0x17,0xdf,0xd2,0xb3,0x10,0xc5,0x3b,0xfd,0xd9,0x68,0x90,0x9c,0x6c,0x26,0xdf,0x12,0x50,0xfa,0xbf,0x8b,0xce,0x68,0x80,0x8c,0x04,0x60,0xbf,0x34,0x81,0xbd,0x29,0xa3,0xa2,0xe4,0xe0,0x2d,0x25,0xb2,0xff,0x9f,0xd1,0x20,0x07,0xd5,0x8c,0x19,0xfa,0x3f,0x47,0xec,0xc1,0x8d,0xc9,0x36,0xf8,0x51 +.byte 0x4c,0xaa,0x40,0xe3,0x6a,0x21,0xd5,0xe6,0xa6,0xcf,0x8c,0xd9,0x10,0x47,0x66,0xfd,0x32,0x48,0x36,0x8f,0x14,0xed,0x09,0x80,0x50,0x27,0xaa,0xd5,0x1f,0x69,0xb8,0xe4,0x96,0x27,0x56,0x78,0xd6,0xd5,0x2d,0xf0,0x4f,0x14,0x30,0x17,0x9e,0x5b,0x69,0x8c,0x7c,0x1c,0x97,0x38,0x65,0x77,0x75,0x49,0xac,0x4b,0x06,0xda,0x74,0x11,0x86,0xbc +.byte 0xad,0x01,0xf2,0x03,0x29,0x5d,0xa7,0x74,0xd3,0x44,0xae,0x1d,0xbf,0xf9,0xc5,0x5b,0x83,0x8c,0xd6,0x84,0x8a,0x8e,0xe9,0xa6,0x08,0xf4,0x88,0x13,0xcb,0x16,0x45,0x13,0x9c,0xc7,0x75,0xa9,0xa7,0x55,0x04,0x91,0xd6,0xe9,0xd4,0xe5,0x65,0xa0,0x3a,0x53,0xa0,0xfc,0x62,0xce,0x91,0x01,0xb4,0x06,0x8b,0x10,0x79,0x6f,0x2c,0xd6,0x0a,0xa2 +.byte 0x31,0x8f,0x75,0x32,0x0e,0xfa,0x0d,0xec,0xfd,0x71,0x7f,0x74,0x97,0x30,0xe9,0xee,0x9f,0x04,0x21,0xb5,0xc9,0xd1,0x52,0x2a,0x0f,0x18,0xbe,0x3e,0xbb,0x98,0xaf,0x59,0x9b,0x85,0x79,0x5e,0x52,0x93,0x1c,0x42,0x67,0x67,0x6b,0xd5,0x41,0xaf,0xba,0x09,0x3a,0xb4,0x0e,0x97,0x22,0xe6,0xbb,0xe1,0x27,0xa1,0xf9,0xf0,0xcd,0xa2,0x3d,0xdb +.byte 0x81,0x2f,0x65,0x90,0xb7,0xe5,0xe5,0xce,0x1d,0x3b,0xfe,0x34,0x57,0xcd,0x3a,0xbd,0x19,0x59,0x23,0x12,0xf1,0xb6,0xf2,0xf7,0xc1,0xf5,0x1d,0x0b,0x46,0x8f,0x16,0x6a,0x81,0xfe,0xc1,0x97,0x8d,0x69,0x55,0x60,0xdd,0xf0,0x61,0xe9,0x22,0x30,0x72,0x1a,0x24,0x30,0xd7,0xbc,0x1c,0xfa,0x02,0x55,0xfc,0xb9,0x4b,0x0a,0xe4,0x90,0x90,0x3a +.byte 0xe3,0xce,0xd4,0xa0,0x7d,0x21,0x5a,0xf7,0x79,0x6e,0x03,0x4f,0x4e,0x93,0xad,0xc4,0x8e,0x9d,0x9f,0x8a,0x39,0x59,0x20,0xc1,0x5d,0x6a,0x4d,0x8f,0x69,0x78,0xea,0xba,0xde,0xc0,0x87,0xb2,0xf2,0x20,0xd6,0x7a,0x9c,0xf9,0x09,0x03,0x2a,0x4d,0xb9,0x10,0xfc,0xe5,0x05,0x90,0xed,0x45,0x4f,0x5f,0x7c,0x5d,0xfa,0xe6,0x0d,0x07,0xae,0xcc +.byte 0x21,0xc8,0x1c,0x7a,0xfb,0x1d,0xb9,0xe3,0x69,0xa1,0xb7,0x5f,0xb5,0x6a,0xb9,0x58,0x9d,0xcd,0x99,0xf8,0x38,0xbb,0xa0,0xfe,0xf8,0x41,0x51,0x72,0xce,0x76,0x89,0x59,0xa2,0xab,0xef,0xea,0xab,0x79,0xbc,0xda,0x73,0xdb,0x18,0xda,0x60,0x1b,0xc4,0xb7,0x4f,0xb3,0x86,0x21,0x2a,0xc3,0xec,0x7f,0x0e,0x89,0x16,0x0e,0xd2,0xbd,0xea,0x0e +.byte 0xcf,0xc1,0x4b,0x2c,0x97,0x69,0xce,0xd3,0x94,0xad,0x81,0xe9,0x70,0xf4,0xf8,0xe5,0x77,0xe6,0x92,0xe0,0x23,0x38,0xd3,0xc1,0xdd,0x2e,0x58,0x77,0xc5,0xc3,0x29,0x34,0x66,0x48,0xf9,0x75,0x3c,0x8a,0x6a,0xb8,0xbf,0xf8,0xba,0xf0,0xb9,0xa1,0x81,0x0b,0xa1,0xaa,0x17,0x34,0x1a,0xbb,0xa3,0xa2,0xba,0x21,0x45,0xc0,0x1d,0x57,0x11,0x4d +.byte 0x9b,0xd4,0x64,0x84,0xd7,0x0b,0xd6,0xfb,0x72,0x2c,0xdb,0xc3,0xe6,0x24,0xa9,0xf3,0x30,0x9f,0x21,0x05,0x1e,0xcc,0x48,0x58,0xed,0xfd,0xb2,0x34,0xe3,0xf7,0x7e,0x56,0xee,0xdf,0xa4,0xbb,0xb1,0xcc,0x7f,0x81,0x40,0xe9,0xdf,0x3f,0x82,0xc4,0x0d,0x14,0x9b,0x3b,0x80,0x15,0x24,0x6e,0xa4,0xce,0xfa,0x28,0xa7,0x7f,0x89,0xfb,0xc6,0x83 +.byte 
0xe8,0x2a,0x70,0xfb,0x9c,0x75,0xb8,0xfd,0xec,0xbc,0xbb,0xf5,0xef,0x0a,0xa5,0x77,0x0b,0x38,0xa0,0x63,0xa5,0x71,0x12,0xc9,0xaa,0xc3,0xf9,0x72,0x30,0x45,0x4e,0x19,0x44,0x2d,0x09,0xf4,0xf1,0xa8,0xe8,0xde,0x58,0x87,0x70,0xa8,0x91,0x86,0xef,0x5d,0x02,0x90,0x55,0x63,0x99,0xde,0xd7,0xb7,0x5f,0x07,0x01,0xdf,0xb1,0xe5,0x55,0xf5 +.byte 0x87,0x69,0xd2,0x7a,0x71,0xbc,0x0e,0x4b,0x8b,0x98,0xf7,0xf6,0x0a,0x01,0xbb,0x9f,0x1b,0x15,0xb6,0x76,0xe0,0xc0,0x4b,0x5d,0x08,0xba,0xba,0x73,0x3f,0x36,0x5a,0x29,0xd7,0x7c,0xc2,0x87,0x03,0x75,0xff,0x26,0x21,0xae,0xbe,0x66,0x70,0xa2,0x99,0x11,0x35,0x49,0x78,0x7b,0x3a,0xfe,0x94,0xf7,0x37,0xe0,0x69,0x56,0x39,0xf7,0x3f,0x71 +.byte 0x39,0x74,0x75,0x32,0x1f,0xfb,0x3a,0x87,0x07,0xab,0xf1,0xed,0xe3,0xe2,0xbf,0x3f,0xb1,0x73,0x11,0xc9,0x34,0x4b,0xb1,0x1e,0x62,0x4e,0xc1,0x8a,0xae,0xcc,0xc7,0xb3,0xa7,0x70,0x01,0x73,0xad,0xb3,0xc3,0x59,0x70,0x14,0x31,0x94,0x9f,0x6b,0x18,0x11,0x50,0x52,0xc9,0xf0,0xf8,0x12,0x9d,0x7c,0x90,0x64,0x9d,0xd9,0x41,0xa6,0x45,0xe3 +.byte 0xc9,0x25,0x73,0xe7,0x48,0x9d,0xdc,0xe0,0x2c,0x71,0xd3,0x68,0xc5,0xab,0xac,0xe3,0x16,0x95,0xe3,0xa5,0xae,0x2f,0x57,0x60,0x4b,0x11,0x90,0xaa,0xe7,0x48,0xca,0xc7,0xde,0x2e,0x56,0x10,0x8e,0xc3,0x0a,0x7d,0x66,0xf1,0xc3,0xf7,0x2d,0xdd,0xfa,0x5e,0xb2,0xcb,0x99,0x4d,0xaa,0x4e,0x91,0xc1,0x94,0x60,0x27,0x33,0x82,0xa6,0x2a,0xba +.byte 0x05,0x32,0x33,0x0a,0x30,0x47,0xb0,0xac,0x68,0x7d,0xef,0x25,0x09,0xcf,0x51,0xf4,0x06,0x28,0x14,0xb2,0xb4,0x1f,0xaf,0x37,0xdc,0x70,0x88,0x4d,0xb9,0xfc,0x2d,0x61,0x25,0x13,0x1f,0x32,0x48,0x6d,0xeb,0x46,0x05,0x66,0x44,0xa1,0xec,0xce,0xe9,0x51,0xa9,0xba,0xf8,0xde,0x95,0x1b,0x20,0xe1,0x21,0x75,0x4b,0x25,0x7f,0x3c,0x16,0xf7 +.byte 0xe2,0xbe,0xeb,0xca,0x2b,0x77,0x92,0x16,0x32,0xe2,0x74,0x21,0x52,0x3f,0x08,0xba,0x41,0xb0,0xd3,0xd2,0xf7,0xf3,0x29,0xb6,0x10,0xfa,0xa5,0x29,0x35,0x29,0x21,0x0d,0xec,0xba,0x5a,0xf3,0x63,0x0f,0x9d,0xbc,0x42,0x02,0x46,0xe9,0x07,0x4a,0x9a,0xe8,0xd3,0x78,0x92,0xa2,0xe5,0x03,0xec,0xd4,0xe2,0xc8,0x8f,0x92,0x4a,0xae,0xbc,0xd7 +.byte 0xdf,0x4b,0x07,0x22,0x47,0xbd,0xb4,0xb5,0xa0,0x7e,0xfb,0x21,0x40,0x62,0xb1,0x6c,0x07,0x00,0x64,0xf6,0xb2,0x75,0x5c,0x29,0x84,0xff,0x38,0x0c,0xc8,0x08,0x38,0x92,0xf9,0xad,0xd7,0xcc,0xc3,0x1c,0x03,0x80,0x49,0x39,0x1c,0xdb,0xae,0x60,0x87,0x8a,0x5c,0xe9,0x17,0xbd,0x2b,0x0f,0xa5,0xa1,0xf9,0x0d,0x4b,0x8c,0x4d,0x39,0xda,0x15 +.byte 0x8c,0xc4,0x69,0xaf,0x2b,0xb0,0xa1,0xfd,0xd9,0x65,0x3c,0x87,0x4b,0xf2,0x5a,0xd7,0xd8,0xb9,0xef,0x78,0x67,0x30,0x4c,0x6c,0x92,0xc5,0x1e,0x15,0xf8,0xd9,0x74,0x1b,0x54,0x0c,0x10,0x1b,0xb5,0x11,0x13,0xd6,0xb4,0xc0,0x53,0x03,0x2c,0x4b,0xee,0xac,0xf9,0x87,0x17,0x51,0x35,0xb8,0x1a,0xdc,0x16,0x61,0x5b,0xe9,0x5a,0x43,0x94,0x42 +.byte 0x8f,0x68,0xbd,0xb6,0x52,0x00,0x63,0xa3,0x52,0x6e,0x5d,0x8e,0xe9,0x4f,0xf5,0x69,0xd8,0x4f,0xf5,0x5c,0x89,0x7e,0x1c,0xb9,0xdc,0x7b,0x92,0x8a,0x2b,0xfc,0xb8,0xad,0xbb,0xff,0x61,0x2e,0xc0,0xdc,0xfb,0x2f,0x78,0x2a,0x50,0x32,0x9b,0x4c,0xfd,0x9e,0xab,0x80,0x5c,0x7d,0xc8,0x6b,0xb3,0x2d,0x0a,0xfe,0x43,0xa2,0x10,0x10,0x79,0xbc +.byte 0x8c,0xa0,0x86,0x09,0x8c,0x8b,0x28,0xf3,0x8a,0xc9,0xeb,0xcb,0xb5,0x0e,0x56,0x19,0xae,0xe0,0xa1,0x22,0x72,0xc5,0xad,0x01,0x12,0x69,0xb6,0x52,0xb8,0xdd,0x36,0x25,0x21,0xae,0x73,0x06,0xc1,0xe0,0x23,0x20,0xe1,0x8e,0xe4,0x99,0xcd,0x86,0xca,0xf5,0x93,0x0e,0x6b,0xb8,0xba,0x18,0x4a,0x36,0xed,0xd0,0x37,0xc8,0xc7,0x8a,0xb2,0x63 +.byte 
0x2e,0xa4,0x22,0x76,0x6f,0xf7,0xdd,0x81,0xd6,0x6f,0xcd,0xb9,0x65,0xf0,0x95,0x77,0xae,0xca,0x54,0x62,0xce,0x5d,0x47,0x9e,0x10,0x89,0xb9,0xfa,0x72,0x0a,0xef,0x24,0x17,0x45,0xb0,0xb0,0xc7,0x51,0x85,0xa1,0xb1,0x6a,0xd2,0xea,0x48,0xe2,0x6a,0x03,0x2a,0xdf,0xa8,0x0e,0x62,0xa2,0x1e,0xe2,0xa7,0x20,0x57,0xbd,0x73,0xeb,0xef,0x86 +.byte 0xc9,0xd4,0xfa,0x96,0xfe,0xfa,0xb3,0xc6,0xbf,0x7a,0x16,0xa2,0x43,0x73,0x56,0x71,0x78,0x32,0x3b,0xc1,0xd8,0x26,0xbf,0xde,0x39,0x5d,0xbd,0x3b,0xff,0xd7,0x4f,0xa0,0x67,0xa6,0x09,0x9a,0x81,0xfd,0xec,0x34,0x73,0xcd,0x90,0x15,0x8b,0x3e,0x2d,0x6f,0x7d,0xcc,0xf5,0x20,0x15,0x07,0xa8,0x2f,0xa5,0x5b,0x2b,0x4f,0xb8,0x2f,0x14,0x6c +.byte 0x52,0x78,0xbd,0x92,0x98,0xda,0x69,0x19,0x58,0x4c,0x76,0xe4,0x20,0xb2,0x48,0xa4,0x9f,0x2f,0x4c,0x9b,0x45,0x7f,0x7d,0x1c,0x46,0xe9,0x1e,0x43,0x26,0x49,0x39,0xb6,0x42,0x3a,0x4c,0x59,0x95,0x6b,0x28,0xd5,0xbe,0xa7,0x2e,0xd0,0x0c,0x00,0xa0,0x67,0x06,0x4e,0xee,0xae,0x7f,0xc2,0xb5,0x12,0x46,0x3f,0xb4,0x35,0x16,0x2a,0xda,0xbf +.byte 0x41,0x34,0xbe,0x30,0x2a,0x0f,0x7b,0x60,0xa6,0x8b,0xcd,0xae,0x7a,0x8c,0xd6,0x97,0xab,0x06,0x1e,0x14,0x87,0x45,0xa3,0x3c,0x9c,0xc4,0xa0,0x1d,0xee,0xf0,0xca,0xb8,0xa6,0x8d,0x37,0x92,0xad,0xbc,0xe6,0x1f,0x65,0x75,0xd3,0xbc,0x72,0x66,0xe2,0xff,0xbc,0x19,0x93,0xae,0xee,0xd0,0x63,0x6d,0x97,0x6f,0x57,0xf3,0x77,0xcd,0xe3,0x57 +.byte 0x3f,0x00,0xc8,0xe1,0x63,0x83,0x15,0x84,0xc6,0x08,0xdb,0x03,0xc9,0x27,0x47,0x4c,0x17,0x12,0x40,0x6e,0xac,0x74,0x6f,0x3c,0x22,0x57,0x36,0x29,0xbb,0x6a,0xc7,0x5a,0xfe,0x60,0x1c,0x0f,0x32,0x95,0x1b,0xf2,0x3c,0xed,0x04,0x87,0x4c,0x48,0xc7,0x63,0x79,0x24,0xb3,0x12,0xbf,0x55,0x3b,0x32,0xbf,0x52,0x4e,0x1e,0xc1,0x1f,0xf2,0xfd +.byte 0xe6,0xb8,0x56,0x38,0x0e,0xd2,0x75,0x3d,0x41,0x99,0x0c,0x7a,0x12,0x3f,0xa7,0x3a,0x79,0xa0,0xd7,0x6f,0x47,0x97,0x7e,0x9e,0xf6,0xfe,0x29,0xc0,0x16,0x34,0x38,0x80,0x2f,0xde,0x65,0x79,0xc9,0xfd,0xa0,0x84,0xc3,0x39,0xbc,0x0b,0xbe,0x18,0xba,0x0d,0xe3,0x35,0x11,0xba,0x9f,0xde,0x5d,0x0c,0xae,0x8e,0x0c,0x0f,0x66,0x9c,0xe6,0xfc +.byte 0x3d,0xdb,0x46,0xf1,0x84,0x57,0x62,0xb0,0x00,0xd4,0x8c,0xaa,0x93,0xeb,0xf7,0xa7,0x8e,0x82,0xba,0x89,0x67,0xbb,0x38,0xb0,0xb6,0x13,0x0c,0x96,0x22,0x9c,0x6a,0x86,0xea,0x83,0xad,0x5f,0x7b,0x3a,0x28,0xd8,0x53,0x90,0x2d,0xab,0xc9,0xbe,0x99,0xfb,0x68,0x42,0x27,0xf6,0xe3,0x5a,0xaf,0xf3,0xd6,0xee,0xb6,0xa2,0xe0,0x32,0x3c,0x1d +.byte 0xd4,0x3c,0x2b,0x58,0xc2,0x4f,0x3d,0x20,0x39,0xdb,0x80,0x89,0x20,0x20,0x7b,0xe6,0x1d,0xd0,0xa2,0x1a,0xd4,0x88,0xc9,0xe0,0xb9,0xf6,0xb2,0xa1,0xcd,0xf2,0x67,0x60,0x44,0xd8,0xce,0x6a,0xe2,0x52,0xc3,0xf3,0x61,0xa3,0x14,0x58,0xd6,0xe5,0x43,0x4a,0x8d,0xcc,0x4f,0xf8,0x17,0xdd,0xd2,0x5d,0xd5,0x5a,0x86,0x8e,0xc4,0x74,0xdc,0x1b +.byte 0xad,0xca,0x63,0x75,0xf0,0x43,0x41,0x16,0x02,0x49,0x6a,0x3a,0xe3,0xb9,0xa9,0xdc,0xfb,0x99,0xbc,0x60,0x0d,0xdb,0xa0,0xcf,0x27,0xaa,0xd5,0xc5,0x42,0x0b,0x02,0x00,0x43,0xaf,0xb5,0x4f,0xe1,0x88,0xa1,0x9d,0xca,0xfb,0x9f,0x1f,0x08,0x9c,0x66,0x23,0xca,0x4b,0x88,0xb4,0x40,0xdc,0xd3,0xd3,0x1a,0x64,0xe3,0x9b,0x43,0xea,0x20,0x90 +.byte 0x30,0x2e,0xc4,0x75,0xc5,0x52,0xc5,0x7c,0x0e,0x35,0x56,0xf5,0x1f,0x50,0x2b,0xf6,0x28,0x93,0x6f,0xde,0x10,0xc6,0x49,0x2b,0x77,0xb1,0x6d,0xce,0xfd,0x37,0xd4,0x8d,0x11,0xed,0x88,0x1e,0xca,0x68,0x0c,0x4e,0x38,0x7f,0x0f,0xab,0x6f,0x8d,0x1c,0x7d,0xd4,0x7d,0xd8,0xa9,0x5c,0x24,0x5a,0x7d,0xf4,0x5b,0xb6,0xb7,0x28,0xc7,0x93,0xd6 +.byte 
0xa9,0xe5,0xac,0x62,0x16,0x9c,0x4e,0x5c,0x24,0xa0,0x2a,0x76,0xce,0x7d,0x5c,0x4b,0xbe,0xbc,0x83,0x5c,0x9a,0xc8,0x06,0x7b,0x1e,0xac,0x98,0x67,0x17,0x32,0x94,0xda,0xd1,0x8b,0x58,0xad,0x8e,0x26,0x03,0x81,0x7c,0x48,0xd1,0x83,0x03,0xba,0x6c,0x51,0xe9,0x25,0x82,0xd2,0xb9,0x7f,0xd8,0x33,0x3f,0x77,0x29,0x45,0x41,0xa9,0x17,0x3d +.byte 0x62,0xc6,0xd2,0xfb,0xd1,0x24,0xc7,0xee,0x10,0xc0,0x64,0xc3,0x46,0xc6,0x2b,0xe8,0x9c,0xc8,0x99,0x23,0x77,0xa9,0xb5,0x12,0xc4,0x53,0xde,0xbc,0x20,0xb2,0xc4,0x12,0xdb,0xc2,0x0b,0x63,0x70,0x6a,0x41,0x31,0x65,0x48,0xa0,0xfc,0xbc,0xd6,0x3f,0x55,0x18,0x17,0x65,0x35,0x58,0xe3,0x33,0xac,0xaf,0xca,0xb2,0x51,0xc1,0xcc,0x60,0x38 +.byte 0x94,0x8f,0x13,0xb8,0xcc,0x8c,0xc4,0x12,0xea,0xd5,0x39,0xd3,0x46,0x55,0x17,0x27,0x7a,0x07,0x01,0x02,0x74,0xa6,0xe7,0xc8,0xa7,0xd0,0x76,0xc8,0x5e,0x57,0x50,0xc5,0x19,0xf1,0x95,0xa3,0x52,0x10,0xa3,0x1e,0xcd,0xb1,0x05,0x64,0xe5,0x69,0xd9,0x5e,0xfc,0x71,0xef,0xe1,0xf6,0xb3,0xa7,0xf7,0xf9,0x71,0xfd,0xbb,0x5b,0x2b,0x7a,0xd2 +.byte 0x72,0x7c,0xc7,0x73,0x89,0xf7,0xe2,0x0b,0xcd,0x05,0x4f,0x0c,0x10,0xed,0xcc,0xda,0xb6,0x81,0x19,0xe6,0x2b,0x06,0x66,0xef,0xc5,0xfd,0xd5,0xc6,0x66,0x20,0x86,0x2a,0x4f,0x05,0x49,0xf1,0x54,0x4a,0x6e,0x1d,0xcd,0xad,0x18,0xeb,0x6c,0x58,0xd6,0x75,0x3e,0x62,0x48,0xab,0xea,0x1f,0x7f,0x05,0x45,0x6e,0x75,0x2a,0x5e,0x97,0x5b,0xde +.byte 0x5a,0x99,0x42,0xc1,0x62,0xab,0xc7,0x01,0x4d,0xac,0xd6,0xdc,0xc9,0x71,0x24,0xd1,0x33,0xe2,0x4b,0x1f,0x09,0x04,0x1f,0x0d,0x42,0x45,0xcf,0x7c,0xa0,0xee,0x48,0xfd,0x8b,0x1f,0xaa,0x50,0x48,0x6d,0x8e,0x34,0x76,0x09,0x23,0x8a,0x40,0x0d,0x5d,0xc1,0x2a,0xba,0x5f,0x9c,0x86,0xfb,0x37,0xdf,0x24,0xff,0x27,0x88,0xbf,0xf6,0xa4,0xc3 +.byte 0xf0,0xd3,0x02,0xa8,0x7c,0x6d,0xc4,0xc5,0x14,0xc3,0x64,0x28,0xa8,0x05,0x33,0xc2,0xda,0x12,0xfc,0xbe,0x0d,0x8e,0xf4,0xf5,0x48,0x5a,0x8e,0x8a,0xd2,0x50,0x7c,0xc0,0xbc,0xde,0xdb,0x9a,0xf6,0xa0,0x92,0x8d,0x19,0xbc,0x5a,0xdc,0xbf,0xfb,0x13,0x8f,0x41,0x09,0xba,0xd9,0x0b,0x91,0x7a,0xdb,0x92,0x10,0xac,0xf2,0xb5,0x76,0xb5,0x7d +.byte 0x80,0x04,0xd6,0xec,0x98,0x09,0x5f,0x63,0x0d,0x58,0x00,0x8a,0x07,0x76,0xfa,0xe6,0x6e,0xdf,0xbf,0x73,0xe5,0xc9,0xe5,0x12,0x44,0x58,0xf9,0x2e,0xb1,0xe6,0x2c,0xf5,0x0d,0x94,0xa9,0x51,0x0d,0x01,0x03,0xab,0x79,0xf9,0xee,0x7e,0x10,0x4b,0xcb,0x20,0xbb,0x01,0x19,0xd6,0x12,0xd1,0xac,0x96,0xe9,0x0e,0xde,0xbf,0x7e,0x80,0xf6,0x58 +.byte 0xc9,0xec,0xaf,0xf7,0x2d,0x98,0xbc,0x2b,0xb1,0xf1,0x34,0x94,0x39,0x8e,0xbc,0x13,0x13,0x41,0x8f,0xf3,0x4e,0x4e,0x6b,0x2a,0xaa,0xea,0x70,0x5c,0xf8,0x42,0xf7,0xbc,0xfd,0xbd,0x6f,0x62,0x1b,0xcb,0xb9,0x39,0xdc,0x6a,0x47,0x81,0xaf,0xff,0x5b,0x7e,0x80,0xb9,0xbf,0xfa,0x15,0x7e,0xd1,0xc3,0xb2,0x80,0x99,0xbd,0xb9,0x30,0x8d,0xb5 +.byte 0x43,0x6b,0x7a,0x31,0xaf,0x45,0xf7,0xdd,0x21,0x8f,0x54,0xb1,0xf6,0x2d,0x7d,0x96,0x63,0x4a,0x93,0x98,0x37,0x7f,0x48,0x02,0x4b,0x0f,0x71,0xe4,0x70,0xce,0x66,0x6a,0x36,0xde,0x58,0x84,0x69,0xd6,0xbd,0x1a,0x9a,0x8b,0xc5,0xda,0x97,0xc5,0xe1,0x4e,0xec,0x9b,0x7a,0x65,0xe0,0xa5,0xdd,0x39,0x3c,0x9f,0xfd,0x45,0x17,0x4c,0x2f,0xb4 +.byte 0xb1,0xb1,0x42,0xe8,0x88,0x75,0x9f,0xb4,0xc1,0xdf,0x44,0xf9,0x4f,0x9a,0xf7,0x3d,0x35,0xc5,0x32,0xbe,0x43,0xd0,0x0d,0x71,0x4e,0x21,0xbf,0x31,0x99,0x73,0x5a,0x84,0x45,0x2e,0x00,0x8b,0x42,0x2b,0x14,0x86,0x51,0xcb,0xa0,0x98,0xa9,0x68,0x8d,0xdb,0x58,0x3d,0x73,0x9d,0xf9,0x2d,0x86,0x76,0x62,0xcb,0x93,0x29,0x48,0x92,0x38,0xfb +.byte 
0xeb,0x1d,0xda,0xc3,0x10,0x1f,0x32,0x68,0xee,0xcb,0xb7,0x8a,0xcb,0xcb,0xe0,0x37,0x31,0xe8,0xad,0x7b,0x4a,0x29,0x2c,0x10,0x9e,0xdf,0x86,0xeb,0x13,0x0c,0xab,0xa4,0x30,0x36,0xf0,0xe0,0xac,0x14,0x41,0xa4,0xf4,0xf8,0x44,0x95,0xe8,0x8f,0x28,0xc2,0x35,0x0a,0x44,0x61,0xc7,0x60,0xc5,0x3b,0xc4,0x1d,0x67,0xfd,0xac,0x0b,0x2e,0x49 +.byte 0x62,0xea,0x17,0x3c,0xf5,0x4b,0xbe,0xba,0xba,0x42,0x02,0x0d,0x13,0xf1,0x15,0xff,0x2e,0x47,0x46,0xd1,0x27,0x64,0xb7,0x35,0x28,0x31,0xb5,0xde,0x1e,0xf9,0x26,0x6c,0x04,0x3c,0x0e,0x06,0x9d,0x4d,0xc7,0x1c,0x97,0x67,0x2c,0x6d,0x36,0x0d,0x4c,0x61,0x08,0xe9,0xbd,0x04,0x1d,0x8d,0xfb,0x0c,0x03,0x3d,0xb4,0x40,0xd5,0x1b,0x69,0x3b +.byte 0x68,0xcf,0x46,0x27,0xcf,0xb3,0xda,0x1e,0xdc,0x85,0x6f,0x4f,0x6b,0x09,0x9d,0xe9,0x6c,0x73,0x40,0x27,0xc9,0x8b,0x12,0x97,0xea,0x34,0xd7,0x51,0x32,0x90,0x4e,0xd7,0x91,0x41,0x3a,0xee,0xbc,0x97,0xb0,0x4a,0x39,0xdb,0xe3,0xe5,0x12,0x73,0xbf,0x5d,0x68,0xe0,0xc6,0x7c,0x6f,0x0d,0x14,0x1c,0xaa,0xde,0x29,0xb7,0xc7,0xa5,0x90,0x62 +.byte 0xe9,0xc5,0x75,0x16,0xe6,0xc0,0x9d,0xc5,0xb8,0xd6,0xfa,0xb0,0x72,0xb7,0x27,0xa6,0xa8,0x3f,0xbf,0x18,0x8b,0xaa,0x94,0xb3,0x47,0x50,0x2f,0x1c,0x49,0xab,0x46,0x38,0x7f,0x3e,0xf3,0xf1,0xb8,0xb3,0x44,0xaa,0x1f,0x76,0xb4,0x67,0xff,0xcf,0x7c,0x4b,0xa9,0xe1,0x62,0x93,0x4d,0x3e,0x96,0xdb,0x56,0xf6,0x26,0x5d,0x95,0x4c,0xfa,0x5f +.byte 0x06,0x2b,0x5c,0x33,0x2d,0xf8,0xfa,0x68,0x8a,0xed,0x28,0x2a,0x6e,0x95,0x86,0x59,0x71,0xef,0x86,0x47,0x60,0xec,0x35,0x79,0xa9,0x98,0x2d,0x6e,0x20,0x26,0x3a,0x21,0xec,0x59,0x15,0x65,0xcd,0xb9,0x91,0x19,0x6e,0x74,0x89,0x3b,0x10,0x00,0xab,0x8a,0x45,0x23,0x20,0x94,0x03,0x02,0x77,0xb7,0xcf,0x9c,0x71,0x18,0x0c,0x5b,0x40,0x62 +.byte 0x3b,0x8f,0xc9,0xf6,0x4c,0x8f,0x60,0x66,0x05,0x87,0x05,0x90,0xd4,0x08,0x76,0xd7,0xa3,0xb6,0x37,0xa8,0x83,0x05,0xb2,0x48,0xe9,0x24,0xc4,0xfb,0x79,0xa1,0xce,0xac,0x29,0x13,0x4e,0x72,0xdf,0xad,0x9e,0x5b,0xcd,0x9c,0x39,0x1d,0x3e,0x57,0x9d,0xf2,0x96,0x13,0xa4,0x79,0x4c,0x76,0x40,0x03,0xb3,0x18,0xcf,0xd7,0x45,0x2a,0x2d,0x07 +.byte 0xe5,0x2e,0xb7,0x74,0xda,0x94,0xea,0x32,0x74,0xb0,0xca,0xf4,0xd1,0x09,0x97,0x3c,0x69,0x17,0xf6,0x5b,0x13,0x7b,0xb8,0xb1,0xd9,0x0e,0x12,0x44,0x29,0xea,0x26,0xd8,0xaa,0x9d,0x26,0x87,0x0c,0x89,0x4e,0xec,0x29,0x48,0x43,0x66,0x21,0x0b,0xab,0xce,0x40,0x57,0x4c,0xa7,0xdd,0x56,0xde,0xac,0x5c,0x62,0xea,0xc4,0x54,0x4a,0xe0,0x8d +.byte 0x54,0xc8,0x65,0x44,0xcc,0x6f,0x2a,0xcd,0x0e,0xb3,0xad,0xa3,0x30,0xd1,0xb7,0x19,0x70,0x51,0xd3,0x9a,0xcf,0xe5,0x42,0x6c,0xa1,0xc1,0x0f,0xe2,0xda,0x86,0xb4,0x51,0x50,0x62,0xdc,0x51,0x3f,0xd2,0xff,0xde,0x7f,0x38,0x5a,0xff,0x2d,0x21,0x1d,0x59,0xb9,0xdd,0xde,0x83,0x13,0xb0,0x25,0xf5,0xbb,0x11,0x47,0x4a,0xaf,0x81,0x15,0xa0 +.byte 0x39,0x5b,0x30,0x17,0x2b,0xbf,0x5a,0x03,0x60,0xb6,0xbb,0x86,0x9f,0x50,0x45,0x15,0x0b,0xba,0x42,0xf4,0x3d,0x05,0x62,0xcd,0x9b,0x8c,0xcf,0x93,0x5c,0x33,0x6c,0xea,0x4b,0xd0,0x1d,0x91,0x3e,0xbf,0xa4,0x9d,0x7c,0x2c,0x87,0x9c,0x42,0x9f,0x03,0x98,0x03,0x1b,0x98,0x66,0x4f,0x8f,0x29,0x12,0xc5,0xb5,0xec,0x81,0xf8,0xb2,0x5e,0x44 +.byte 0x4f,0xb0,0x31,0xe4,0x2a,0x73,0x83,0xac,0x5a,0x3f,0xfa,0xcf,0x8b,0x7c,0xa3,0xf1,0x01,0x14,0xa1,0xca,0x60,0x8d,0x6a,0x6c,0x04,0x31,0xcc,0xba,0x12,0xe0,0x4e,0xaf,0x01,0x8d,0xf5,0x60,0x23,0x79,0x8a,0x80,0xcc,0x32,0x31,0x69,0x83,0xb6,0x83,0xaa,0xd9,0x3b,0x86,0x4a,0xd8,0x10,0x28,0x09,0x82,0x36,0xee,0x6a,0xc0,0x80,0x3f,0xfd +.byte 
0xb1,0xd2,0xde,0x34,0xf9,0x4c,0x87,0x5b,0xdd,0xd0,0xb6,0x2d,0x99,0x69,0xd3,0x2c,0xb7,0x0b,0xfc,0x16,0x88,0x7b,0x80,0x21,0xbc,0x30,0x7b,0x56,0xe5,0x7b,0x41,0x43,0x4d,0xaf,0x40,0x5e,0x74,0x14,0x17,0x66,0x32,0xd6,0x81,0x53,0x94,0x35,0xf0,0x0f,0x4f,0x99,0x54,0x9a,0x38,0xc0,0x2a,0xa9,0xd3,0x53,0xdd,0x9a,0xc5,0x29,0x18,0x62 +.byte 0xf6,0x93,0xa3,0x02,0xf0,0x13,0xcb,0xcb,0xcc,0x64,0x0b,0x00,0xf4,0x43,0x03,0x26,0xe6,0x2f,0x39,0xa1,0x83,0xea,0x94,0x2f,0xde,0x61,0xbd,0xe1,0xbe,0x08,0xf8,0xd4,0x01,0x6e,0x61,0x98,0x01,0x39,0x4b,0x93,0x39,0x38,0x34,0x58,0x24,0xc1,0xf5,0x03,0x05,0x15,0x9c,0xf0,0x30,0x20,0x24,0xd4,0x7e,0x73,0xb2,0x60,0x06,0x3b,0xd3,0xb7 +.byte 0x2c,0x47,0x17,0xc4,0x79,0x4e,0x45,0x0b,0x89,0xf0,0xfc,0x42,0xa0,0x0d,0x80,0xd2,0x44,0x36,0x70,0xaa,0x9e,0x72,0x85,0xa8,0xc8,0x1d,0x35,0x28,0xc3,0x5a,0x72,0x4c,0x06,0x6d,0xf4,0xae,0x54,0x86,0x9a,0x32,0x3c,0xa5,0x06,0x63,0xc1,0x37,0xbb,0xaf,0xa6,0xae,0xce,0x94,0xea,0x9c,0x4a,0x9e,0x56,0xb1,0xc3,0x84,0x84,0xef,0x3d,0xe9 +.byte 0x24,0xf4,0xbf,0xc3,0xf6,0x45,0x74,0x4e,0xbb,0x86,0xd3,0x7f,0xab,0x19,0xe3,0x63,0x67,0x81,0xb6,0x18,0xc8,0x78,0x8e,0xf8,0x83,0x5f,0xfb,0x2e,0x49,0x97,0x2b,0x34,0xbb,0x76,0x2e,0x93,0xec,0xe9,0x7f,0x4d,0x7e,0x52,0x0c,0x92,0xbc,0x6d,0x3a,0x34,0x9b,0x5e,0x61,0x6f,0xea,0x45,0xe7,0x5c,0x34,0x6b,0xcb,0xc0,0x31,0x61,0x64,0x9d +.byte 0xad,0x7f,0x98,0xca,0xfe,0x3d,0xad,0xf7,0x21,0xf6,0x4c,0x2a,0x21,0x07,0x80,0x25,0xa2,0xea,0x26,0x85,0xc3,0xb1,0x74,0x04,0x7f,0xd1,0x1c,0x1b,0xa5,0x7e,0x96,0x45,0xfe,0x6f,0xa6,0x34,0xdf,0x94,0x1f,0x7e,0xfb,0xcf,0xfd,0x29,0xeb,0x3a,0xb0,0xfc,0xb6,0xd5,0x80,0x8b,0x37,0x71,0xfb,0x70,0x19,0x30,0xc4,0x6f,0xa0,0x5b,0xae,0x5b +.byte 0x75,0x51,0x98,0x89,0x9e,0xf0,0xf5,0x79,0xaf,0x1c,0x07,0xb6,0x5e,0xcf,0x34,0x70,0x0f,0x0b,0xbc,0x0a,0xa6,0x40,0xc7,0xf8,0xe4,0xef,0xe6,0xb7,0x94,0x6e,0x98,0x75,0x22,0x73,0x5c,0xca,0xcc,0xfb,0x09,0x2f,0x9c,0xfe,0x49,0x0f,0xd3,0x65,0xfe,0xd4,0xf0,0x9b,0xeb,0x8c,0xd7,0x8c,0xff,0x4b,0x18,0x3e,0xf3,0x9d,0x3f,0xf5,0x83,0xd6 +.byte 0x1d,0x3d,0x23,0x79,0x0f,0xae,0x17,0x62,0x33,0x07,0xc3,0xac,0x98,0x07,0x72,0x9b,0xd9,0x26,0x5c,0x1a,0x9d,0xf1,0x35,0x92,0xf9,0x38,0x17,0xf8,0xee,0x26,0xf9,0x64,0xfc,0x5e,0x8b,0x80,0xce,0xdb,0x64,0xf7,0xde,0x20,0x19,0x5c,0x26,0xf6,0x23,0xd6,0x99,0x8e,0x75,0x77,0x3d,0x17,0x0f,0xea,0x31,0x5a,0x65,0x32,0x1b,0x78,0x78,0xe4 +.byte 0xfe,0x76,0xf8,0xa7,0x81,0x34,0xf1,0x2a,0x13,0x22,0xe4,0x8a,0xe1,0x42,0x5a,0x3f,0x44,0x22,0xeb,0x7e,0xcd,0x20,0xcd,0xf7,0x44,0x1a,0x87,0xb9,0x7a,0x0e,0xf8,0xcb,0xb5,0x0a,0x1f,0x6a,0xe6,0x0b,0x70,0x59,0x38,0xa3,0x6b,0x64,0x7b,0x61,0xfe,0xbd,0xa4,0xb7,0x89,0x7a,0x28,0x70,0xfe,0x9d,0x64,0x2c,0xe9,0xc4,0xc9,0x2f,0xc8,0x3e +.byte 0xfa,0x70,0xce,0x21,0x9b,0xa8,0x10,0x6a,0x16,0xdd,0x28,0xce,0x4e,0xd4,0x6c,0x8c,0x47,0x83,0x13,0x8b,0xec,0x1c,0x76,0xdc,0x4d,0x81,0x25,0x08,0xd8,0xf9,0xde,0x66,0x1d,0xe2,0xf3,0xe7,0xdc,0x3e,0x3c,0x6b,0x98,0x25,0x55,0x88,0xe8,0xda,0x7f,0x16,0xe5,0x7d,0xad,0x8a,0x36,0x00,0xf0,0x68,0xc5,0xe4,0xfc,0xe9,0xe3,0x54,0xeb,0x4c +.byte 0xd1,0xff,0x07,0x1a,0x5c,0x5e,0xd4,0xb1,0xff,0x7d,0xfc,0x5b,0x34,0x42,0x95,0x89,0x01,0x24,0x8e,0x30,0xec,0xfe,0x67,0xf8,0xe2,0xaa,0xd5,0x6a,0x9f,0xe3,0xc3,0xa5,0x53,0x7f,0xd3,0xf4,0x98,0xa5,0x47,0x11,0xad,0xac,0xea,0xba,0x20,0x34,0x03,0x65,0x8c,0xec,0xb6,0xa3,0x2b,0xf6,0x93,0xe1,0xc8,0xad,0x34,0x30,0x8f,0x0e,0x3b,0xf6 +.byte 
0x63,0xc6,0x58,0xc3,0xe8,0xa3,0x85,0xf8,0x24,0x8e,0x21,0xb9,0x36,0x7c,0xe0,0x11,0x64,0x31,0x6a,0x6a,0xa2,0xad,0xd3,0x94,0xbb,0x13,0x5b,0xb4,0xe9,0xee,0x09,0xdc,0xfe,0xb2,0xad,0xa8,0x43,0x02,0xba,0x85,0x1f,0x56,0xcb,0xb5,0x95,0x32,0xcc,0x7e,0xe0,0x00,0xde,0xfa,0x3f,0x91,0x71,0xde,0x21,0x19,0xff,0xc9,0x97,0x43,0x95,0xd8 +.byte 0x0d,0xc2,0x8a,0xde,0xcc,0x34,0x48,0xf4,0x35,0x41,0xb8,0x56,0x52,0xce,0x06,0xb3,0xcf,0xd4,0xae,0x7a,0xcb,0xe9,0xed,0x37,0xd6,0x76,0xa0,0x77,0x04,0xfb,0xb7,0x41,0x25,0x38,0xe1,0xd1,0xb5,0xde,0x21,0xe0,0x64,0xd8,0x83,0x13,0x7b,0x4b,0xb8,0xc9,0x12,0x02,0x51,0x56,0x52,0xe9,0x1c,0x49,0x48,0x83,0xd0,0x99,0x73,0x60,0x4a,0x4c +.byte 0x7d,0x8d,0x43,0xf9,0x06,0xa4,0xbb,0x0e,0xb6,0xdd,0x5f,0xc7,0x5e,0x35,0xcb,0xa0,0xc1,0x66,0x4a,0xe3,0x4a,0xa9,0xec,0xa4,0x5a,0xd7,0xd6,0xea,0xa5,0x20,0xa6,0xc3,0x1b,0xc0,0xa8,0xd1,0xf1,0x08,0x05,0xab,0x40,0x14,0x35,0xf2,0xdd,0x0f,0xc5,0xda,0xb3,0xa6,0xb1,0x07,0x36,0x17,0x5d,0xe9,0x96,0x23,0x96,0x46,0xd4,0xa7,0x71,0x64 +.byte 0x13,0x72,0x4e,0x83,0xe0,0x65,0x40,0x41,0xaf,0xb6,0x5b,0x00,0xa2,0xab,0x09,0x7f,0xa5,0xd5,0xc2,0xd9,0xc0,0x68,0x2a,0x44,0xdc,0x43,0x37,0x81,0xb8,0x88,0x4c,0x85,0x1b,0xb1,0x83,0xb2,0x56,0xa3,0x91,0x0f,0xa6,0x70,0x3f,0xbd,0xe9,0xda,0x40,0x9b,0xf5,0x9e,0x53,0xed,0x5f,0x84,0x70,0xd2,0x4c,0x1c,0xb6,0x87,0xd6,0xbb,0x3b,0xec +.byte 0xe5,0x35,0x1b,0x2c,0x9b,0xf1,0xe5,0xf8,0x0e,0x07,0x98,0xcc,0x58,0x38,0x57,0x74,0xdb,0x0e,0x08,0xd9,0x56,0xe8,0x08,0x63,0x3d,0x94,0x4a,0xdc,0x59,0xfc,0x3d,0xc1,0xa4,0x36,0xc3,0xe8,0xbe,0x4b,0xd7,0x47,0x69,0x33,0xb8,0x72,0x30,0x59,0x28,0x4e,0xf1,0xc1,0x25,0xa3,0xa4,0xe3,0x12,0xcf,0x31,0xf6,0xf8,0xae,0x31,0x06,0x76,0x92 +.byte 0x64,0x87,0x8e,0xb0,0x9f,0x1d,0xf4,0x56,0x73,0xc5,0x5d,0xbb,0x80,0x0d,0x19,0x3f,0x56,0x8c,0xe4,0xd6,0x8a,0x9a,0x62,0x26,0x4e,0x8a,0x21,0x7d,0x72,0x34,0x87,0xb6,0x7e,0x49,0xdc,0xfd,0x27,0x95,0xba,0x25,0xdd,0xf4,0x58,0x2b,0x11,0x3f,0xd1,0xd7,0x13,0x1d,0xb0,0xec,0xe2,0x55,0x5e,0x72,0xea,0x36,0xc9,0xd8,0x61,0xc0,0xee,0xc4 +.byte 0x9f,0x35,0x7e,0x73,0xd3,0xf6,0xd7,0x6a,0xce,0xd6,0xd2,0x80,0xe6,0x10,0x4b,0x65,0x18,0x6f,0xab,0xd3,0x41,0xbb,0x39,0x36,0x95,0x84,0x3c,0x99,0x9a,0xfd,0xf0,0xa3,0x46,0xdf,0x48,0x7c,0xd5,0x57,0x9d,0x10,0x59,0xca,0x70,0xc4,0xb5,0xbe,0x47,0x9e,0xca,0x2b,0x49,0x54,0xbb,0x34,0x8e,0x39,0xf4,0xf8,0x8c,0xa5,0xa1,0xab,0xf6,0x51 +.byte 0xd8,0x22,0x9a,0xd5,0xc2,0x12,0xf8,0x26,0xc6,0x19,0x2a,0xa6,0x6e,0xab,0xd3,0xac,0xd1,0x21,0x97,0x67,0x3e,0x39,0x90,0x5c,0x37,0x65,0x7b,0x06,0x54,0x1a,0xb8,0x2a,0x56,0x02,0xa3,0x92,0xee,0xf3,0x38,0x53,0x25,0x4d,0x5d,0x0a,0x37,0x9e,0xbb,0xf4,0xb2,0x13,0x77,0xbb,0x93,0xa9,0x85,0xf2,0x15,0xfd,0x71,0x17,0x00,0x89,0xe7,0x7b +.byte 0xa9,0xdc,0x10,0xd9,0xc7,0x44,0xa5,0x7b,0x3f,0x2f,0x1e,0x6d,0xa7,0xfe,0x0c,0x0e,0x83,0x3e,0x38,0x27,0xa7,0x4e,0x85,0x3c,0x84,0xfe,0x95,0x48,0x85,0x09,0x75,0x62,0x1d,0xa4,0x64,0x54,0xed,0x89,0xd5,0x28,0x62,0x52,0x18,0xef,0xf0,0x57,0x05,0x30,0xf0,0xce,0x87,0x05,0x0d,0x81,0xe8,0x2a,0x3c,0x8c,0x22,0xe1,0x4b,0x32,0x42,0x9d +.byte 0x02,0xc5,0xe4,0x6a,0xa4,0x4d,0x9b,0xc4,0x82,0x47,0xdc,0x61,0xbd,0x82,0x01,0xcd,0x5e,0x64,0x9f,0x4c,0xe3,0x31,0xe9,0x48,0x53,0x85,0x07,0xc7,0x47,0x49,0x35,0xd8,0x6a,0xab,0x4f,0x73,0x3f,0xd3,0xde,0x87,0x29,0xac,0xbc,0x35,0x0a,0xb4,0x74,0xc2,0xa7,0x0b,0xb1,0x93,0x92,0x29,0x3b,0x3e,0xa8,0xde,0x12,0x49,0x75,0xda,0x16,0x27 +.byte 
0x52,0x2f,0x93,0x23,0xd6,0xf7,0x10,0xfe,0x1e,0x93,0x97,0x06,0x9d,0xef,0x4f,0xe4,0x3d,0x5d,0xde,0x30,0x70,0x3d,0x78,0x3a,0x30,0x00,0x9b,0x77,0x12,0x90,0x62,0xda,0x32,0x9b,0x6a,0x47,0xd7,0x0f,0xee,0x75,0x18,0xdd,0x4d,0x8a,0xe2,0x35,0x5b,0x60,0xb8,0xf9,0xa4,0x6c,0x93,0x3e,0x47,0x23,0xed,0x7a,0xe2,0x58,0x42,0xd6,0x3f,0x90 +.byte 0xc0,0x12,0x38,0x8b,0x70,0xe0,0xf8,0x1a,0xb5,0x8d,0xe1,0x39,0xdf,0x93,0x25,0x72,0x2e,0xa9,0x3f,0x58,0x12,0x40,0xc4,0x92,0x46,0x08,0xf0,0x64,0xdd,0x34,0x42,0xfe,0x74,0x35,0x0c,0xda,0xef,0x06,0x0b,0x33,0x59,0xd9,0xee,0x4c,0xf9,0x02,0x3a,0x93,0x40,0xa3,0x99,0x0e,0x64,0x11,0x2f,0x52,0x9d,0x28,0x4d,0xe8,0x45,0xd0,0x22,0xd7 +.byte 0x8f,0xd6,0x28,0x8c,0x0e,0x18,0x87,0x24,0xf9,0x88,0xd2,0xc0,0xe8,0xd4,0x9d,0xa2,0x5a,0x79,0x83,0x37,0x18,0x84,0x12,0xca,0xc7,0x10,0xd5,0x5a,0xa8,0xe5,0xa8,0xe7,0x79,0xb6,0x2c,0xb3,0x90,0x6c,0xc5,0xa4,0x99,0x1b,0x85,0x29,0x78,0x0b,0x09,0x77,0x05,0xf4,0x23,0x79,0x5c,0x91,0xf3,0xe0,0xe4,0x6f,0x82,0x33,0x4e,0xa2,0x2e,0xa2 +.byte 0x65,0x79,0xad,0x98,0x36,0x34,0x72,0x97,0xd7,0x39,0x89,0x5e,0x82,0x9f,0x4c,0xe2,0xea,0x51,0x85,0x62,0x0c,0x39,0xf6,0xdc,0xc6,0x80,0x48,0xcf,0x98,0x93,0x64,0x7d,0xf9,0x63,0xf4,0xf5,0x18,0x2a,0xb6,0x04,0xb7,0x44,0xc4,0x60,0xc0,0xcf,0x3d,0x88,0xa8,0xb6,0x81,0xa3,0x99,0x2a,0xf0,0x1a,0x8d,0x76,0x20,0x1d,0xcc,0x10,0x50,0x58 +.byte 0x09,0xf9,0xda,0x65,0x60,0xc3,0xb1,0xc1,0xc0,0x4d,0x62,0x52,0x22,0x45,0x32,0xbc,0x11,0x93,0x15,0xb6,0x25,0x8f,0x65,0xa0,0x4c,0x88,0xc9,0x83,0xe1,0x5c,0xbb,0xfb,0x1a,0xab,0xdb,0x35,0x40,0x66,0xc0,0x2f,0xdc,0xf5,0x92,0x08,0x4c,0xc7,0xb8,0x49,0x05,0xe0,0xe1,0x61,0x2b,0xde,0xc7,0x6a,0x04,0x05,0x4d,0x9f,0xe9,0x59,0x22,0x56 +.byte 0x63,0x77,0x9d,0xe3,0x1e,0x36,0xdf,0x87,0x4a,0xeb,0xba,0x42,0x3d,0x1b,0xa5,0xd0,0xc5,0x44,0x07,0xbe,0x37,0x37,0x70,0x10,0x2d,0x02,0x9b,0xf6,0x52,0xf3,0x54,0x6d,0x50,0xdb,0xdb,0x57,0x01,0x0b,0x9b,0xd5,0x99,0x99,0x69,0x9b,0x10,0x76,0x48,0xea,0x28,0x27,0x06,0x30,0x63,0x3b,0xdf,0x06,0x30,0x37,0x28,0x75,0xcf,0x9c,0xe7,0x52 +.byte 0x43,0xe2,0xd5,0x7b,0xfa,0x88,0x98,0x9c,0x3e,0x27,0x30,0x21,0xcc,0x11,0x71,0x14,0x24,0x04,0x1a,0x8c,0xe9,0xfe,0x2f,0x9d,0xec,0xb1,0x10,0x33,0x05,0x31,0x01,0x1b,0xde,0x6b,0x30,0x20,0x6d,0xf4,0x7c,0xbf,0x41,0x04,0x5f,0xb9,0x9c,0x24,0x63,0x74,0x98,0x3e,0x60,0xc7,0xf1,0xb1,0xc6,0x94,0xf3,0x6f,0x95,0x24,0xdf,0x97,0xd5,0xc7 +.byte 0x50,0x19,0xaf,0xa5,0xae,0x51,0xde,0x6d,0x44,0x0c,0x90,0x72,0x11,0x82,0x04,0xf9,0xda,0x17,0xd8,0xf3,0x03,0xf2,0x03,0x3f,0x65,0x7f,0xd7,0x66,0x84,0x9a,0x02,0x90,0x2b,0x65,0x00,0xd9,0x9c,0xfb,0xaa,0xe2,0xde,0x5f,0x1e,0x19,0x1e,0x6d,0x20,0x1e,0x01,0xf1,0xca,0x7b,0x90,0x06,0x96,0x1d,0x7a,0x34,0x0c,0x66,0x57,0xd7,0x61,0x1f +.byte 0x74,0x03,0xcb,0xae,0xea,0xaf,0x65,0x8e,0x32,0xbe,0xb8,0xe6,0xd8,0x6d,0xf7,0x51,0x6d,0xec,0x7e,0xc6,0x9d,0x20,0x01,0xbf,0xd7,0xbc,0xcb,0x34,0x7c,0xe5,0x1f,0x92,0x72,0x2f,0x6f,0xa3,0x1f,0xe8,0x4d,0x7e,0xa5,0x85,0x3b,0xed,0xc7,0x25,0x53,0xe3,0x77,0x90,0x1f,0xda,0xb7,0x48,0x7d,0xbe,0x20,0x48,0x9f,0xb4,0x05,0x5d,0x41,0xc5 +.byte 0x48,0xd0,0xc9,0x83,0xbe,0xf8,0xd8,0x6b,0x0d,0x26,0x66,0x2e,0xef,0x6b,0x13,0x58,0x6b,0x5f,0x0e,0x8b,0x4e,0x57,0xb2,0x6b,0x3d,0x4d,0xcd,0xcb,0x9a,0x9b,0xda,0x4d,0x7f,0xea,0x17,0x06,0x7f,0xcd,0xaf,0x18,0xda,0x3d,0xf0,0x30,0x2e,0xbb,0xc2,0x1d,0xcf,0xde,0xf7,0xee,0xda,0xd6,0x3d,0x75,0xcf,0x19,0xcf,0xfc,0xdf,0x7a,0xb6,0x1f +.byte 
0x89,0xf5,0x0c,0xe9,0xd5,0xf1,0xd0,0x40,0xbd,0xae,0xb5,0x16,0xf6,0x05,0x1e,0xba,0xcd,0x18,0x80,0x4a,0xb3,0x87,0x93,0x6b,0x19,0xfc,0x47,0xa8,0x45,0x4b,0x75,0xe8,0x06,0xc0,0xbd,0x86,0xf7,0xcf,0x2c,0x39,0xc6,0x0b,0x3f,0x32,0xcd,0x1c,0x02,0xec,0x4b,0xd5,0x90,0x84,0xaf,0xc9,0x5c,0x9e,0x64,0x82,0x13,0x81,0x05,0x03,0xe4,0xed +.byte 0x48,0x23,0xc3,0x53,0x2c,0x5a,0x22,0x0a,0x27,0x7e,0x55,0x79,0xdc,0x46,0xf5,0x4b,0x04,0xcc,0x43,0x87,0x6c,0xb5,0xa4,0x2d,0x78,0x70,0x02,0x43,0x0e,0x76,0x62,0x99,0x86,0x40,0x2a,0xe4,0x62,0xe6,0xee,0x4e,0x03,0x64,0x83,0x9c,0x38,0x6d,0x62,0xa6,0x85,0xb8,0xce,0xd7,0xf8,0xcb,0x78,0x00,0x7a,0x48,0x72,0x75,0x4e,0x9c,0x6f,0x0c +.byte 0x61,0xc7,0x93,0x4e,0x6d,0x65,0xa3,0x1b,0x17,0x84,0xc6,0xd2,0x29,0xc3,0x4d,0xe3,0x14,0x21,0x5f,0x9e,0xa9,0x28,0x11,0xf3,0xb2,0xe8,0xe7,0x60,0x9e,0x24,0xab,0x88,0x9c,0x9c,0x5e,0x17,0xe4,0xe1,0xa7,0x74,0xb4,0x82,0xd5,0xaa,0x92,0x08,0xa7,0xa2,0x04,0x6f,0x77,0x14,0x54,0x44,0x5d,0x13,0x10,0xa2,0x40,0x1d,0xf0,0x44,0x16,0x17 +.byte 0xda,0x8c,0x80,0x83,0x2b,0x19,0xb8,0xab,0xf2,0xb8,0xb1,0x92,0xb5,0xc5,0x05,0x3e,0xd2,0x1a,0xfc,0xfd,0x21,0xa6,0xb2,0xbd,0x89,0xee,0x9c,0x3c,0x90,0xd9,0xf1,0xd2,0xe8,0xc3,0x21,0xb9,0x0e,0x0c,0x98,0xbc,0x5e,0xa1,0x0d,0x89,0xfe,0x0f,0x3c,0x45,0xea,0xe1,0x6e,0x06,0x59,0xff,0x79,0xf4,0x7e,0xf4,0x82,0xc0,0x6b,0xd9,0x53,0x30 +.byte 0x98,0xed,0x8d,0x6f,0x3d,0x0e,0xfb,0x42,0x66,0xab,0x41,0xa8,0x4a,0xef,0x73,0xa4,0x54,0x99,0x4f,0xb6,0x65,0x44,0xf9,0xd9,0x3c,0x6b,0x59,0x36,0xb0,0xe3,0x7c,0x4a,0x85,0x80,0x6c,0x77,0x6f,0x34,0x4e,0x9e,0x54,0xfd,0x0c,0x25,0x72,0xc3,0x5a,0xb6,0x3b,0xad,0x2b,0xd5,0x29,0x55,0x31,0xab,0x62,0xe4,0x15,0xed,0xef,0x16,0xef,0x43 +.byte 0xd5,0xdd,0x3d,0x64,0x8c,0x13,0xbc,0xcd,0x4d,0xfb,0x4f,0x86,0x3b,0x73,0x1e,0xc4,0xe8,0x54,0xb4,0xcc,0x49,0xba,0x4f,0x81,0xcd,0xe8,0x30,0x92,0x4b,0x57,0xd1,0x7c,0x0c,0x65,0x7d,0xe1,0x59,0xc6,0x8c,0x7d,0xad,0xd5,0xcf,0x6c,0xc4,0x9d,0xc5,0x3f,0x23,0x1f,0xb0,0x6d,0x1c,0x07,0xbf,0x38,0xc9,0x16,0xdc,0x5b,0x51,0xa1,0xdb,0x8f +.byte 0xf8,0x25,0xc6,0x4d,0xc0,0x4d,0xa1,0x02,0xd9,0xd3,0xb5,0x63,0xda,0xe1,0x91,0x60,0x71,0x39,0x46,0x1a,0x13,0xe0,0xf2,0xca,0xcc,0xd3,0xbb,0x6b,0xd0,0x64,0xaa,0x0e,0xc0,0x89,0xa3,0xc6,0x14,0x56,0xe4,0x44,0x97,0xa9,0xcc,0x17,0x68,0xe6,0xfc,0xe5,0xfd,0xf0,0xa6,0x69,0xcd,0xac,0x20,0xc7,0xeb,0x53,0x1b,0x4f,0xdd,0xd3,0xb0,0xed +.byte 0x30,0x4e,0x36,0x73,0x63,0xef,0x51,0x3e,0x9a,0x3e,0x41,0x2b,0x9c,0xda,0x67,0x96,0x46,0x33,0xe3,0x3f,0x87,0x01,0xd8,0xc5,0x26,0x80,0xe4,0x7e,0xf4,0x78,0x8c,0x2b,0x81,0x2a,0x01,0x7c,0xe3,0xfc,0x8d,0x6b,0xdc,0x84,0xb9,0xff,0x43,0x37,0x57,0xce,0x3f,0x5e,0x63,0xd3,0xbe,0xb6,0x4a,0x31,0xbf,0xb8,0x74,0x64,0x9c,0xf3,0xc5,0x8a +.byte 0xae,0xe8,0x5f,0x68,0xcf,0xce,0xff,0x3f,0xc5,0xb5,0xfd,0x13,0x08,0x11,0x9d,0x1a,0x0f,0x06,0x08,0x4d,0x7c,0xf9,0xd4,0x20,0xdf,0x82,0xf9,0x86,0xfc,0xf3,0x67,0xa0,0x14,0x99,0xe5,0x47,0xf0,0x02,0x7b,0x16,0xca,0xcf,0xb9,0x0f,0x68,0x08,0x5d,0x1d,0x65,0xee,0x23,0x56,0xeb,0x11,0x5b,0xca,0xf1,0xa7,0xad,0x50,0xb2,0xd1,0x37,0x65 +.byte 0xe9,0x7e,0xf6,0xe9,0x64,0x42,0x49,0x80,0x40,0x17,0xe3,0x43,0x00,0xda,0xe1,0x7a,0x1c,0xb3,0xde,0xd9,0xf7,0x33,0xeb,0xb3,0xb8,0xf5,0x40,0x1b,0xcd,0x71,0x97,0x30,0xf9,0x9c,0x4d,0xac,0x7e,0x8e,0xd9,0x36,0x92,0x39,0xb5,0x56,0x0f,0x4f,0xbf,0x58,0xb8,0xba,0xc3,0xbd,0x79,0xb0,0xd7,0x6c,0x45,0x49,0xe2,0xde,0x94,0x04,0x9d,0x3e +.byte 
0x91,0x0a,0xb2,0x9b,0x90,0x57,0x2e,0x69,0xa4,0x4f,0x61,0xbf,0xdb,0xfb,0xe3,0xe9,0x81,0x26,0xe0,0x48,0x90,0x8c,0x32,0x95,0x8d,0x38,0xec,0x8e,0xa7,0x5e,0xc3,0x36,0xc6,0xd1,0xbc,0x9a,0xb3,0xba,0xdb,0x2c,0xe4,0xa0,0x50,0x74,0xef,0x98,0x48,0x14,0xc9,0x38,0x4d,0xa9,0x48,0x13,0xd4,0x08,0x60,0xfd,0xcf,0x5e,0xf2,0xcd,0xc7,0xeb +.byte 0xaf,0x88,0x32,0x30,0x6f,0x19,0x01,0xec,0x87,0xae,0x6d,0x63,0xa3,0xa7,0x7b,0xcd,0x53,0xa7,0xf2,0xf2,0x9f,0x43,0xcb,0x0a,0x3f,0x8c,0xd2,0x55,0x8d,0xa7,0x95,0xcf,0x5b,0xae,0x64,0x23,0xda,0xb4,0xbd,0x32,0x34,0x95,0x8a,0x03,0xe7,0x6e,0xef,0x3f,0xb4,0xcf,0xc6,0x8a,0x2f,0xc6,0x59,0x99,0xdf,0xad,0x3c,0x15,0xed,0x83,0x0b,0x59 +.byte 0x8b,0xcd,0x0d,0xa6,0xcf,0x3a,0xc3,0xdb,0xc3,0x01,0xa9,0x32,0x38,0x45,0x5c,0xc8,0x56,0x81,0xef,0x21,0x7f,0x52,0xc4,0xb5,0x48,0x97,0x6a,0x60,0x75,0x3a,0x1a,0xd3,0xb0,0x60,0x9a,0x83,0x61,0xad,0x3b,0x4b,0x65,0xaa,0x9e,0x77,0x47,0x6f,0x3b,0x48,0xb0,0xc6,0x36,0x9a,0x59,0x5e,0x26,0xc4,0xb9,0xed,0x04,0xf3,0xc7,0x09,0x33,0xda +.byte 0x81,0x63,0xa6,0x5d,0xe1,0x54,0x6b,0x04,0x17,0x2b,0xb9,0x2f,0xbd,0x55,0xdb,0xa1,0x69,0x00,0xcd,0xba,0xfa,0x36,0xaa,0x47,0x5a,0x7c,0xf4,0x1f,0x53,0x94,0x95,0x2f,0xf8,0x2a,0x4b,0xa8,0xcc,0x73,0xab,0xfd,0x25,0xb2,0x4e,0xd6,0x62,0x90,0x8c,0x8f,0x02,0xe4,0xdc,0x22,0x79,0x04,0x34,0x9b,0x54,0x5c,0x54,0xca,0x9b,0x8a,0xf8,0x05 +.byte 0xd1,0xb0,0x9e,0x8f,0xa3,0x0b,0x53,0xa8,0x6f,0x1b,0x2e,0xf2,0x71,0x78,0x28,0xce,0xa9,0xdb,0x4c,0x5b,0x83,0xfe,0xaa,0xff,0x99,0x2f,0x03,0x14,0xb2,0xe0,0x5f,0xaa,0x65,0x15,0x1f,0xd2,0x31,0x95,0x70,0x3c,0x8b,0x55,0x8e,0x87,0xed,0xbb,0x0c,0x91,0x87,0xaa,0xbe,0x49,0xdb,0x18,0x7b,0x1d,0x26,0xa7,0xdf,0x00,0xff,0x73,0x70,0x2e +.byte 0x10,0xaf,0x46,0xea,0x7f,0xca,0xfa,0x09,0x13,0x02,0xac,0x3f,0xa0,0x02,0xa6,0x67,0xb7,0xec,0x18,0x73,0x91,0x25,0xa0,0x28,0xe3,0xd8,0xfa,0x11,0x6d,0x34,0x79,0x1d,0xe4,0x8f,0x7c,0x73,0x66,0x77,0x3e,0x43,0x23,0xb0,0xee,0x84,0xb5,0x75,0xc9,0x23,0x87,0x6a,0x4f,0x59,0x3d,0xb5,0xf1,0xd6,0x06,0xf8,0xa6,0x5d,0x0c,0x24,0xed,0x94 +.byte 0xd7,0xa8,0x31,0x37,0x10,0x60,0xb6,0x03,0x33,0x27,0x38,0xdd,0xd3,0x74,0x02,0xa3,0xa6,0x01,0x94,0xa9,0x56,0x11,0x23,0x0e,0xdb,0xfd,0x25,0x92,0xa8,0xfb,0x79,0xc8,0x8e,0x0e,0x10,0x1f,0xca,0x95,0xf6,0xad,0x28,0xe7,0xaa,0x2b,0xf1,0x40,0xf6,0xef,0x7b,0x40,0x28,0x57,0xbb,0x4c,0xac,0x0b,0x8b,0xb3,0xe3,0xec,0x53,0xf2,0x15,0x61 +.byte 0x2e,0x91,0xdf,0x91,0xfb,0x55,0xb6,0x7f,0x6c,0xfc,0xb7,0x4b,0x91,0xdc,0xf7,0xe5,0x91,0xd8,0x70,0x92,0x94,0xea,0x3f,0x62,0x98,0x14,0xc3,0x43,0x34,0x02,0x87,0xc7,0xca,0x60,0x4a,0xfb,0x50,0xe4,0xa9,0x92,0x10,0x04,0x7c,0x55,0xd3,0x9a,0x89,0xba,0x8e,0x6f,0x02,0xd6,0xc7,0x6f,0x91,0xb5,0x87,0xb9,0x0e,0xbe,0xe4,0x9f,0x01,0x0b +.byte 0x20,0x60,0xc8,0x16,0xe6,0x23,0x1d,0x5f,0x4d,0x82,0xf4,0x42,0x25,0xe6,0x05,0xe3,0x5b,0xbb,0xd1,0xb0,0xad,0x0b,0x05,0x71,0x3a,0x7b,0xee,0x0e,0xe1,0xe4,0x08,0x9f,0xda,0xdf,0x59,0x57,0x4f,0x05,0x5a,0x51,0x9a,0x60,0xfd,0x85,0x21,0xd1,0x0a,0x3b,0x0a,0x15,0x61,0x28,0x98,0x0a,0x8f,0x1e,0x33,0x15,0xb3,0x5f,0xf3,0xbb,0x89,0x22 +.byte 0x0c,0xaf,0x91,0xce,0x44,0xb1,0x54,0xd0,0x80,0x86,0x43,0xa1,0xb9,0x07,0xde,0xab,0x1f,0x9b,0xae,0xef,0x07,0xf2,0x40,0x33,0x31,0x4d,0xf9,0x45,0x97,0xf6,0xcc,0xe5,0x3c,0x49,0xcd,0x83,0x6e,0x38,0x81,0xab,0x40,0x18,0xda,0xf6,0xfe,0xe7,0x96,0xd1,0x17,0x98,0xae,0xec,0xe9,0x93,0x37,0xbc,0x0b,0xa8,0x12,0xe7,0x65,0xca,0x27,0x37 +.byte 
0x6a,0x74,0x81,0xf1,0xe0,0x6c,0x0d,0xba,0x86,0x48,0x94,0xd0,0x72,0xd5,0x4d,0x71,0xcf,0xa8,0x5e,0xd1,0x97,0xd1,0xed,0xf0,0xd3,0xe4,0xe3,0x41,0xc9,0x8f,0xfc,0x89,0xe8,0xbf,0x96,0x8b,0x86,0xb0,0x97,0x79,0x95,0xdf,0x69,0x56,0x6d,0x61,0x0a,0x37,0xcb,0x36,0xe1,0x95,0x88,0xf5,0xf0,0xe2,0x5c,0xb2,0x44,0x73,0xda,0x83,0xa7,0xdc +.byte 0x8b,0x35,0x3e,0xc1,0xd5,0x88,0x17,0x3b,0xeb,0xcf,0x36,0x9c,0xef,0x40,0xb2,0x72,0xde,0x4f,0x16,0x6c,0x8c,0x9d,0x15,0xce,0x7d,0x0d,0xc3,0x2f,0xea,0xab,0x50,0xdf,0x02,0xe0,0x24,0xcc,0xf4,0xa7,0x25,0xba,0x85,0x0d,0x62,0x9a,0x39,0xc7,0x5a,0xd1,0x9a,0xd1,0xa7,0x45,0x5f,0xc2,0x44,0xf5,0xa9,0x8d,0xd8,0xbc,0xd3,0xc8,0x75,0x0d +.byte 0x06,0xc6,0x4b,0x24,0xc6,0xe5,0x72,0xf7,0xd5,0x87,0xca,0x3c,0xc0,0x1c,0x18,0xa9,0x40,0xc6,0x7b,0xe5,0x4c,0xe6,0xb7,0x01,0x57,0xc1,0xcf,0x63,0x83,0x58,0x63,0x47,0xcf,0xa4,0xd3,0xf6,0x1d,0x2c,0xbf,0x17,0xe6,0x0a,0x7b,0x2d,0xa9,0x34,0x23,0xfc,0x1f,0x06,0x31,0x47,0x7b,0x31,0x34,0x8c,0x3c,0x15,0x9b,0xac,0xfd,0x38,0xe6,0xa3 +.byte 0x9e,0xa7,0xdf,0xa6,0x37,0x61,0xfd,0x85,0xb8,0x2e,0x67,0x73,0x7f,0x60,0x12,0x8b,0x62,0xb0,0x38,0xd0,0xaa,0xc4,0xad,0x3b,0xa9,0x04,0x66,0xdd,0xbb,0x9c,0xb1,0x95,0xe1,0x9c,0x0a,0x72,0x80,0x12,0xaa,0xa8,0x0c,0x3f,0x90,0x20,0x33,0xb4,0x76,0xdd,0x26,0xfe,0x1e,0x8f,0x6a,0x2d,0xea,0x4a,0xdc,0x28,0x47,0x66,0x36,0x5b,0x50,0x60 +.byte 0x7e,0x3e,0x93,0xf3,0xe9,0x37,0x31,0x3b,0x43,0x46,0x85,0xb3,0xa9,0xb2,0x14,0x95,0x96,0x49,0xf9,0x2a,0xe7,0x9e,0x3a,0x3e,0xd8,0x12,0xf7,0xbc,0x43,0x8c,0x35,0x31,0x44,0x08,0x7f,0x25,0x39,0x86,0x98,0x6a,0xe8,0xe3,0x2e,0x73,0x2d,0x3b,0xac,0x2d,0x75,0x4c,0xc8,0xca,0x21,0x2d,0x96,0x9b,0x4f,0x56,0xff,0x2d,0xc2,0xe2,0x98,0x3d +.byte 0xe2,0x3f,0xee,0x10,0xb7,0xc3,0x3d,0xa8,0x50,0x88,0x7f,0xd5,0x4e,0xbd,0xc7,0x9d,0xdc,0x01,0x49,0x27,0xf2,0xae,0xea,0x93,0x72,0xdf,0x00,0xcd,0xe6,0xa1,0xdd,0xd1,0x18,0xeb,0xa7,0xe1,0x4a,0x7b,0x38,0x72,0x73,0x29,0x46,0xa3,0xb3,0x25,0x23,0x6d,0x26,0xab,0x86,0xdc,0x67,0x52,0xe5,0x4a,0x5e,0x8f,0x16,0x67,0x8a,0x28,0x13,0xba +.byte 0x44,0x42,0xb5,0x21,0x9f,0x30,0x66,0x7f,0xc9,0x87,0x40,0xcb,0x75,0x58,0x2e,0xcd,0x09,0xb9,0x8a,0x84,0xa3,0xbd,0x63,0x53,0x75,0x2f,0x77,0x8b,0x7e,0x19,0x31,0x33,0x3b,0x9a,0xfb,0x86,0x39,0xa6,0xd9,0xeb,0x9b,0x43,0xc6,0xd9,0xc2,0x10,0xab,0x42,0xe5,0xc6,0x4a,0xe6,0x3e,0xde,0x9d,0xac,0x8e,0x95,0xf0,0xdb,0x48,0x95,0xc2,0x87 +.byte 0x6b,0x7f,0xde,0x09,0xdb,0xed,0x49,0x19,0x73,0x2d,0xa4,0x5c,0xdf,0xfa,0x2e,0x15,0xd0,0xb6,0x46,0x32,0xc9,0x7f,0x7e,0x01,0xd3,0x25,0x45,0x0e,0x5b,0x0d,0xf0,0x67,0xe3,0xd9,0xdf,0x4f,0x3b,0x6f,0xb3,0x15,0xc5,0x6b,0x91,0x75,0xa2,0xaf,0x42,0x3a,0x14,0x50,0xd9,0x4f,0x19,0x65,0x12,0x83,0x5d,0x8f,0x8a,0x01,0x0b,0x89,0xcc,0x7f +.byte 0x1a,0xde,0x5b,0x44,0x34,0x98,0x0f,0x8e,0x5a,0x5e,0x03,0x41,0x3e,0x66,0x9b,0x16,0xf5,0x91,0x7c,0xb0,0xc1,0xbf,0xa2,0x10,0x0b,0x60,0x3a,0x63,0x0c,0xcf,0xd8,0x49,0xdb,0x42,0x88,0x1f,0x36,0x8e,0x15,0xdb,0x5d,0x3f,0xe7,0xf1,0x9a,0x73,0x2b,0x74,0x0c,0xd5,0x09,0xab,0x01,0x2e,0x52,0x6f,0x03,0xf6,0xc9,0x0b,0xeb,0xa5,0xce,0x2e +.byte 0x1c,0x02,0x35,0xca,0xce,0xfe,0x4b,0xad,0x67,0x21,0xf8,0x44,0xea,0x70,0xf2,0x3d,0xfc,0x43,0x77,0x05,0x26,0xbe,0xaf,0x99,0xab,0x41,0xd4,0xcc,0x53,0x33,0x33,0xcd,0xb4,0x2d,0x76,0xfb,0xae,0x0c,0xac,0xc1,0xd0,0x42,0xfb,0x45,0x4a,0x6e,0x55,0xd2,0x93,0xef,0xb9,0x06,0xbc,0x38,0xce,0x94,0xc2,0x01,0xdf,0x27,0xc8,0x47,0xff,0x74 +.byte 
0xfb,0x84,0xc5,0xa2,0x78,0x1f,0x4f,0x73,0x12,0xec,0x2d,0x82,0x5b,0xeb,0x3c,0xb6,0x1c,0x5a,0x29,0x9c,0xba,0x9e,0xa4,0x85,0x94,0x84,0x68,0x01,0xd7,0xb1,0x27,0x84,0x4a,0x7d,0x62,0x9c,0x32,0x12,0x89,0xd8,0x66,0xb5,0xe9,0x07,0xf4,0x5f,0x6b,0x0e,0x90,0x87,0xe5,0xc1,0x8b,0xaf,0x8f,0xf7,0xca,0x54,0xe0,0xc6,0x5f,0xa5,0xec,0xd1 +.byte 0xdc,0xdc,0x17,0x9e,0xca,0x4b,0x72,0x72,0x03,0x96,0x62,0xaa,0xc1,0xfe,0x23,0x7e,0xd2,0x06,0x61,0xb6,0xc9,0x0d,0x7e,0xbf,0x72,0x1c,0x66,0x46,0x0b,0x31,0x96,0x81,0x11,0x3d,0xac,0x5e,0xd0,0x35,0xaf,0xac,0x4c,0x74,0xce,0xf9,0x9c,0x64,0x3d,0xe5,0x9d,0xfe,0xc7,0x05,0x09,0xe1,0x70,0xc5,0x37,0xd5,0x4e,0xd8,0x7d,0xdb,0xfa,0x1c +.byte 0x28,0xfc,0x10,0x2a,0xe8,0x62,0x18,0x09,0x97,0xe0,0x98,0x2e,0x9f,0x1d,0x18,0xff,0x22,0xe9,0x5d,0x37,0xd2,0x74,0xf1,0x81,0x08,0x8a,0x55,0xc0,0x40,0x0f,0x70,0xbe,0x82,0x23,0x78,0x35,0xc8,0xf8,0x59,0x6e,0x0d,0x2e,0xd5,0xe7,0xf5,0x2e,0xbd,0xcd,0x1a,0xcf,0x76,0x43,0x1f,0xca,0x15,0x6c,0x4a,0xb7,0xc7,0xb9,0xaf,0x68,0xd7,0x31 +.byte 0x1e,0x0c,0x9c,0x78,0x74,0x66,0x80,0xc6,0x74,0xbe,0x86,0x59,0x0c,0x12,0xdc,0xf3,0x1b,0xaf,0x63,0x74,0xce,0x1e,0xac,0xf0,0x65,0xa0,0xab,0x7f,0x96,0x08,0x32,0xb2,0xca,0x9c,0xfb,0x9d,0x66,0x63,0x76,0xf9,0x69,0x08,0x6e,0xd3,0x46,0xde,0xdf,0x54,0x06,0x0d,0x25,0x81,0xd9,0x5a,0x45,0xeb,0xe5,0xc0,0xf6,0x86,0x0f,0xe9,0x27,0x7c +.byte 0xdc,0x52,0x28,0xb5,0xd0,0x7d,0x07,0xc1,0xb6,0x9b,0xdc,0xea,0xd3,0x2a,0xba,0xb0,0xd5,0xa3,0xd8,0x25,0x07,0x9c,0x6c,0xd6,0x16,0xa5,0x93,0x43,0x52,0xa7,0x5c,0x2b,0xe2,0xfa,0x8e,0x6e,0xaa,0x04,0x84,0x63,0x80,0x0f,0x90,0x10,0x41,0x1c,0xf6,0x67,0xea,0x39,0xb0,0x16,0xfc,0x6f,0x85,0x28,0x8c,0x8e,0xfb,0x79,0x39,0xdf,0xf6,0x6e +.byte 0x57,0xa1,0xaa,0xf1,0x0b,0x99,0xde,0xad,0x69,0xe2,0xf4,0x74,0x8e,0x8c,0x2d,0x20,0xdb,0xf3,0x2d,0xc2,0x75,0xe7,0xd6,0xc8,0x9d,0x46,0x3b,0x8b,0x8b,0x18,0xd8,0x41,0xfd,0xc2,0x7d,0xec,0x66,0x78,0xe7,0xbe,0xee,0x2b,0x07,0xd8,0x7e,0x13,0x61,0x7e,0xab,0x7d,0x2b,0x3f,0x83,0x96,0xf5,0xab,0x0b,0x20,0xd2,0x5b,0xb0,0xeb,0xf7,0x1b +.byte 0xac,0x1a,0x16,0x46,0x21,0x90,0xdb,0x67,0x66,0x42,0xe2,0x54,0x34,0xae,0x34,0xae,0x21,0x33,0x8c,0x48,0x19,0xdb,0x1f,0xa8,0x25,0x76,0xe0,0x03,0x1c,0x35,0x8d,0xd3,0xab,0x6b,0x93,0xf3,0xad,0x7d,0x3c,0x76,0x1d,0xaa,0x43,0x80,0x0f,0x5f,0x20,0xd9,0xf0,0xff,0x8b,0xf4,0xdb,0xbc,0xf2,0xff,0xf2,0x8a,0xfc,0xf5,0x0e,0x4e,0xd9,0xb0 +.byte 0xd6,0xb3,0x86,0x5b,0x3e,0x10,0x87,0x50,0xf1,0xd2,0x8f,0x8d,0xa4,0x39,0x85,0xf5,0x90,0xd6,0x53,0x69,0x40,0x42,0xc1,0xc3,0x7c,0xc1,0x3e,0x97,0xb4,0x08,0x49,0x93,0x4e,0x4c,0x67,0xd9,0x2e,0x05,0x70,0x04,0x98,0x0a,0xed,0xd0,0xff,0x0c,0x13,0xe4,0xde,0x75,0x81,0x24,0xb1,0x27,0x79,0xeb,0x80,0x68,0x52,0x50,0x66,0x77,0x4f,0xf6 +.byte 0x64,0x2f,0x85,0x9e,0xc1,0xbf,0x9f,0x0e,0x31,0x9a,0x36,0x24,0xcd,0xa8,0xe8,0xce,0x41,0x86,0xd1,0x02,0x96,0xdc,0x1a,0xa0,0x48,0xca,0x61,0xd5,0x87,0xdb,0x0a,0xeb,0x69,0x95,0xca,0xf8,0xe5,0xa0,0x5b,0x91,0x8f,0xb9,0x59,0x5f,0x68,0x60,0x58,0xc5,0xe0,0xc7,0x02,0x68,0xa5,0x67,0x1e,0xfc,0xa9,0x27,0x9f,0x83,0x4c,0x05,0x60,0xee +.byte 0xcb,0x79,0x31,0x73,0x36,0xf4,0x39,0x44,0xdb,0xea,0x62,0x89,0x97,0x69,0xd1,0x0d,0xf6,0x27,0xcf,0x47,0xfe,0x3d,0x5c,0xe9,0x92,0x54,0x0a,0x66,0xaf,0x82,0xb1,0x49,0x87,0x3f,0xa2,0x95,0x91,0x0e,0x72,0x1e,0x7b,0xde,0x32,0x31,0x51,0x40,0x24,0x4f,0x30,0x59,0x7d,0x97,0x28,0x30,0x7e,0x93,0xcd,0x1e,0x16,0xef,0xe1,0xb5,0xa8,0xff +.byte 
0x3a,0xd0,0x62,0x94,0x8b,0x72,0xe7,0x97,0x8f,0x2f,0x58,0x3e,0x62,0x43,0x6b,0x28,0x05,0xc9,0x0d,0xf0,0x09,0xbd,0x12,0x3b,0xd8,0x15,0xd3,0x7c,0x97,0x96,0x5a,0xf4,0x9f,0x8d,0x25,0xb7,0xc5,0x66,0xf7,0xf7,0x5f,0x7e,0xca,0x2f,0xcd,0x9a,0xf2,0xa3,0x9b,0x4f,0x6f,0xc3,0xd9,0x64,0x38,0xda,0x87,0x97,0x8a,0x49,0x2d,0x80,0x16,0x73 +.byte 0x88,0x62,0xd2,0xdf,0x4f,0xf7,0x79,0xc0,0x83,0xeb,0x2b,0x66,0x5a,0x21,0x3a,0xa2,0x2a,0xed,0x8c,0xe7,0x91,0x6d,0x56,0x18,0xfc,0x59,0x68,0xea,0x9f,0x5c,0x3c,0xd5,0x0f,0x64,0x70,0x89,0x22,0x83,0xed,0xfa,0xc9,0x21,0x68,0x3c,0x69,0xb8,0x3e,0x89,0xb5,0x9d,0x8b,0xc8,0xf7,0x57,0x17,0x27,0x90,0x12,0xa7,0xd2,0x4d,0x2c,0x30,0x64 +.byte 0x42,0xbe,0xa6,0x49,0x4e,0xa3,0x3b,0xdb,0xdb,0x64,0x0e,0x89,0x66,0x87,0x72,0x90,0x86,0x1d,0x0b,0x61,0x32,0x47,0x3d,0x55,0x81,0xb2,0x50,0x5a,0x76,0x6c,0xa3,0x46,0x12,0x1b,0xaf,0x6e,0xbf,0xfd,0x98,0x2f,0xb7,0xd2,0x31,0x92,0xb5,0x26,0x1a,0x3d,0xfa,0x5d,0xc0,0x24,0x44,0xd2,0x6b,0x1c,0x81,0xf5,0x5d,0x50,0xb0,0x33,0x18,0xe0 +.byte 0xc5,0xb3,0x6b,0xf4,0xfd,0xde,0xf7,0x2f,0x69,0x1d,0x5a,0xfe,0x03,0x6d,0xca,0xad,0x29,0xe0,0x6e,0x70,0xcd,0xe3,0x6d,0x38,0xef,0xf1,0x3a,0x76,0x2b,0x2c,0xb6,0xcd,0xff,0xeb,0xbc,0xe7,0xd9,0x40,0xbe,0x23,0x61,0x20,0xd5,0xb8,0x66,0x77,0x65,0xc9,0x33,0xf5,0x75,0x8e,0x15,0x98,0x3f,0xb1,0x4a,0xb8,0x1c,0x47,0x73,0x45,0x0f,0x73 +.byte 0x2a,0xa1,0xb7,0x73,0x76,0x94,0x16,0x45,0xcf,0xd6,0x8f,0xe3,0x62,0x8a,0x42,0xfd,0xe3,0x1e,0xe0,0x7d,0xb5,0x99,0xbd,0x1c,0xf2,0x60,0xb2,0x72,0xa8,0x4b,0x19,0xd6,0xd0,0xdb,0x0b,0x1f,0xc9,0x68,0xc0,0xf3,0x65,0x04,0x50,0x41,0xf0,0xb3,0x0e,0x0a,0x9d,0x7f,0x0b,0x1f,0xeb,0x5b,0x4c,0x58,0x6a,0xf2,0x02,0x95,0xd2,0xf3,0xac,0xe5 +.byte 0x69,0x81,0xb1,0x3f,0x08,0xfc,0xba,0xcb,0x36,0xcd,0x54,0x28,0xac,0x65,0xd8,0x81,0xab,0xc1,0x6a,0x51,0x97,0x21,0xe4,0xc6,0xaf,0xd8,0x76,0x76,0xa4,0xc4,0xd0,0x58,0x63,0xdf,0x32,0xf5,0x04,0xfb,0x11,0xeb,0x76,0x39,0xda,0x55,0xf4,0x7e,0x1c,0x7b,0x04,0x07,0x4d,0x5a,0xeb,0x74,0x0a,0x57,0xcf,0x10,0xf6,0x0e,0x73,0x02,0x25,0x67 +.byte 0x4f,0x8f,0x37,0x75,0x8f,0x44,0x2a,0x1a,0x6d,0x05,0xda,0xe0,0xa0,0xaa,0xd2,0x78,0xaa,0x7e,0x76,0x0a,0xde,0x2a,0x54,0xae,0x1e,0x39,0xcc,0x3c,0x1c,0xa6,0xd5,0x8a,0xca,0xb4,0xcc,0x76,0xb9,0x30,0xd2,0xe2,0x46,0x31,0xb6,0x51,0xcf,0xe2,0x24,0x77,0xc9,0x9b,0x57,0x3c,0xa3,0x84,0x60,0x59,0x28,0x5f,0x23,0x74,0x17,0x79,0x42,0xbe +.byte 0x60,0x3f,0x09,0x6a,0x43,0x8e,0x40,0x25,0x79,0xb5,0xbb,0xbb,0x72,0x50,0xad,0x4f,0xaa,0xa2,0xd4,0xb2,0xc6,0x7d,0x50,0x7b,0x98,0x59,0x22,0x06,0x7d,0x2c,0x35,0xdd,0x44,0x34,0x9c,0x28,0x98,0xf3,0xe5,0xd0,0x7e,0x09,0xbe,0xc4,0x00,0x72,0xd5,0xa6,0x3b,0x0e,0xb1,0x18,0x91,0x0a,0x4d,0x5d,0xe2,0x0a,0x98,0x79,0x30,0x9b,0xaa,0x38 +.byte 0x03,0x2b,0x6c,0xb2,0x8e,0x0a,0x1d,0x30,0x59,0x8a,0xe8,0x6c,0x6d,0xb5,0xd4,0x91,0xc5,0x28,0x1d,0x5e,0x49,0xe0,0xfc,0x26,0x7f,0x40,0xc0,0x6a,0x81,0x0d,0xb9,0xc6,0x05,0xc6,0x18,0x82,0x70,0xf6,0xea,0x0e,0xb4,0x85,0xba,0x5d,0xfa,0xfd,0xe3,0xd6,0x08,0x7c,0x3d,0x99,0x03,0xd4,0xdc,0x9b,0x50,0x12,0xc8,0xbd,0x8c,0x47,0x67,0x28 +.byte 0x83,0x97,0xca,0xef,0xc3,0x1c,0x2b,0x6e,0x3b,0xf7,0xca,0x7a,0x68,0x6e,0x39,0x25,0x58,0xf7,0xa4,0x11,0x9d,0x8d,0x49,0x29,0xd6,0x6e,0x0b,0x0a,0xcf,0xa7,0x04,0x14,0x6f,0xc4,0x4c,0x36,0x1a,0x16,0x3e,0x8f,0x99,0x69,0x94,0x1d,0xa8,0x66,0x93,0xeb,0x1d,0x82,0xfd,0x3f,0x84,0xb0,0x9d,0xa4,0xe1,0xb0,0xd4,0x9d,0xb2,0x60,0x20,0xfb +.byte 
0xd3,0xa0,0xdc,0x79,0x83,0xb0,0xfc,0x50,0x18,0x57,0xe1,0xeb,0x44,0x25,0x05,0xab,0x27,0xfb,0x5f,0x83,0xcd,0x51,0xd0,0x3b,0x80,0x4a,0xce,0xbf,0xe9,0xfe,0x46,0xd2,0x5f,0xea,0x8c,0x89,0x48,0xc8,0x65,0xdd,0x2a,0xa4,0xda,0x54,0xc2,0x37,0x7e,0xd7,0xff,0x80,0x5b,0xf0,0xc3,0x40,0x44,0x40,0x72,0x63,0x23,0xc6,0x9a,0x48,0xf3,0x4b +.byte 0x91,0x64,0x26,0xfc,0xf3,0xa0,0xb9,0x06,0x0c,0x88,0xbb,0xc0,0x93,0x73,0x63,0xf6,0x9c,0x0d,0xe2,0xf6,0xee,0xe0,0x51,0xfd,0xae,0x4d,0x21,0xb9,0x6b,0x7d,0x1e,0x34,0xa0,0x4d,0xe4,0x25,0x30,0xe6,0x81,0x2e,0x32,0xef,0xb9,0x9e,0xaf,0xa0,0x22,0xe0,0x67,0xe6,0x07,0x55,0x3a,0xed,0xef,0x4f,0x87,0x2f,0x44,0xd2,0xef,0xc1,0xfb,0xc4 +.byte 0x7b,0x27,0x20,0x44,0xd2,0xd6,0xf9,0xf3,0x67,0xc1,0xbf,0xaa,0xd5,0x9c,0xd9,0x2c,0xd5,0xf1,0x42,0x2d,0xec,0x39,0xb5,0xc1,0x18,0xed,0x6c,0x47,0x80,0xf8,0x6f,0x66,0x10,0xee,0x1d,0xd6,0x79,0x01,0x4e,0x2a,0xd0,0x83,0xa7,0x9d,0x1d,0x81,0xce,0xf5,0x6f,0x26,0x86,0xd2,0xd7,0x56,0x15,0x65,0x48,0x4c,0xf1,0xf9,0x21,0x77,0xd1,0x84 +.byte 0x22,0xce,0x4d,0x8d,0x83,0xda,0x8c,0x50,0x56,0xc8,0x3b,0xc5,0xb6,0xcf,0x3e,0x0d,0x50,0xe5,0x9d,0x6c,0xb5,0x2a,0x5a,0x58,0x28,0xf5,0x0a,0x05,0xf3,0x0e,0x40,0x8e,0xb6,0xb4,0xdf,0x11,0x1b,0x34,0x81,0xc5,0x0e,0x09,0xa6,0xfc,0x46,0x14,0x02,0x78,0x94,0xbb,0x63,0x9d,0x3e,0x25,0x2c,0xc8,0x1b,0x5c,0xef,0x64,0x77,0x0c,0x04,0x40 +.byte 0xe1,0x45,0x85,0xf8,0x07,0xbf,0x14,0x65,0xe9,0xfc,0xba,0xe4,0x9c,0xa7,0x91,0x56,0x2a,0x3a,0x8e,0x33,0xae,0x56,0x04,0x9d,0x35,0xbc,0xad,0x64,0x0e,0x99,0x8e,0xb5,0x84,0x72,0xcf,0xcc,0x81,0x14,0x11,0x9e,0xe6,0xac,0x0d,0x41,0x43,0x4e,0x2a,0x0d,0xda,0x98,0x42,0xfa,0x8c,0x21,0x79,0x93,0xa3,0xdf,0x84,0x88,0x76,0x14,0x5b,0xb9 +.byte 0xff,0xe1,0xab,0x94,0xc3,0xcd,0x10,0x69,0xee,0x53,0xea,0xfe,0xfb,0xaa,0x43,0x8f,0xdd,0x55,0x88,0x34,0x5d,0x55,0x0f,0x42,0x4d,0x1d,0x93,0xce,0x96,0x67,0xf8,0x33,0xc7,0xca,0x34,0x11,0x28,0xb2,0xed,0x0f,0x00,0x40,0x84,0xee,0x51,0x26,0x6e,0x7b,0x2d,0x77,0xeb,0x18,0xb8,0x9a,0xad,0x28,0xb6,0x6c,0x5e,0xde,0x10,0x4c,0x29,0x1d +.byte 0x79,0x3c,0x2e,0x1c,0xf0,0xc8,0xb3,0xee,0x19,0x7a,0x10,0xe1,0xe3,0x05,0x1e,0x63,0xe9,0x00,0xd7,0xfe,0x83,0xe7,0x54,0xff,0x65,0x9a,0x27,0xa3,0x86,0x72,0x5c,0xb6,0xef,0xf5,0x84,0x68,0x1e,0xae,0xe6,0xf8,0x66,0x9c,0x1b,0x86,0xab,0xfa,0x1a,0xe3,0xb8,0x97,0x16,0xb1,0xb7,0x42,0xfa,0x85,0xa3,0x3a,0x0d,0x21,0xd2,0x35,0xb1,0x89 +.byte 0xf0,0x4f,0x1a,0x1d,0x45,0x34,0x2f,0x31,0x12,0x8c,0x19,0xe7,0x4b,0x14,0xa7,0xcf,0x0f,0xf9,0xcd,0x77,0x40,0xbe,0x09,0xeb,0xc3,0x3e,0x4a,0x37,0x55,0xab,0xbb,0x9c,0xe5,0x22,0x56,0x8a,0x66,0xfa,0xb1,0xff,0x73,0x29,0x52,0xb1,0x89,0xf7,0xab,0xa6,0x58,0x53,0x97,0xfd,0x44,0xda,0xbd,0x0b,0x1f,0xc8,0x88,0x01,0xcc,0x5e,0xf7,0x05 +.byte 0xbd,0xf7,0x0a,0x4d,0xcb,0xef,0xbf,0xd9,0x8e,0x15,0xc3,0x40,0xb9,0xc9,0x14,0xe5,0x05,0x3c,0x20,0x67,0xfe,0xdc,0xa6,0xb8,0x92,0xbd,0xf5,0x33,0xb5,0x77,0x11,0x28,0x47,0x21,0x28,0x18,0x61,0xf8,0x1c,0xdb,0x65,0xad,0x89,0x0d,0x98,0x79,0xca,0x2b,0xa3,0x4f,0x16,0xa6,0xb3,0xb9,0xcc,0x47,0x5b,0x13,0x96,0x2e,0x39,0x78,0x24,0xc5 +.byte 0xf9,0xf5,0xae,0xdc,0x34,0x3c,0xf7,0x48,0x0d,0x75,0xaf,0x51,0x75,0x48,0xbe,0x4d,0x73,0x89,0x5a,0xfc,0xd7,0x51,0xd3,0x93,0xa8,0xbc,0xc3,0xa6,0x6b,0x63,0xc1,0xc3,0x7b,0x48,0xf1,0x57,0xe4,0xb4,0xce,0x5f,0x18,0xae,0xdc,0x61,0x99,0xaa,0x7e,0x49,0xd6,0xb5,0x2c,0x62,0xb8,0x8c,0x4a,0x94,0xc1,0xc2,0x13,0x23,0xdc,0x7c,0x48,0xc2 +.byte 
0xaa,0xc4,0xd9,0xc0,0x09,0x11,0x6e,0x35,0x07,0x14,0x77,0x7e,0xeb,0x87,0x00,0x05,0x30,0xec,0xb2,0xc6,0xde,0x6e,0x42,0x0b,0x2a,0xb6,0xca,0xb1,0xdc,0x69,0x57,0x1b,0xad,0x52,0xa8,0x22,0x1e,0xb5,0x2b,0xb5,0x8e,0x39,0x4b,0xbf,0x38,0xf4,0xb2,0xf5,0xa1,0x9c,0x7b,0x7f,0x6c,0x14,0x48,0x37,0xa9,0xf9,0xcd,0x85,0x50,0x53,0xb0,0xc1 +.byte 0x15,0x28,0x19,0x3b,0xb1,0x04,0x44,0x93,0x7a,0x16,0x76,0x69,0xa1,0x5c,0x67,0xcc,0x8d,0x02,0x56,0xcd,0xd9,0x91,0x49,0x8c,0x1b,0xc9,0x89,0x98,0x09,0x2e,0x5b,0xf8,0x7c,0xe6,0x0f,0x46,0xb0,0xcc,0xe5,0x75,0x63,0xaf,0x40,0xd5,0xa3,0x45,0x4a,0x76,0x67,0x1d,0x81,0xc2,0x25,0x85,0x7f,0x52,0xc5,0xf8,0x6d,0xd9,0xb6,0xa8,0xa4,0x96 +.byte 0x63,0xcc,0x15,0xc5,0xec,0x40,0x0e,0x08,0xf7,0x6f,0x85,0xa5,0xe7,0x2e,0xbe,0x3f,0xf4,0xc8,0x74,0xc7,0xed,0x86,0x85,0xc0,0x44,0x9e,0x80,0xc8,0x89,0xdc,0x16,0x47,0xb1,0x68,0x0e,0x65,0x66,0x0f,0xbc,0x33,0xb1,0x78,0x1e,0x5e,0xd7,0xde,0x97,0x96,0xb8,0x74,0x5c,0x90,0x7a,0xed,0x36,0xf4,0x10,0x91,0x5a,0x42,0x92,0x81,0x11,0x73 +.byte 0x3e,0xf1,0x5e,0xfb,0xc2,0x38,0xe6,0xe5,0x41,0xce,0x96,0xed,0x44,0x14,0x9c,0xc0,0x1f,0x83,0x5f,0xdd,0x50,0x87,0x90,0x86,0x50,0x61,0x87,0x99,0x7c,0x64,0x2d,0x50,0x17,0xa3,0xb0,0x7e,0x69,0xd3,0x86,0xb4,0x7c,0xe7,0x15,0x34,0x9e,0x3b,0x17,0xc0,0x2d,0x08,0x60,0x8b,0xae,0xec,0xa2,0xf6,0xf1,0xa4,0xbc,0x7b,0xc2,0x75,0x91,0x13 +.byte 0xf6,0xd0,0x71,0xf0,0x3c,0x9c,0x51,0xb3,0x33,0x53,0x57,0x47,0x8b,0x47,0xb0,0x0b,0x95,0x9a,0x39,0x70,0x63,0x91,0xcc,0xd8,0xd0,0x23,0x32,0xc0,0xb6,0x0f,0x91,0x30,0x29,0x45,0xf1,0xfc,0xa1,0x83,0x10,0x9a,0xa4,0x05,0x05,0x9f,0x33,0xbd,0xaf,0x16,0x3e,0x53,0x39,0xb1,0x4b,0x76,0x55,0x3e,0x6f,0x47,0x23,0x59,0x4c,0xbb,0x82,0x31 +.byte 0x19,0xe2,0xb1,0x49,0x20,0x91,0x2d,0xb0,0xfe,0xa6,0xae,0x7f,0x6e,0xd1,0x5b,0xb9,0x84,0x18,0x0f,0x68,0xc6,0x56,0x8a,0x22,0x81,0x3f,0x38,0x42,0x7a,0x31,0xa1,0xc1,0xf7,0x10,0x6a,0xc3,0xb1,0xaf,0x19,0xad,0x06,0x3a,0x53,0x9d,0x44,0x9f,0xe7,0x25,0xac,0x59,0x06,0xb9,0xd2,0xf6,0xce,0xb6,0x1e,0x4d,0x65,0x2e,0x05,0xb4,0x14,0x91 +.byte 0xfb,0x5b,0x26,0xd0,0xee,0xfa,0x45,0x5b,0x0c,0xd5,0x5c,0x1f,0x0c,0xe0,0xf6,0x50,0x78,0x77,0x7e,0x83,0x04,0xec,0x3b,0x53,0x28,0x97,0x56,0x61,0xeb,0xa0,0x78,0xe5,0xc0,0xb2,0x3c,0xcd,0x6f,0x4b,0xda,0x11,0x00,0x93,0x49,0x9f,0x03,0x22,0x39,0x3a,0xc8,0xef,0x01,0x91,0x12,0x36,0x15,0x0c,0x47,0xd5,0x8b,0x77,0x5e,0x5f,0x91,0x4b +.byte 0x44,0x98,0xa0,0xa0,0x46,0x0f,0x17,0xef,0xf9,0x52,0x0b,0x92,0xc1,0xe0,0xfc,0x63,0x9b,0x6d,0xe2,0xde,0x88,0x89,0x32,0x89,0x93,0x44,0x6d,0x69,0xe7,0x26,0xfd,0x77,0xc0,0x18,0x58,0xdb,0x74,0xec,0x04,0x0c,0x60,0x51,0x74,0xca,0x49,0x3e,0x4f,0x5f,0xaa,0x53,0xf2,0xc1,0xcb,0x89,0x1f,0x69,0xaa,0xbb,0x97,0x17,0x04,0x49,0x5e,0x44 +.byte 0xf3,0xf3,0xc4,0x98,0x9d,0x49,0x1e,0xb0,0x27,0x7d,0xff,0x54,0xa5,0xed,0xbe,0xb0,0x52,0xf6,0x00,0x87,0x67,0x2d,0x28,0xdb,0x09,0x4e,0xa2,0xee,0x4f,0x81,0xeb,0xa1,0xca,0x2b,0x07,0x2f,0x54,0x6d,0x5a,0x2e,0x13,0xa4,0xd0,0xac,0x21,0x7c,0x44,0xc0,0x98,0xac,0xe4,0x6e,0x94,0xd1,0x5b,0x5e,0xd6,0xf1,0x3c,0x45,0x88,0xe1,0xbd,0x58 +.byte 0xf1,0xc7,0xba,0x36,0x2c,0x15,0xb9,0xf4,0xa3,0xea,0x73,0xb4,0x91,0x53,0xd8,0x18,0x86,0x23,0x87,0x0b,0x7a,0x4a,0x2d,0x2d,0x3d,0x73,0xcb,0x05,0x11,0x4c,0x19,0x26,0xf2,0x05,0x89,0xc8,0x29,0x26,0xa7,0xe4,0xcb,0x43,0xd0,0xf6,0xbc,0x76,0xbd,0x9a,0x17,0x4a,0xf1,0x39,0xe3,0xde,0x05,0x10,0x8a,0xd3,0x11,0x53,0x61,0xef,0x33,0xd9 +.byte 
0x65,0x0d,0x99,0x0b,0x39,0xa4,0x1b,0x4f,0x0b,0xa5,0xf1,0x37,0xa3,0x4f,0x54,0xa7,0x29,0xc1,0xae,0x88,0x5c,0x13,0x2f,0xb2,0xbf,0xcf,0x1b,0x0d,0xa0,0x68,0x21,0xe2,0x20,0x3f,0x02,0x9f,0x08,0x39,0xc6,0x20,0x2d,0x08,0x01,0x5d,0xf1,0x47,0xde,0x88,0xad,0x49,0x09,0xf7,0x1a,0x0c,0xa7,0x29,0x91,0xe5,0xfc,0xc5,0xde,0xd7,0x92,0x3f +.byte 0xe5,0x0c,0x91,0xea,0x24,0xfb,0x02,0x9a,0x13,0x3a,0x61,0x01,0x9d,0x7e,0x9d,0x11,0xf8,0xbd,0xe0,0x05,0xbb,0x13,0xf0,0x00,0x67,0x90,0x6f,0x80,0xe7,0x2e,0xfc,0xe0,0xea,0x8a,0x9d,0x2c,0x13,0x57,0x4c,0x78,0x1c,0x44,0xe2,0xa6,0x62,0x01,0x46,0xf8,0xbe,0xf4,0x51,0x32,0x15,0xd4,0x3c,0x7d,0x3b,0xcc,0xfd,0xc3,0x46,0x43,0xf1,0xfa +.byte 0x9e,0xee,0xad,0x47,0x8f,0x32,0x31,0x94,0x70,0x92,0xea,0x45,0xe3,0x63,0xd6,0x28,0x23,0xa5,0xdf,0x61,0xee,0x19,0x1a,0x5e,0xb0,0xe7,0x17,0xab,0xac,0xb4,0x03,0xed,0xf6,0x9e,0xba,0xdf,0x52,0x88,0xb7,0xca,0x7c,0x27,0xcd,0x7b,0xf8,0x1e,0x54,0x4b,0xe6,0xa3,0x91,0xf7,0xeb,0x22,0x65,0x95,0x13,0xe1,0xac,0xb6,0x22,0x80,0xe3,0xeb +.byte 0xf9,0xde,0xf1,0xb7,0x6a,0xfd,0xc7,0xb8,0x9b,0x9c,0x49,0x4f,0x84,0x7f,0x68,0x93,0x6c,0x3c,0xea,0xb1,0x8a,0xeb,0x23,0xca,0x2d,0x5e,0x29,0xb5,0x52,0x49,0x98,0x12,0x3f,0xed,0xf0,0xb7,0xbc,0x22,0x14,0x73,0x92,0x84,0x1b,0x3e,0x2f,0xed,0x24,0x1e,0x62,0xcc,0x09,0xe8,0x7c,0x5a,0x08,0xd4,0xc6,0xd9,0xd1,0x55,0x66,0x18,0x2c,0x6a +.byte 0x99,0xc3,0x0e,0x1e,0x7b,0xb7,0xd4,0xbd,0x0e,0x1f,0x22,0x85,0x09,0x2c,0xcf,0xff,0x79,0x9f,0x93,0xbe,0xec,0xed,0x63,0xb7,0x97,0xbb,0xeb,0xd6,0x70,0x76,0xa9,0x4f,0xb7,0x9a,0x60,0x5b,0x50,0xdf,0x85,0x46,0x69,0xa0,0x9a,0x86,0xe3,0xe2,0x13,0x2b,0x8c,0x0f,0x3b,0xab,0xa8,0xce,0xa3,0xb0,0x78,0x72,0x40,0xfb,0xd1,0x26,0x72,0xc1 +.byte 0x91,0x25,0x7b,0x29,0xde,0xcf,0x99,0xf3,0x8e,0x87,0x39,0x81,0x04,0xad,0x3b,0x11,0x6a,0xda,0x00,0xdd,0xe9,0x41,0xc1,0xd8,0xcc,0xf9,0x59,0xac,0x9b,0xb1,0x64,0x6f,0xb8,0xf4,0x9f,0x20,0xde,0x67,0x09,0x1b,0xdf,0x11,0xa5,0x94,0x56,0xab,0x76,0xba,0xc5,0xda,0x6c,0x86,0xe6,0xa4,0x73,0x59,0xa9,0xe3,0x68,0xb9,0xc0,0x50,0x1b,0x55 +.byte 0x21,0x9e,0xea,0x8d,0xcc,0x5d,0xee,0x88,0xe1,0x18,0x7c,0xcd,0x8f,0xff,0x18,0xbd,0x13,0xea,0x95,0xc4,0x8e,0xd3,0x92,0xfe,0x3d,0xda,0x6f,0xa5,0xbc,0xa0,0x77,0x5a,0x1d,0x61,0xff,0x7b,0x77,0xc4,0x06,0x25,0xc5,0xa7,0x76,0x36,0x55,0xe7,0xc0,0xf0,0x46,0x7e,0xca,0xe7,0xc1,0xe8,0x88,0x65,0xff,0xa7,0xb6,0x9c,0x83,0x1d,0x2e,0x6e +.byte 0xd6,0xd3,0x07,0x22,0x65,0x79,0x4f,0x3c,0x0a,0x5c,0x4f,0x95,0xb3,0x14,0x37,0x9b,0x0b,0x97,0x69,0xd9,0x5b,0x37,0x09,0xc3,0x70,0x5b,0x4f,0x11,0xcb,0xce,0xc0,0x06,0xf2,0xb9,0x32,0xdd,0x24,0x7b,0x8c,0xe6,0x0c,0x91,0x3b,0xa8,0xb0,0x82,0x56,0x4d,0xde,0xa0,0x5c,0x0b,0x5b,0x70,0x53,0x64,0x9d,0xab,0xbb,0x51,0x6b,0x8c,0x8f,0xe5 +.byte 0x1f,0xc0,0xb8,0xfe,0x1b,0xf6,0x24,0x26,0x62,0xcb,0x78,0x84,0x90,0x76,0x67,0x30,0x18,0x37,0xa9,0xca,0xb7,0x0d,0xac,0x17,0x86,0xb1,0x87,0x59,0x18,0xc3,0x9e,0x62,0x1b,0xb1,0x04,0x52,0xfc,0x7c,0x86,0xa0,0x37,0xb9,0x8b,0x7a,0x85,0x79,0x21,0xe0,0x0f,0x87,0x28,0x91,0xd0,0xe5,0x24,0x63,0x5c,0x7c,0xe8,0x47,0xfa,0x42,0x55,0xe9 +.byte 0x66,0xad,0xdf,0xc3,0x43,0x90,0x47,0x83,0x24,0x09,0x54,0x5f,0x14,0x27,0x53,0xb3,0x22,0x15,0x52,0x84,0x2f,0x61,0x8c,0x01,0x9e,0x34,0x61,0x3f,0x76,0x44,0x1c,0xca,0x79,0x2c,0x40,0x4e,0xa0,0x36,0x11,0xe0,0x23,0x0f,0xa7,0x78,0xf9,0xf9,0x2a,0x2c,0x98,0x5c,0xa9,0x2d,0x66,0xb9,0x87,0x43,0xd5,0xbc,0x64,0xe5,0x52,0x2f,0x1d,0xdc +.byte 
0x1d,0xf4,0xb3,0x18,0x6b,0xd1,0x3b,0x8b,0xa3,0x47,0x65,0x62,0xcc,0xca,0x5f,0x00,0xbb,0x78,0x9d,0x35,0xd4,0x79,0x45,0x33,0xc7,0xa8,0x29,0x96,0x98,0xa4,0x23,0x2c,0x23,0x7f,0x5a,0x1d,0x09,0xb4,0xcf,0xac,0x54,0xcd,0x27,0xda,0x88,0x21,0xe2,0xb4,0x85,0xdc,0xc9,0x4a,0x6b,0xc4,0xfa,0x48,0xc5,0x91,0xc1,0x53,0x4b,0xa1,0x7a,0x9c +.byte 0x8a,0x7d,0x35,0x52,0xf1,0x58,0x9d,0x20,0x36,0xc2,0x78,0xdb,0x37,0xf8,0xa4,0x2f,0x50,0x98,0xb0,0x34,0x51,0x66,0x93,0xcf,0xe7,0xf0,0x06,0xf1,0xcd,0x0e,0x4f,0x33,0xcc,0x9b,0x73,0x3b,0xc9,0x51,0x63,0x6d,0x29,0x6b,0xf4,0x9d,0x2c,0x76,0x59,0xcd,0xfc,0x11,0x35,0x52,0xbd,0x3b,0x2e,0x7d,0x8a,0x0d,0xb0,0xbb,0x90,0x9b,0x9c,0xac +.byte 0x1c,0x80,0x89,0xd6,0x6f,0xaf,0xea,0x89,0x38,0x74,0xef,0x83,0x82,0x91,0xf7,0x74,0x96,0x30,0x40,0xe2,0x18,0x2b,0xb4,0xf6,0x15,0xf0,0x8e,0x63,0xe1,0x82,0x55,0x7b,0x65,0x70,0x33,0x14,0xef,0x7a,0x7c,0x2d,0xa9,0x17,0x1b,0x53,0x1e,0xf8,0x98,0x1b,0xbe,0xc8,0x00,0xf5,0xbf,0x79,0xe7,0x8e,0xf2,0xdb,0x59,0x0d,0x46,0xab,0x43,0xd0 +.byte 0xe4,0xa0,0xeb,0x29,0x6a,0x8b,0xc1,0x99,0xa6,0xcc,0x8e,0xe5,0xde,0x67,0xdf,0x49,0x09,0x62,0x8d,0x4b,0xa1,0x1c,0x3b,0x01,0xe2,0x95,0x65,0x10,0xa5,0x91,0xd0,0x48,0x35,0x96,0xcf,0xe4,0x51,0xd2,0x7f,0x93,0x49,0xab,0x1a,0xba,0x08,0x33,0x54,0x34,0xd7,0x00,0xc9,0xa0,0x07,0x03,0xc7,0x8a,0x65,0xa2,0x84,0x60,0xcd,0xaa,0xa2,0x46 +.byte 0x8c,0x67,0xd9,0xc1,0xe7,0x58,0xc5,0x1d,0xc0,0xb3,0xc6,0xb2,0x2a,0xfb,0x70,0x04,0xa2,0x25,0x7f,0x75,0x3c,0xd5,0x8e,0x9c,0x33,0xa2,0xdc,0x20,0x4c,0x26,0x5b,0xbe,0xd9,0x00,0x5d,0xa2,0xbd,0x42,0xbd,0x0d,0xd6,0x52,0x79,0xb5,0x67,0xf6,0x27,0x62,0xc8,0x64,0x05,0xc5,0x0f,0xae,0xe1,0x78,0x39,0xd1,0xb5,0x28,0xe9,0xd4,0x2a,0xaa +.byte 0xd4,0xc4,0x3e,0x43,0x27,0x83,0xfa,0xdb,0x46,0x73,0x20,0xcd,0x2c,0xba,0x33,0xb4,0x77,0x10,0x32,0x3d,0x8e,0x56,0x88,0x81,0xe1,0x4c,0x8b,0x46,0x60,0xcb,0xb7,0x67,0xd7,0x7b,0xc2,0x47,0x7d,0xd8,0x2d,0x4c,0x09,0x9f,0x07,0x8e,0x34,0x45,0xf4,0x50,0x69,0xfd,0x35,0x0a,0x09,0x9e,0xac,0x49,0x5f,0xdf,0x72,0x84,0x97,0x93,0x30,0x2c +.byte 0xc6,0x20,0x6f,0xb5,0x18,0x03,0xb6,0x30,0x23,0xc8,0xcd,0xa1,0x43,0xbd,0xbb,0x6f,0xde,0xb3,0xcb,0x1c,0xdd,0x41,0x71,0xfa,0x37,0xa7,0xa9,0x57,0x5a,0xf7,0xee,0xcd,0xb1,0xc1,0xb6,0x78,0x1c,0xe3,0xde,0x5c,0x02,0xc8,0xce,0xb7,0x8e,0x72,0xce,0xfd,0x79,0xcf,0x1a,0xef,0xcb,0x5b,0x5d,0x3c,0x1d,0xc8,0x1e,0x9f,0x67,0x26,0x86,0xd3 +.byte 0x3b,0x98,0x49,0x04,0xcd,0x1b,0x48,0x7c,0xa6,0xbe,0x37,0x0b,0x19,0xb1,0xb7,0x8a,0x74,0x0a,0xd9,0x4f,0x7b,0xbb,0x8e,0xc6,0x9b,0xdd,0xbc,0x61,0xfd,0xdd,0x86,0x7e,0x70,0x2e,0xe4,0x94,0xb4,0x62,0x47,0x6b,0x7c,0x92,0x41,0xda,0x05,0xdc,0xaf,0x5c,0x93,0xbc,0x7d,0xad,0xce,0x44,0x9e,0x27,0x1c,0x74,0x30,0x01,0xf2,0x8a,0x22,0xce +.byte 0x88,0x61,0xf5,0xb8,0xe2,0xf0,0xca,0x14,0x21,0x53,0xd3,0xbe,0x95,0x8f,0x52,0x10,0x21,0xc5,0x25,0x16,0xa1,0x4f,0xef,0x9a,0x6f,0xce,0xe9,0xee,0x06,0xa8,0x32,0xa4,0xac,0xee,0xd8,0x95,0x0b,0x65,0x10,0xbc,0xb3,0x15,0x48,0xf9,0x96,0xee,0xde,0x5d,0xf6,0x38,0x5f,0x32,0x70,0xd1,0x29,0xa8,0x1d,0xdc,0xf4,0x34,0x2d,0x0c,0x93,0x48 +.byte 0x8c,0x40,0xed,0x35,0x41,0xfe,0x4b,0xab,0x20,0x7d,0x95,0x74,0x02,0xe5,0x71,0x76,0x7e,0x59,0x35,0xb3,0xd7,0x43,0x1f,0xd4,0xe6,0x02,0x86,0xba,0x4f,0x53,0xd9,0xc3,0x7d,0x7f,0x3d,0xb6,0xd8,0x92,0x07,0x89,0x99,0x46,0xf8,0x09,0xcd,0x19,0x43,0x93,0xa7,0xc1,0xb2,0x5d,0xec,0xbf,0x09,0xf4,0xba,0xfc,0xf7,0xf1,0xa7,0x2e,0xfe,0x71 +.byte 
0x04,0x58,0xab,0x16,0xd7,0xc0,0xf7,0x03,0xd4,0xc4,0xb9,0xe4,0xd8,0xfc,0x5b,0x66,0xa6,0xb3,0x6a,0x94,0x0e,0xba,0x8c,0x54,0x5c,0x8c,0x02,0x0a,0x33,0xcb,0xde,0x1c,0xad,0x6d,0xef,0x48,0x05,0xa6,0xca,0x9a,0x27,0xd6,0x1c,0xc3,0xea,0x3a,0x46,0x20,0xec,0x72,0xc4,0x94,0x89,0x7e,0xba,0xa9,0x2f,0xe5,0xec,0x1a,0xe4,0x50,0x54,0xeb +.byte 0xd9,0x5a,0x08,0xc5,0x84,0xc1,0x9a,0xdf,0xb0,0xd4,0x9a,0x6d,0xa2,0x93,0x52,0xd2,0x4d,0x69,0x88,0xc8,0x40,0x2d,0x26,0xbd,0x7a,0x37,0x04,0x21,0xe1,0x9d,0xc9,0xed,0xda,0x7a,0x4c,0x11,0x49,0x14,0x42,0xa1,0xdb,0x6e,0xed,0x1b,0x37,0xbf,0x09,0xac,0x35,0xda,0x80,0xf6,0x75,0xd4,0x32,0x54,0xb5,0x18,0xe8,0x79,0x25,0xc4,0x95,0xe8 +.byte 0x74,0xcf,0x6d,0xac,0x34,0x1f,0xea,0xd4,0x2e,0xd1,0x77,0x5e,0x90,0x8f,0x12,0x51,0xbb,0x3c,0xdf,0xe6,0xf4,0x49,0x8c,0x0f,0x9a,0x8e,0xe3,0x96,0xbd,0xba,0xe6,0x47,0x4b,0x50,0xc7,0xa9,0x29,0xea,0x09,0x5d,0xef,0x3c,0x91,0x48,0xc6,0x37,0xfd,0xac,0x7b,0xe5,0x04,0x25,0x93,0x0b,0xe3,0xce,0x32,0x46,0x38,0x81,0x97,0x57,0xbe,0x1f +.byte 0x3c,0x61,0x2d,0xd1,0x4e,0xca,0xbb,0x44,0xc6,0xfd,0xdf,0xdd,0x11,0xbf,0xbf,0xa8,0xc0,0x32,0x67,0xc1,0x2e,0xd7,0xbe,0x3c,0xe3,0xcb,0x57,0xa5,0x6d,0xbb,0x8e,0x0f,0x69,0x22,0x42,0xef,0x53,0x0f,0xce,0x09,0x6a,0xda,0xbf,0xd6,0xed,0x61,0x67,0x82,0x83,0x13,0x63,0x97,0x7d,0x1a,0xad,0x34,0x77,0x37,0xa6,0xe0,0x89,0xaa,0xd4,0xb6 +.byte 0x8f,0x93,0xff,0xb8,0x8f,0x63,0x14,0xfd,0x17,0xff,0xe5,0x7c,0x83,0x23,0xaa,0xe0,0xb9,0xd9,0x94,0x3a,0x1a,0xe7,0xa5,0xbd,0xa6,0x2b,0xd3,0x49,0xca,0xeb,0x7d,0x87,0x1d,0x54,0x16,0x93,0xec,0x14,0x8b,0x77,0x3c,0xb4,0xbe,0x33,0x76,0x5e,0xcb,0x33,0x27,0xd3,0x20,0xd6,0xed,0x0c,0x66,0xb8,0xe0,0x00,0xa6,0x76,0xcd,0x8b,0xb4,0xef +.byte 0x11,0xbc,0xe5,0x59,0xcf,0x1d,0xf5,0x15,0x58,0x4a,0xe1,0xfd,0x87,0x8c,0x7b,0xb9,0xa4,0x42,0x5a,0xed,0x51,0x7e,0x8d,0xa6,0x19,0xaa,0xc4,0xa6,0x14,0x74,0x45,0xb1,0xda,0x87,0x0f,0xd7,0xe7,0x66,0x3b,0xcd,0x04,0x02,0x14,0x20,0x41,0x15,0x4c,0x33,0x79,0x80,0x7d,0xd4,0x44,0x2c,0xab,0x6c,0xf4,0xa8,0xd4,0x31,0x43,0x7b,0xa7,0xc7 +.byte 0x65,0x0e,0x32,0xc8,0xc8,0x6d,0xf5,0x65,0x1b,0x26,0xf1,0xe4,0x68,0x15,0x88,0x1b,0x00,0x60,0x23,0x31,0xd7,0x4b,0x57,0xda,0xf1,0x19,0xa9,0xd9,0xaf,0xe6,0xa9,0x1e,0x2c,0x0d,0x23,0xe4,0x5b,0xcb,0x43,0x38,0xf0,0x93,0xd3,0xfb,0x6a,0x9b,0x83,0x30,0x55,0x96,0x9f,0x53,0x06,0x3f,0xaf,0x40,0x69,0xef,0x9a,0x47,0x6b,0xba,0x7c,0x10 +.byte 0x10,0x44,0x89,0xfa,0xb9,0x9e,0x70,0xed,0x25,0x59,0x68,0xae,0x9b,0x17,0xcf,0x80,0x6f,0x34,0xb8,0x07,0x40,0xe5,0x27,0x6d,0xcd,0x46,0x2c,0x36,0x90,0xf3,0x83,0x74,0x68,0x35,0xf2,0x05,0xa8,0xdf,0x4e,0x34,0xc5,0xb4,0xeb,0x5a,0x7d,0xe6,0x10,0x8a,0x23,0x54,0xeb,0x9b,0x27,0xf2,0x07,0xee,0xf9,0x05,0xc2,0x5a,0x88,0xbd,0x49,0x2e +.byte 0x1b,0x00,0x31,0x68,0x4a,0xc9,0x3a,0xc5,0x93,0x82,0xa8,0x39,0xba,0x55,0xcd,0xc1,0xda,0x49,0xc2,0x4c,0xf4,0x93,0x00,0xcf,0x61,0xa4,0xbb,0x8c,0x64,0x33,0x90,0x14,0x6d,0x1d,0xad,0x75,0x97,0xd9,0x1d,0xfb,0x27,0x67,0x43,0x04,0xdc,0x4e,0xdf,0x0e,0x0c,0x7e,0x1c,0x89,0xfe,0x31,0xb7,0x9b,0x07,0x5e,0x99,0x08,0x22,0xef,0x6e,0x4d +.byte 0x8b,0xd6,0x27,0xe6,0x24,0x1a,0x28,0xb0,0x22,0xa5,0x69,0x17,0x82,0x46,0xe3,0x90,0xe8,0x04,0xae,0x90,0x66,0x14,0xec,0xa2,0x1b,0x7e,0x09,0x13,0x32,0x9d,0xec,0x8b,0x51,0x5f,0xa8,0x96,0x8f,0x4c,0xc6,0xbd,0x5c,0x70,0x29,0x21,0xac,0xe9,0x6e,0xb0,0x0c,0x61,0x50,0xba,0xcc,0x55,0x71,0xda,0x2a,0x92,0x86,0x0c,0xff,0xaf,0x7a,0xcf +.byte 
0xaf,0x2a,0xbd,0xd6,0x15,0xa4,0x4c,0x2e,0x76,0x0d,0xcf,0x10,0x11,0x4a,0xd1,0x89,0xdd,0x46,0x5f,0x6b,0x5a,0x02,0x05,0x49,0x6f,0x98,0x6a,0xa7,0x8a,0x66,0x87,0x59,0x23,0xb5,0x3f,0x2e,0x95,0x73,0xfe,0x48,0xe9,0x0d,0x17,0xa6,0xa5,0x4e,0x40,0x98,0x79,0x40,0x1a,0x10,0x1d,0x84,0xdd,0x6f,0x17,0xa7,0xb7,0xfb,0x49,0xbd,0x54,0x97 +.byte 0x0f,0x42,0x25,0x95,0x83,0xf0,0x97,0xe7,0x4c,0x24,0xb5,0xe8,0x23,0x0a,0xd6,0xbf,0xef,0x2c,0x03,0x4f,0x87,0x59,0xe8,0x80,0x87,0xcc,0x51,0x1b,0x94,0xd8,0x60,0xe7,0x10,0x4d,0x01,0xfd,0x83,0xf2,0xd8,0x8d,0x1b,0x33,0xbf,0xaf,0x36,0x41,0x47,0x51,0xe0,0x45,0x2a,0x05,0x5f,0xe1,0x92,0xf8,0xa5,0x15,0x46,0x35,0xd8,0x9b,0xe0,0xff +.byte 0xee,0xa6,0x4e,0x7d,0xfd,0x96,0xa5,0x75,0xdf,0x7e,0xb0,0x7d,0x14,0x73,0xdd,0xbe,0x17,0x6d,0xdd,0xec,0xac,0x9a,0x92,0x68,0xe3,0x44,0x16,0x63,0x22,0xa8,0x15,0x58,0x8c,0x11,0x23,0x46,0x18,0xae,0x47,0x39,0x87,0xc7,0x4c,0x30,0x09,0xce,0xe5,0xc4,0xd8,0x82,0xc6,0xc6,0x3d,0x31,0xf6,0x0f,0xb5,0x69,0x61,0x63,0x88,0xd6,0xb8,0xda +.byte 0x89,0x29,0x87,0x69,0x6e,0x3f,0x55,0x2f,0xbc,0x91,0x91,0x43,0x7d,0xb3,0x7b,0x99,0x5a,0x5a,0xb0,0x7d,0x90,0xa7,0xe7,0x30,0x0d,0x32,0xb2,0x43,0x43,0x78,0x59,0x6e,0xbb,0xd7,0x76,0xd4,0x5b,0x4d,0xc4,0xa9,0x99,0xdd,0xd3,0xce,0x3d,0x13,0x41,0x38,0x33,0xed,0xb8,0x76,0x1a,0xbb,0xfd,0x26,0xcd,0x69,0x89,0x22,0x16,0x9a,0x21,0x35 +.byte 0x38,0x77,0x14,0x10,0x42,0x17,0x1f,0xa1,0xbf,0x55,0xb4,0x51,0x62,0x15,0xac,0xd0,0xa2,0x71,0xe4,0x32,0x89,0x33,0x8b,0x74,0xc6,0x61,0x38,0xd0,0xfe,0x28,0x69,0xe6,0x88,0x1b,0x11,0x7e,0x46,0x39,0xba,0x24,0xdd,0x1f,0x61,0xf4,0x74,0xad,0x58,0x94,0xa9,0x3e,0xc7,0x2a,0x9e,0xc0,0xe1,0x1c,0xee,0x21,0xab,0x3e,0x65,0x0c,0xe8,0xd8 +.byte 0x71,0x52,0xf3,0x6c,0x64,0x53,0x75,0x17,0x87,0x55,0x14,0x42,0x25,0x7f,0xe7,0x0d,0x89,0x1b,0x77,0x26,0xc4,0xaa,0xcc,0x91,0x47,0xe5,0x54,0xae,0x1a,0x0d,0x04,0x99,0xeb,0x56,0xd8,0xb4,0x6d,0xeb,0xec,0x2f,0x6c,0xc5,0x8e,0x76,0xe1,0xa0,0xa7,0x42,0x06,0xc9,0xc3,0x03,0xee,0xa9,0x9b,0x1e,0xfc,0x11,0xf5,0x2f,0x2b,0x14,0xb8,0x9f +.byte 0x87,0x61,0x9b,0xc7,0x38,0x0e,0x58,0xf1,0xd4,0x36,0xca,0x82,0x85,0x9c,0xde,0xec,0xd3,0x1e,0x29,0x4e,0x70,0x9e,0x9a,0xe0,0x8b,0x6f,0xfe,0xd0,0xe9,0x95,0x51,0xcf,0x36,0x31,0x9c,0xff,0x63,0xc6,0x04,0x8e,0x61,0xc2,0xcb,0x3a,0xfa,0xd0,0xd7,0x29,0xbd,0xe7,0x8a,0x2b,0x8e,0xa0,0xac,0x58,0x93,0xb3,0x52,0xca,0x80,0x17,0xd2,0x2d +.byte 0x93,0x5f,0xe0,0x8a,0x47,0x3c,0x67,0x95,0x64,0x91,0xa4,0x76,0xa4,0x5f,0xfa,0x93,0x4d,0xc7,0x6e,0x5d,0x23,0x9f,0xe1,0x4a,0x16,0xff,0xa5,0xf0,0x94,0xa8,0x02,0xcc,0x9a,0x84,0xd5,0x9d,0xb6,0xe5,0x7c,0x76,0x3f,0xc9,0xfd,0xdc,0x8e,0x59,0x9a,0x22,0x18,0x3c,0xe6,0x90,0x85,0x10,0x73,0x2d,0x65,0xa7,0xa7,0xe1,0xeb,0xc5,0x05,0x24 +.byte 0x1e,0x0b,0x31,0x19,0xb5,0xb0,0x8d,0xc0,0xb5,0x04,0xfe,0x9d,0xfa,0xf7,0xcd,0x71,0x29,0x40,0x19,0x23,0xed,0x2c,0xdb,0x89,0x89,0x8d,0x69,0x22,0x4c,0x9c,0xa7,0xf7,0xb1,0x56,0x87,0xa3,0x44,0xa9,0xa3,0x16,0x28,0xce,0x94,0x40,0x6f,0x71,0x77,0x0e,0x6d,0xe9,0x78,0xa2,0x2a,0x17,0x45,0x03,0xeb,0x1e,0xf1,0xfa,0x56,0x3e,0xa7,0x6b +.byte 0x08,0x06,0x6a,0xcb,0x8f,0x5e,0x0f,0xd3,0x6e,0x4b,0x21,0x31,0x73,0x50,0x94,0x56,0xf9,0xb9,0xc7,0x38,0x69,0xe8,0x09,0x3f,0x03,0xb3,0xb5,0xe8,0x2a,0x5e,0xf6,0xad,0xae,0x6f,0xab,0x6a,0x49,0xdd,0x93,0x6d,0xfb,0x8b,0xde,0xea,0x8b,0xb0,0xa1,0x44,0xf0,0xb3,0xf6,0xaa,0xe3,0xc8,0x04,0x87,0x9f,0x8b,0xee,0xab,0x13,0x1d,0x2d,0xeb +.byte 
0x09,0x62,0x21,0x49,0x5f,0xb6,0x95,0xab,0xc4,0xee,0x69,0xfb,0x31,0xff,0xbf,0x1a,0xa6,0x4c,0x67,0x66,0x84,0xe6,0x0c,0xb7,0xb2,0x3e,0x3f,0xa4,0xb3,0x52,0xde,0x15,0xc9,0xa7,0xa9,0xb5,0x0d,0xe5,0x0b,0x99,0xa6,0xb6,0x8f,0x69,0xc5,0x6d,0x6c,0xbb,0x83,0x89,0x4e,0xfc,0x49,0x79,0x4d,0x46,0x31,0xa0,0x09,0x5f,0x5d,0xd0,0x5b,0x80 +.byte 0xa1,0xf4,0x36,0x48,0x97,0x6a,0xfd,0x34,0xcb,0x20,0xa8,0x01,0x25,0x04,0xe7,0x13,0x12,0x87,0x66,0x27,0x96,0x36,0xba,0x92,0xbd,0xda,0x94,0x11,0xef,0x90,0xbd,0xbc,0x9e,0xf9,0x63,0xb3,0xa6,0xc1,0xbb,0x46,0xe8,0x86,0x3f,0x2d,0xf9,0x11,0x3a,0x23,0xa8,0x7a,0x33,0x41,0x3e,0x2e,0x5d,0xde,0xc0,0xd2,0x23,0xca,0x41,0xa0,0xb9,0x70 +.byte 0x6d,0x31,0xf3,0x89,0x87,0x9b,0x72,0xd9,0x15,0x4d,0x8b,0x51,0xdd,0x56,0xa1,0xb4,0x68,0x52,0x65,0x81,0x12,0x46,0xea,0x24,0xb4,0x34,0xcc,0xa0,0xdb,0x7d,0x96,0xd9,0x8e,0x64,0x61,0x10,0x7c,0x2a,0x00,0x4d,0x82,0x61,0x54,0xa4,0x70,0x3d,0x9c,0xa5,0x0b,0xd2,0x08,0x71,0xa8,0x94,0xb1,0xb4,0x30,0x61,0x59,0x9f,0x72,0x61,0x56,0x2d +.byte 0xa3,0xf4,0x9d,0x1c,0xfc,0x49,0x9d,0x39,0x27,0xcb,0x54,0xb2,0xce,0x3c,0xb6,0x76,0xe5,0x8e,0xa5,0xe7,0x08,0xd4,0xc7,0x2c,0xa6,0x28,0xc8,0x3e,0x22,0x14,0x06,0x75,0x68,0x0d,0x6b,0xb5,0xa3,0x68,0x14,0x17,0xfe,0xb8,0xcc,0x26,0x5b,0x9d,0x0b,0xcc,0x3e,0xd7,0x6c,0xe0,0xec,0x5e,0x1e,0x1e,0xb8,0x9a,0xbe,0x91,0xb5,0xa6,0xb5,0x83 +.byte 0x28,0xc2,0x35,0x65,0xd3,0xde,0xdd,0x71,0x29,0x13,0xc1,0xee,0x78,0x22,0x34,0x0b,0x77,0x3a,0x48,0x98,0x26,0x43,0xc2,0xce,0x03,0xe8,0x75,0xf8,0x8a,0xdf,0x6a,0xb0,0xb4,0x8c,0x11,0x8c,0xe5,0x95,0x96,0x17,0xfb,0x06,0x5e,0x8f,0x36,0x10,0xc5,0x04,0x43,0x1b,0xed,0xd3,0xad,0xd4,0xa4,0xe0,0x17,0x85,0xed,0x9b,0xd8,0xae,0x98,0x46 +.byte 0x58,0x57,0x0e,0x46,0xea,0x3f,0x07,0x6d,0x0e,0x46,0xda,0x2f,0x68,0x2b,0xd6,0xe7,0x0d,0x4b,0xbe,0x32,0xee,0x10,0x73,0x18,0x7d,0x6b,0x2d,0x04,0x27,0x72,0xb1,0xe1,0xbf,0x89,0xaa,0x4d,0x1a,0xfc,0xbd,0xf2,0xc3,0x9f,0xf0,0x01,0x85,0x62,0x09,0x4d,0x08,0x2c,0x57,0x9a,0x7b,0xad,0x0b,0x79,0xff,0x14,0xa1,0x45,0xde,0x21,0x8f,0xe2 +.byte 0x93,0xd0,0x35,0x26,0xc3,0xbc,0x8c,0xb7,0x57,0x6a,0xdf,0x98,0xa7,0x75,0xc6,0xf6,0x4b,0x5f,0x91,0x6e,0x71,0x3a,0x5c,0x5f,0x57,0x63,0x34,0x87,0xf8,0x20,0x6a,0xa1,0xbf,0xf8,0xca,0x8e,0xf9,0xa9,0x10,0x8b,0xab,0x0b,0xc2,0xcc,0x71,0x89,0x7c,0xef,0x70,0x3a,0xb0,0xf6,0x90,0xcc,0x6b,0x2c,0xcc,0x8b,0x2a,0x21,0x78,0x23,0xa0,0x71 +.byte 0x8c,0x7b,0xc1,0x0f,0x27,0x72,0x40,0xe4,0x9e,0x35,0xf3,0x0a,0xc0,0x7e,0x7f,0xe5,0x9b,0xdb,0x93,0x49,0x08,0xc3,0x6b,0xb7,0xea,0xea,0xd4,0x5a,0x96,0x97,0x3c,0xdf,0xc7,0x02,0x39,0x9f,0xa3,0xca,0xdd,0x62,0xf3,0x68,0xc7,0xae,0x37,0xc1,0x35,0x73,0xb2,0x5d,0x99,0xe4,0xae,0x27,0x55,0x5e,0x6a,0xae,0x6f,0x1a,0x95,0x51,0xb1,0x3b +.byte 0xd7,0xb4,0x4d,0x3d,0x88,0x54,0x01,0xbe,0x2c,0x12,0x17,0x29,0x4f,0xf3,0xed,0x5a,0x1f,0xa9,0xf0,0x67,0xbd,0x7c,0xad,0xe5,0x58,0x52,0xd4,0xd1,0xfe,0x1e,0x1b,0xd6,0xce,0x7c,0xc3,0xa2,0xa9,0x72,0x9b,0x6a,0xe5,0xf9,0x39,0x22,0xaa,0x7f,0x2e,0xa2,0x53,0x75,0xf0,0x99,0x2e,0x36,0x86,0x83,0x10,0x63,0xd7,0xac,0xa3,0x52,0xa6,0x23 +.byte 0x80,0x46,0xe4,0xa9,0x07,0x79,0xe1,0x61,0x75,0xbf,0x08,0x31,0x6c,0xdd,0xe1,0x30,0xd0,0x35,0xc2,0xbd,0x30,0xb8,0x85,0xf3,0xd2,0x2c,0x90,0x7a,0xf0,0xd3,0x80,0xe5,0xf1,0xc2,0x58,0x3d,0xf7,0x3c,0xbc,0xff,0x03,0x4d,0xf7,0xad,0x2f,0xa6,0xfe,0x73,0xde,0xa8,0x60,0xd7,0x89,0x4a,0xcf,0x3d,0xf3,0xab,0x62,0xfa,0x9d,0x46,0xad,0xd0 +.byte 
0x97,0x6f,0x89,0x84,0x16,0x9b,0x84,0xb2,0x6c,0x63,0x6d,0x29,0xee,0x8e,0x97,0x3c,0x48,0x19,0x92,0x62,0xdc,0x1d,0x35,0x9d,0xec,0x01,0x00,0x64,0xbf,0x4d,0x8b,0xa3,0x13,0x48,0x9f,0xb4,0x01,0x0d,0xb1,0xc4,0xf2,0xf2,0x6a,0x84,0x1a,0x07,0x3c,0x46,0xa6,0xb5,0x41,0x9a,0x32,0x7e,0xc3,0x4f,0x87,0x95,0x71,0x7a,0xbf,0x74,0xf8,0x0b +.byte 0xfb,0xa5,0xde,0xa8,0x35,0xf1,0xcb,0x04,0x8d,0x8b,0xd3,0xb0,0xc8,0x1d,0x6c,0xaf,0xb4,0x21,0x79,0x1c,0x34,0x71,0x2f,0xf5,0xc4,0xbe,0xad,0xbc,0xaf,0x2f,0x54,0x81,0xd9,0xf8,0xff,0x59,0xf9,0x4e,0x62,0x9f,0x7d,0x7c,0xe9,0xdc,0x67,0xae,0xa3,0x32,0x4b,0xf7,0x4e,0x53,0x4c,0x55,0x7d,0xc5,0xdd,0xd4,0x5d,0x93,0xb8,0x98,0x3e,0xd3 +.byte 0x15,0x65,0x52,0x78,0x5a,0xd2,0x21,0x84,0x5d,0x28,0xaf,0x44,0x7d,0x18,0xf8,0xdd,0x5c,0xc3,0x6e,0xc8,0x05,0x05,0x30,0xd0,0x82,0xf8,0x00,0x0f,0x3d,0x5c,0x62,0x7e,0xa6,0xd5,0x7b,0x9f,0xb1,0x44,0xb7,0x0d,0x22,0x81,0xe1,0x4a,0x2b,0x79,0x7e,0x39,0x4d,0x8a,0x9a,0xfd,0x94,0x0c,0xf7,0x23,0x10,0x99,0xd2,0xd2,0x8b,0x98,0xe5,0x9d +.byte 0xb0,0xbf,0xcf,0x06,0x08,0x80,0x32,0x69,0xfd,0x81,0x5f,0xb3,0x66,0x11,0x63,0xeb,0x30,0x1d,0xcd,0x5b,0x5b,0xec,0x0c,0xca,0x30,0x37,0xa0,0x82,0x79,0x75,0x87,0xc1,0xfa,0x5b,0x38,0x4b,0xe3,0xea,0x46,0x49,0x36,0x92,0x92,0xf0,0xc9,0x15,0xa5,0xec,0x9e,0x21,0xb6,0x9f,0xb4,0x6d,0xf6,0xef,0x5c,0x2f,0x7d,0xa4,0xb3,0x25,0xfb,0x13 +.byte 0x40,0xe1,0xa0,0x20,0x4a,0x3a,0xe2,0x3e,0xf5,0xe0,0x68,0x61,0x11,0x9a,0xfb,0x1e,0xe8,0x1b,0xe0,0x17,0x9c,0x8a,0xe5,0x53,0x74,0xdd,0xec,0xc6,0x03,0xc6,0xd0,0x9b,0xc2,0x0b,0x77,0x4c,0x36,0x2b,0xac,0x4e,0x4d,0xd2,0x26,0x70,0x39,0x96,0xb4,0x11,0x1a,0x5b,0xcc,0x3f,0xb9,0xcf,0x0d,0x04,0x55,0x05,0x00,0x66,0x8f,0xa9,0xec,0x31 +.byte 0xe5,0x47,0x4c,0x9b,0xb7,0x6e,0xa5,0xe7,0x9e,0x70,0xf4,0x02,0x2a,0x3c,0xa2,0x03,0x04,0x30,0x9e,0x3f,0x7c,0xaa,0x0a,0x8f,0x55,0x61,0xca,0x50,0x35,0xe6,0xa4,0x24,0x61,0x26,0x31,0x9e,0x9e,0x77,0x0d,0x15,0x3a,0xc0,0x88,0x32,0xb5,0xbb,0x3d,0x3e,0x59,0x25,0x52,0x81,0x2e,0x4b,0xc6,0x5d,0x9f,0x87,0x0f,0x1f,0x5e,0xec,0xdd,0xbe +.byte 0x32,0x6c,0x71,0xef,0xd2,0x9c,0xfd,0x70,0xc8,0xf6,0x1f,0xb9,0xc9,0xdd,0x4d,0x39,0x61,0x92,0xbd,0x0c,0x48,0x63,0x4b,0xd2,0x2b,0x8c,0x4b,0x35,0xb1,0x8e,0x04,0x44,0x3c,0xe1,0xde,0xfd,0x6e,0xde,0xeb,0x94,0x51,0xea,0x36,0x7b,0xc6,0x87,0x15,0x34,0x68,0xa0,0xb8,0x94,0xb6,0x56,0x33,0xf4,0xab,0x84,0xed,0x1c,0x36,0x91,0xa7,0x1b +.byte 0x03,0xca,0x48,0x64,0x16,0x5b,0x4b,0x69,0x47,0xae,0xd7,0xc9,0xcf,0x74,0xd2,0xbd,0x60,0x04,0x7c,0x66,0xe9,0x12,0x92,0x40,0x78,0x23,0x0b,0x5b,0xa0,0xda,0xf7,0xe4,0x9a,0xad,0x9c,0x31,0xe7,0xaa,0xad,0x5a,0xc3,0x45,0x00,0x6c,0xd3,0x4d,0x93,0xdf,0xb6,0x68,0x11,0x3f,0x2a,0xbc,0x9a,0x8d,0xeb,0x0f,0xb5,0xa9,0x8e,0xa5,0x2c,0x99 +.byte 0x94,0x8d,0x21,0xa9,0x41,0x6b,0x11,0x2e,0x02,0x21,0xd8,0xc1,0xbc,0xf0,0x2a,0x87,0xae,0x35,0xa9,0x78,0x5c,0x43,0xb8,0xb7,0x63,0x2d,0x09,0x31,0xae,0x6f,0xfc,0x39,0x7b,0x18,0xc3,0xce,0xe3,0xfa,0x51,0x70,0xc7,0x6b,0x5e,0xc3,0xce,0xc8,0xa2,0x3a,0x66,0x9e,0xfe,0x45,0xb4,0xa2,0xaf,0x81,0x03,0x74,0xbf,0x0c,0x65,0x4c,0x30,0x27 +.byte 0xd5,0x34,0x29,0x2d,0x83,0xa8,0xb9,0x1d,0xf8,0x12,0x09,0x51,0xdd,0x0e,0x66,0x95,0xf3,0x94,0xaa,0x83,0x3a,0x6f,0x8a,0x7c,0x3a,0x29,0x82,0xbb,0x80,0xa1,0x37,0x8c,0x79,0xf4,0x4a,0xa8,0xe4,0x17,0x72,0x77,0xee,0xc4,0xaa,0x25,0xd3,0x8f,0x2e,0xaf,0xb9,0xb2,0x3c,0xa6,0xd5,0x72,0x97,0x07,0x23,0x38,0xae,0x9e,0x22,0x08,0x85,0x70 +.byte 
0xfa,0xff,0x38,0xe6,0x96,0x9f,0x2c,0x11,0x14,0x16,0x9a,0xfa,0x5a,0x7b,0x05,0x31,0x3e,0x20,0xbf,0x4d,0x87,0xaa,0xba,0x94,0xcd,0xdb,0xeb,0xec,0x29,0x58,0x4e,0x43,0x12,0xe8,0xf9,0x01,0x50,0xc8,0x51,0x7a,0x61,0x12,0xe9,0xed,0xc2,0xd6,0x2e,0xd3,0xed,0x54,0x72,0xf7,0x1b,0x0c,0x8c,0xb4,0x65,0xea,0x22,0x31,0x22,0xeb,0xcd,0x53 +.byte 0x66,0xf1,0xa5,0x34,0xe9,0x81,0x74,0xcb,0xb5,0x6b,0x45,0x71,0x69,0x6d,0x84,0xe8,0xc6,0x86,0xc9,0xdd,0x0c,0xa4,0x30,0x12,0x08,0x42,0x10,0x6b,0xcd,0x65,0x6c,0xfd,0x9c,0xde,0x77,0x3c,0x32,0x09,0xef,0x99,0x27,0x0e,0x4a,0x72,0x03,0x8d,0xb5,0x68,0xa0,0x67,0xf7,0xc2,0xae,0xb8,0xce,0x41,0x70,0x4e,0xdd,0x13,0xcb,0x3f,0x05,0x4e +.byte 0xf4,0xbc,0x88,0x98,0x2f,0x42,0x4e,0x5f,0x3e,0xcb,0x2c,0xd3,0x2f,0xb8,0x92,0xbb,0xd8,0x95,0xc8,0xaf,0xa9,0x44,0x8b,0xf0,0x2f,0x81,0xd4,0xe7,0x06,0x19,0xf7,0xa7,0x0a,0x73,0x3e,0x30,0xd9,0x00,0xe4,0x2d,0x76,0xb1,0x0d,0xfa,0x12,0x1f,0xbe,0x59,0x4f,0xf7,0xc8,0x5b,0xab,0xd7,0x16,0x3d,0x7e,0x97,0x9e,0xec,0xf8,0xcb,0x31,0x2e +.byte 0xe0,0x41,0x0b,0x00,0xa6,0x6d,0xe9,0x5e,0xd5,0x4a,0xc5,0xbf,0x1c,0xcc,0xa5,0x71,0x94,0x29,0x3d,0x17,0x43,0x27,0x63,0xc4,0xc7,0x8f,0x1b,0xb7,0x5f,0xcf,0xdf,0x8e,0x6a,0x69,0x87,0xc1,0x29,0xab,0x7b,0x8d,0xdf,0x07,0x95,0x50,0xa3,0x1c,0x8e,0xdc,0x7f,0x8a,0x21,0x37,0x1e,0x26,0xa7,0x67,0x28,0xb2,0xc8,0x23,0x5a,0x1d,0x94,0x46 +.byte 0x1b,0x3e,0x72,0x87,0x73,0x08,0xe2,0x3b,0x46,0x51,0xbe,0x5b,0xa9,0x72,0xb9,0xf8,0x45,0x6d,0x0c,0x89,0x80,0x0d,0x7a,0xfb,0x4c,0x3f,0x7f,0x3d,0x29,0xff,0xef,0xb2,0xec,0x23,0xc2,0x26,0xcf,0x8c,0x2e,0x28,0xbf,0xc5,0x68,0x47,0xd9,0x49,0x95,0xf1,0x67,0x7e,0x3a,0x48,0xe2,0x43,0x5c,0xc8,0x95,0x5b,0xb2,0xf3,0x22,0xc9,0x73,0x91 +.byte 0xb5,0x78,0x96,0x1b,0x9a,0x75,0x5f,0xb2,0x6b,0x8c,0x66,0x8c,0x8e,0xc1,0xe1,0xde,0xd6,0x64,0x31,0xe1,0x7b,0x12,0xd2,0x85,0x8f,0x52,0x68,0xec,0x80,0x26,0x3d,0xcc,0x9b,0xe3,0x57,0xbe,0x19,0x42,0xb9,0xdd,0x7d,0x2b,0x5b,0x6d,0x1b,0x9e,0x96,0xd7,0x75,0x83,0x82,0x3c,0x3e,0x5f,0xf8,0xa9,0x36,0xbe,0x14,0xc7,0xce,0x9d,0x05,0x7e +.byte 0xd7,0x38,0x37,0x35,0xc9,0x37,0x8b,0x9f,0xc6,0x2d,0xff,0x00,0x41,0xff,0x1b,0x09,0xea,0xd2,0xb0,0x04,0x48,0xff,0xfc,0xb5,0x67,0x54,0x39,0x3d,0x23,0x68,0x0b,0x7d,0x97,0xf3,0x65,0x20,0xa2,0xf8,0x33,0x96,0xd1,0xf4,0xc7,0xba,0x6f,0x00,0x95,0x36,0xf6,0x33,0xd1,0x8d,0xde,0xee,0x1e,0xfa,0x60,0x8e,0x5e,0x4c,0x70,0xbb,0x53,0x79 +.byte 0xc9,0x9a,0xdf,0x3c,0x53,0xe4,0x35,0x87,0xc3,0xe6,0x8e,0x0e,0x1a,0xd0,0xf8,0x57,0x2b,0x33,0x51,0x4d,0x7d,0x43,0x17,0x3e,0x6f,0x0e,0xca,0x86,0xb2,0xc6,0x09,0xf3,0x2f,0xc1,0x5f,0x0e,0x9a,0x5e,0x7d,0x9d,0xf7,0xff,0x09,0x46,0xe5,0x30,0x91,0x61,0x93,0xb5,0x2f,0xc5,0x7f,0x09,0x0b,0x55,0x94,0x17,0x25,0x19,0x9b,0xa9,0x0e,0x68 +.byte 0x71,0x18,0x1b,0x4b,0x1b,0xa3,0x75,0x90,0x56,0x96,0x5e,0x33,0x71,0xf2,0x06,0x69,0x07,0x04,0xcb,0x8c,0x79,0x9b,0xa5,0x17,0xd8,0xd8,0x77,0xc7,0xca,0x95,0x58,0x12,0xec,0xdd,0x41,0xc9,0x12,0x16,0x9a,0xc4,0xf0,0x27,0x7a,0x8e,0xeb,0x19,0x79,0x27,0x7b,0x2e,0x55,0x96,0x57,0x19,0xbe,0x55,0x8c,0x7f,0x97,0x90,0x80,0x40,0x5d,0x5a +.byte 0xf6,0x07,0xd6,0xb4,0xc5,0xe8,0x0e,0x54,0xde,0x78,0x23,0xca,0x39,0x90,0x42,0xb6,0x8b,0x14,0x22,0x06,0x71,0x77,0xd5,0xf7,0x8d,0x05,0x9d,0xbf,0xfe,0x38,0x91,0xba,0x79,0x85,0x30,0x47,0x25,0xf0,0xa2,0x72,0x55,0x94,0x2a,0x8a,0xc8,0x28,0xc8,0xa9,0x23,0xab,0xf0,0x4e,0x49,0x2f,0x58,0x53,0x35,0xd1,0xb6,0x16,0x81,0xc2,0x25,0x18 +.byte 
0xd9,0x71,0x91,0xc4,0x81,0x3e,0xf4,0xd7,0x87,0x9e,0x57,0x78,0xf7,0x7d,0x4b,0xb2,0xfd,0x91,0x9f,0xa8,0x0e,0x77,0xb3,0xc7,0xe5,0x6a,0x95,0x17,0xc3,0xf4,0xcb,0x7f,0x96,0xc1,0xa8,0xee,0x6a,0x0f,0x1f,0x5d,0x20,0x28,0x93,0xe5,0xf3,0x13,0x46,0x53,0x47,0x9f,0x98,0xc6,0xf5,0x29,0x69,0xb9,0x83,0x36,0x03,0xa1,0x9a,0xb4,0xa9,0x4e +.byte 0xd6,0xda,0x25,0xe2,0x5b,0xbb,0x95,0xdf,0x0f,0x37,0x0b,0x02,0x51,0x03,0xd1,0x0e,0x84,0xef,0xdd,0x85,0xdd,0xae,0x10,0x32,0x65,0x03,0x65,0xf0,0x8e,0x0c,0x69,0x90,0x35,0x26,0x36,0xe8,0x05,0x46,0xe6,0xce,0x52,0x4d,0xb5,0x93,0x9f,0xe3,0xe5,0xb0,0x43,0x57,0x32,0x5d,0xca,0xd4,0xc9,0x89,0x2e,0x5b,0x03,0x8a,0x82,0x78,0x21,0x6b +.byte 0x41,0xa9,0x0a,0x9f,0xe0,0x50,0xec,0x72,0x01,0x67,0xe7,0x1c,0x92,0xe3,0xe4,0x83,0x4d,0x4b,0xcf,0x01,0x37,0x2f,0x34,0x86,0xcf,0x36,0xf7,0x3a,0x57,0xa3,0x89,0x73,0x0f,0x9c,0x06,0x82,0x75,0x7a,0x4b,0xd8,0x44,0x40,0xf2,0xc5,0xc4,0x22,0xa6,0x99,0x1b,0x73,0x2f,0xad,0x09,0xe9,0x84,0x6f,0xc3,0xca,0x72,0x3a,0x8a,0x55,0x55,0x0a +.byte 0xcd,0x33,0x51,0xef,0x5b,0x36,0x77,0x6c,0xb4,0x4a,0xae,0xdd,0xbd,0xec,0x65,0x99,0x43,0xd6,0x8a,0x16,0xba,0x89,0x4d,0x0c,0x11,0xb4,0x0d,0x5d,0x3e,0x76,0xcb,0x48,0x9d,0x31,0x40,0x71,0xe2,0xe4,0xa9,0xd9,0x6e,0x3c,0x3d,0xd1,0x6e,0xaf,0xb9,0x28,0x71,0x5a,0x07,0x6f,0xab,0xdb,0xf8,0x4f,0x11,0xbc,0xe0,0x14,0x01,0x43,0x4d,0xe2 +.byte 0xad,0x5d,0x2a,0xb2,0x58,0x66,0x05,0x50,0x66,0xf6,0x2f,0x66,0x11,0xd1,0xd7,0x05,0x85,0xb0,0x7f,0xa8,0x89,0xbd,0x41,0xda,0x35,0x1e,0xbb,0xff,0x70,0x1a,0xe8,0x65,0x96,0xe9,0x50,0x18,0x7f,0x4c,0xb2,0xe2,0x95,0x26,0xf6,0x37,0x09,0x8c,0x8d,0x7b,0x02,0xb0,0x7f,0x32,0xb5,0x70,0x22,0xd6,0x83,0x0b,0x85,0x25,0x00,0xc5,0x55,0x3f +.byte 0xfa,0x7a,0xc9,0xaf,0x87,0xc1,0x1c,0x11,0x96,0x71,0x18,0xd8,0xdb,0xab,0x86,0x57,0x0a,0x16,0x23,0x32,0x40,0xd3,0xaf,0x17,0x55,0xe3,0xe7,0x01,0x65,0x1f,0x87,0xda,0xb5,0x46,0x67,0x18,0x34,0xcc,0x28,0x77,0xc3,0x12,0x62,0x6c,0x8b,0x8a,0x11,0x7a,0x5a,0xd1,0xdf,0xb3,0x13,0x6b,0x29,0xce,0xf8,0x03,0xba,0xad,0x7c,0x14,0x60,0x42 +.byte 0x17,0xf6,0x7b,0x0c,0xb7,0x5f,0xd6,0xc1,0xb5,0xa5,0x2b,0xb1,0x9f,0x6c,0x65,0x29,0xe5,0xf4,0x84,0x85,0x11,0x82,0xf1,0x4c,0xcd,0xff,0x99,0x29,0x53,0x7b,0x43,0x04,0x60,0xc4,0x6c,0x01,0x5c,0xcb,0x33,0x4f,0xdb,0xc4,0xad,0x8c,0xea,0xff,0xd6,0xcd,0x8e,0x85,0x6e,0x54,0xd5,0x18,0x63,0x84,0x78,0xea,0xff,0x08,0x95,0xdc,0x2a,0x07 +.byte 0xac,0xea,0x44,0x79,0x52,0x07,0xf3,0xf1,0x03,0x7f,0x71,0x53,0xd8,0x85,0xdb,0x70,0xde,0x5e,0xd5,0x9a,0x18,0x9f,0xcc,0x3f,0xc0,0xc0,0x49,0x82,0x70,0x09,0xce,0x29,0x04,0x0a,0x19,0x81,0xd9,0x81,0x22,0x71,0x48,0x8e,0x79,0x08,0x1c,0xb4,0xc8,0x7e,0x60,0x43,0x4a,0xe3,0xd5,0x6b,0x09,0x5c,0x01,0x6e,0x20,0x9e,0xd2,0xaf,0x80,0xb7 +.byte 0xa2,0x0a,0x5b,0x26,0x08,0x32,0x73,0xbc,0xc6,0xfd,0x06,0xaa,0x2e,0x55,0xa0,0x5b,0xa9,0x3c,0x85,0xb2,0x04,0xdc,0x9a,0x94,0x02,0x93,0x96,0x6b,0x3e,0xc3,0x5e,0x37,0x9b,0x6f,0xef,0xb9,0x65,0x52,0x42,0x1c,0xa7,0x84,0x09,0x0c,0x49,0x3a,0x95,0x06,0x94,0xd7,0xc7,0x40,0xf5,0xf1,0x69,0x41,0xfb,0xf8,0x57,0xb5,0x1e,0x0c,0xf3,0xd9 +.byte 0xb1,0x2e,0x58,0x33,0xbe,0xb1,0x3d,0x61,0xc6,0xca,0x01,0xe5,0xda,0x60,0x8f,0x87,0xf7,0x9a,0xb5,0x92,0xb4,0x8c,0x2a,0xaf,0xd4,0x1e,0x9c,0x97,0x39,0x83,0x99,0x4a,0x07,0x54,0x75,0x7d,0xde,0x72,0x06,0xc1,0x8f,0xb4,0xde,0x12,0x43,0xf2,0x62,0xae,0xe7,0xec,0xfe,0xb2,0xe5,0x63,0x35,0xb7,0xee,0xaa,0xf0,0x09,0xb8,0x61,0xf2,0x42 +.byte 
0x28,0x87,0xd7,0x47,0xa8,0xfc,0x51,0x85,0x6f,0xa2,0xb1,0xa6,0x82,0xd6,0x0e,0x1b,0x3f,0xea,0xa1,0xe1,0x91,0xc9,0xd2,0x5b,0x3e,0xff,0x18,0x39,0x14,0xe0,0x44,0xda,0x3d,0xd8,0xca,0xdb,0xd9,0xbf,0x3f,0xa4,0xdb,0x99,0x2e,0x31,0x32,0x7c,0xf4,0x61,0x2f,0xa1,0xf9,0xa9,0xbe,0x26,0x94,0xea,0xb4,0xe3,0x25,0x8d,0x93,0x3b,0xa1,0x7e +.byte 0x1e,0x99,0x87,0x6c,0xaf,0x14,0x54,0xd0,0xc0,0x37,0x39,0x76,0x3c,0x07,0x2e,0xce,0x98,0x25,0x81,0xe4,0x01,0x0c,0x07,0x79,0x4e,0xcd,0x82,0x44,0x83,0x04,0x07,0xa6,0x52,0xb7,0x96,0x7c,0x43,0x12,0xe1,0xc5,0x12,0x18,0x25,0x47,0xe4,0x19,0x6d,0x26,0x1e,0x55,0x66,0xca,0x28,0x4c,0xfa,0xd2,0xd9,0xcc,0x7e,0xad,0x9f,0x2a,0x2f,0xc6 +.byte 0x6c,0x77,0xaa,0x0f,0x5b,0xeb,0x15,0x97,0x62,0x52,0x3c,0x6f,0x4b,0xf3,0xcc,0x80,0x7b,0x1f,0x1d,0x58,0xf8,0xfe,0xc1,0x8c,0x3b,0xe3,0xd7,0x05,0xc3,0xd6,0xa9,0xda,0xcf,0x85,0x1c,0x68,0xd6,0x6d,0x2b,0x06,0x30,0x5f,0x58,0x39,0xea,0xfa,0x99,0xaa,0x04,0x10,0x05,0xaf,0xb0,0xf7,0x32,0x60,0x8d,0xe4,0xd1,0x40,0x32,0xd6,0xa3,0xf2 +.byte 0xba,0x5a,0x79,0x58,0x92,0x75,0xf0,0x3a,0xce,0xb2,0xee,0x66,0x3e,0xe3,0xbe,0x4d,0x53,0x9d,0xbb,0xdb,0x45,0xf0,0x09,0xeb,0xd5,0x83,0x39,0x20,0x06,0xa9,0x44,0x35,0xeb,0x6d,0x9b,0xd9,0xa4,0xda,0x4b,0x9d,0xde,0x3d,0x26,0xa2,0x2d,0xcf,0x8e,0x3e,0xbc,0xb4,0x8c,0x3a,0xbf,0x56,0x7c,0x48,0x50,0xb5,0xc5,0xbe,0x84,0x5e,0x63,0x82 +.byte 0x5f,0x87,0x77,0x4a,0xa7,0xf6,0x66,0x07,0x42,0x6a,0xb0,0xcf,0x19,0xaf,0x6c,0x16,0x85,0x78,0x88,0x3b,0xa5,0xbc,0x42,0xd2,0x4c,0xdf,0x51,0x3b,0xc4,0x0e,0xf5,0xc5,0x70,0x57,0x40,0xf6,0xed,0xd2,0x37,0x3e,0x14,0x0c,0x31,0xda,0x94,0x87,0x6b,0xd9,0x8c,0x15,0x41,0xa9,0xc0,0x2a,0x61,0xd3,0x52,0xe0,0xb6,0x0a,0x83,0x6b,0x75,0x1b +.byte 0x1e,0xd1,0x7f,0x26,0x19,0x34,0x9b,0x70,0xc9,0xba,0xdc,0xa2,0x03,0x6d,0xc7,0xac,0xbd,0x2c,0x63,0x8a,0x7b,0xb1,0x62,0x51,0xc1,0x1d,0x54,0x0d,0x34,0x0e,0xfb,0xa6,0xb8,0x9d,0x79,0x4f,0xc3,0xaa,0x8d,0xa0,0xcc,0x80,0x96,0x86,0x37,0xd6,0x80,0x9c,0x3d,0x91,0xd0,0xe7,0xe2,0xb4,0x00,0xba,0x86,0xe9,0xeb,0x86,0xea,0x84,0x78,0x81 +.byte 0x20,0x29,0x28,0x02,0x4d,0xd8,0x1b,0x5e,0x4f,0x41,0xfc,0x13,0x3e,0x4c,0x7f,0x64,0x55,0x35,0x41,0x0d,0x74,0xc5,0x6a,0x7c,0x37,0x82,0x41,0xbd,0x67,0x39,0xd9,0x83,0xfa,0x7f,0x8c,0xe1,0x9f,0x23,0x0d,0xe4,0x1d,0x40,0xe6,0x6e,0x94,0x5d,0xec,0x77,0xf7,0x5e,0xb4,0xa1,0x03,0xfb,0xa0,0x0e,0xba,0xf8,0x28,0x50,0x3c,0x38,0x47,0xf7 +.byte 0xed,0x2d,0xe5,0x0b,0xa8,0x7a,0xbd,0xbf,0x7e,0x38,0xc0,0x60,0xe7,0x7e,0xb1,0x03,0xef,0x4a,0x8c,0xc7,0x98,0xf1,0x94,0xf6,0xa0,0x50,0xb2,0x0b,0x7c,0x66,0x0a,0x62,0x10,0x24,0xb0,0xa1,0x69,0x02,0x33,0x79,0xbf,0xd0,0xb5,0xcb,0x17,0x20,0x55,0x02,0x70,0x44,0x5b,0xac,0x20,0x35,0xea,0x05,0x2d,0x68,0x51,0xe7,0x5f,0x1b,0xcd,0x4c +.byte 0x33,0x4d,0x04,0x21,0xfd,0x06,0x67,0x82,0x60,0x98,0x1f,0x79,0xf4,0x28,0xe0,0xa8,0x18,0xeb,0xf5,0x86,0x58,0xe6,0x9f,0xb5,0x29,0x0f,0xe8,0x37,0xeb,0x09,0xf4,0xc6,0x08,0xf2,0xde,0x4d,0x96,0x48,0x62,0x36,0x63,0x10,0x3f,0x63,0xeb,0x44,0x84,0xc8,0xf5,0x74,0x19,0x03,0x50,0xf7,0x7c,0xd2,0x06,0x20,0x6e,0x9b,0xa2,0x37,0xb0,0x68 +.byte 0x78,0x31,0xb6,0x05,0xfa,0xc9,0xcd,0x1d,0x4c,0xbd,0x33,0xb7,0xf3,0x93,0x38,0x7d,0x5f,0x00,0x85,0x5b,0x10,0x7f,0xc4,0x3f,0x3e,0xfe,0x62,0xca,0x51,0x83,0x95,0xcf,0x00,0x65,0x83,0x0e,0xd3,0x78,0xd0,0x51,0xcb,0x70,0x34,0x42,0xc6,0x3a,0x04,0xb9,0x10,0x92,0xe0,0x09,0x06,0xb0,0x66,0x9b,0x37,0x02,0x8d,0x0d,0x3e,0x2f,0xc5,0x17 +.byte 
0x6a,0x87,0x7d,0x48,0xa4,0xcc,0x55,0x20,0x7b,0x77,0x07,0xcf,0x44,0x2f,0x88,0x8a,0xcc,0xf2,0x5d,0xa6,0x3e,0x5f,0xda,0xe2,0xde,0xd2,0x7f,0x7f,0xb7,0x90,0x53,0x64,0x6b,0x79,0x42,0x52,0x69,0xc6,0xd6,0xaa,0x9f,0xf9,0x19,0xbe,0x65,0x10,0x99,0x49,0xaf,0x36,0x49,0x1b,0x8a,0x3d,0x7f,0xdb,0xa2,0x1a,0xb5,0xd6,0x34,0x51,0xc8,0xc8 +.byte 0x06,0xca,0xf6,0xb8,0x76,0xa8,0x9d,0x43,0xae,0xf0,0x51,0xe5,0x9a,0x42,0xa2,0x83,0xed,0x20,0x8d,0xe8,0x1c,0xca,0x15,0x4e,0x37,0x3f,0xd8,0x06,0xa0,0xe1,0xf8,0x05,0xfd,0x42,0xf3,0x7a,0x96,0x44,0x36,0x02,0xca,0x11,0x2a,0xc3,0x24,0x58,0xdd,0x85,0x55,0xb2,0xe5,0x1d,0x92,0xc2,0x2d,0x5f,0x7c,0xb5,0x02,0x37,0x7c,0x07,0x35,0x25 +.byte 0x2b,0x33,0x80,0xe2,0xd4,0xfd,0xc7,0xa7,0x19,0x7e,0xba,0x36,0xaf,0xa0,0x4e,0xab,0x8b,0x28,0x4f,0x3b,0x92,0x72,0x42,0x49,0xaa,0x3b,0x08,0x0f,0x1e,0xff,0x2d,0xbf,0x9c,0x48,0x16,0x72,0xbe,0x28,0x05,0x8b,0x3a,0x20,0x6b,0x38,0x43,0xa2,0x35,0xea,0xf7,0x4e,0x50,0xa0,0x43,0x40,0x5c,0xbf,0xe5,0x75,0x13,0x4c,0x36,0x61,0xa1,0x5d +.byte 0x46,0xd7,0x7a,0x94,0x06,0x2f,0x63,0x32,0x9c,0x6e,0x54,0x18,0x31,0x79,0xf2,0x83,0xcf,0xb4,0x47,0x40,0xe5,0x9a,0xd6,0x99,0x12,0xb3,0x61,0x3d,0x0f,0x5e,0xc8,0x95,0xa3,0x5f,0xc3,0xd5,0x6b,0x6e,0xa0,0xf2,0x2f,0xeb,0x66,0xd0,0x68,0x67,0x10,0x85,0x64,0x27,0xd8,0xb8,0x68,0x00,0x36,0xa5,0xab,0x3e,0xe1,0x43,0x65,0x81,0x2d,0xb9 +.byte 0x0f,0x87,0xfe,0xa1,0x52,0xe9,0x8d,0x82,0x3a,0xd1,0x10,0x52,0x34,0x48,0x7c,0x1c,0xc6,0xd0,0xfe,0xa0,0x1a,0x92,0x07,0x88,0x57,0x9e,0xd7,0x5e,0x9f,0xc8,0xb0,0x93,0x73,0x03,0x28,0x36,0x8c,0x25,0x8c,0x0f,0x4e,0x0f,0x5b,0x26,0x58,0xed,0x5c,0x33,0x75,0x20,0x08,0x11,0x47,0xe1,0x47,0x85,0x47,0xeb,0x54,0xbf,0x58,0xe3,0xd4,0x5b +.byte 0xf9,0xc6,0x5e,0x42,0x58,0xe6,0xaf,0x79,0x66,0x3c,0xa5,0xa3,0x30,0x33,0xe3,0xbe,0x21,0x4b,0x42,0x98,0x6e,0x44,0xd7,0x68,0xc0,0xff,0xbe,0x7f,0xc5,0xb3,0x4f,0x4a,0x93,0xb0,0x11,0x88,0xcf,0x36,0xb2,0x03,0xbe,0x30,0x52,0x71,0x20,0x0d,0x16,0xc5,0xbb,0xf5,0x92,0x12,0x67,0x6a,0x35,0x66,0x00,0x09,0xd7,0xc6,0x67,0xb0,0x6a,0x04 +.byte 0x19,0x3e,0xbf,0xe2,0x82,0x74,0x78,0x2f,0x77,0x44,0xdc,0xad,0x0f,0x66,0x2a,0x23,0x62,0x2c,0x5a,0x4e,0x3a,0x82,0x2a,0x75,0x16,0x0d,0x74,0x64,0x35,0x53,0xc5,0xf6,0xda,0x36,0x44,0xba,0xe2,0xfa,0x1e,0xc2,0xcf,0x29,0x01,0x36,0x66,0xc3,0xca,0x40,0xf7,0xc4,0xba,0x67,0xac,0xf6,0x17,0xcc,0xa3,0x96,0x2d,0x08,0x5f,0x0a,0xea,0x5e +.byte 0x97,0xdc,0xc8,0xf9,0x59,0x24,0x6e,0xc5,0x0b,0x02,0xb9,0x1a,0xde,0xac,0x60,0x1d,0xaf,0x9f,0x5a,0x6f,0xe1,0xa6,0xdf,0x75,0xc5,0x9b,0xb7,0xde,0xa4,0xf7,0xf6,0xa4,0xdc,0xb6,0x96,0x08,0xde,0x2a,0x0e,0xb3,0x9d,0xf5,0x75,0x7d,0x7e,0x96,0x91,0x79,0xd4,0xa7,0x30,0x97,0x3a,0xbd,0x7c,0xe0,0xc5,0x87,0x24,0xb0,0x65,0xb7,0x58,0x00 +.byte 0xd9,0x0e,0x97,0xa6,0xa4,0x6a,0xe8,0x0a,0xac,0xac,0x9f,0x3a,0xe3,0x2a,0x9a,0x43,0x41,0x92,0x6e,0x0e,0xc4,0x63,0xc3,0x18,0xb6,0xe1,0xef,0x3d,0xe8,0x0b,0xb0,0x9f,0x2e,0x19,0xa0,0x98,0x98,0x34,0xf8,0x86,0x6d,0xc5,0x8c,0x41,0x26,0xb7,0xf2,0x1d,0xd4,0x72,0x39,0xeb,0x79,0x06,0xaf,0x53,0xaa,0x34,0x80,0x53,0xf8,0x1b,0xf4,0x53 +.byte 0x19,0xfa,0x16,0x8b,0x39,0xea,0x63,0x7f,0x38,0xc4,0x66,0x1d,0xd1,0x90,0xe4,0x2f,0x20,0x43,0x0d,0x5f,0x98,0xcc,0xae,0xef,0x86,0xc8,0xe5,0xf6,0xd2,0xa5,0x49,0xd0,0x3f,0xb5,0x7e,0x42,0xb5,0x6e,0x5e,0x13,0xa5,0xb4,0x71,0x2c,0x5d,0x57,0x24,0x06,0xd2,0x29,0x7c,0x4c,0x90,0xb6,0xea,0xdb,0x62,0xa4,0x2c,0x6c,0x38,0x57,0x97,0xbd +.byte 
0xfd,0x41,0x6e,0x26,0xc1,0xe1,0x6b,0xbb,0xf0,0xe7,0x71,0xf1,0xcf,0x6a,0x7f,0xfa,0xe7,0xfb,0x17,0xe7,0x81,0x19,0x9a,0xf2,0xf6,0x86,0x22,0x4f,0x62,0x59,0xd6,0xc2,0x33,0xbd,0x11,0xe7,0x07,0x3a,0xfe,0x74,0x0d,0xf8,0xd9,0xdb,0xbd,0x05,0xf4,0xf4,0xb1,0x41,0xc9,0xb3,0xf8,0x6a,0x7b,0x98,0x08,0x6c,0xce,0x4c,0x28,0xbf,0x8c,0x77 +.byte 0x68,0xdc,0xee,0xf7,0x11,0xde,0xfc,0x5a,0x58,0x4f,0xf4,0x74,0x9d,0x5b,0x78,0xc3,0x78,0xe5,0x5e,0x26,0x83,0x40,0x17,0x80,0x2a,0x02,0xa4,0xf1,0x0f,0xa0,0xc8,0x22,0xe6,0x09,0x3a,0x52,0x74,0xf0,0xb9,0xb9,0x60,0xaf,0x20,0xa6,0x7e,0x88,0xf4,0xc2,0x38,0xa2,0x21,0x73,0xa9,0x18,0x3f,0x7a,0x04,0x7b,0xc4,0xcd,0x68,0xd9,0x83,0xa4 +.byte 0x8e,0x54,0x0d,0xbc,0xee,0x8b,0x39,0x93,0x66,0xa2,0xd6,0x76,0x4a,0xb2,0x33,0x4f,0x61,0x53,0xde,0x3b,0xff,0x47,0xcb,0x87,0xd9,0x21,0xd0,0x82,0x64,0x54,0xdf,0xf2,0x67,0x62,0x40,0x33,0xc7,0x0d,0xea,0x98,0xaa,0x95,0xfb,0xa9,0x0e,0x90,0xa5,0xd9,0x54,0x81,0x86,0xad,0x9e,0xa4,0x4d,0x36,0xe1,0x77,0xf2,0xe3,0x0a,0x54,0x1a,0x57 +.byte 0x9d,0x62,0x5e,0x0e,0x00,0xc8,0xa6,0x1e,0xf3,0x43,0xe6,0x20,0x0d,0x6a,0x8e,0x90,0x1d,0x4d,0xac,0x2f,0x9f,0x1c,0xb7,0x30,0xec,0x5c,0x99,0x78,0x6f,0x3b,0xe7,0xe0,0x28,0xb9,0x97,0xc5,0x6a,0xf2,0x17,0xc2,0x11,0xac,0x1a,0xe2,0xca,0x57,0x49,0x64,0xc8,0xc7,0x66,0x43,0x8d,0xc8,0xa7,0x0e,0xfc,0xcf,0x05,0x2f,0xae,0x4b,0xfe,0xe4 +.byte 0xbe,0x9c,0xe7,0xe6,0xa8,0x36,0x49,0x0d,0x9c,0x60,0x39,0x0c,0xfd,0x41,0x5b,0xc7,0xa4,0xa5,0x30,0x89,0xe5,0x10,0xf6,0xea,0xf8,0x2c,0xf2,0x3e,0xb1,0x96,0x81,0xa7,0x32,0x8b,0x39,0x14,0x15,0x36,0xfc,0x55,0x3c,0x22,0xcf,0xa3,0x98,0x90,0x68,0x13,0xd8,0x3f,0xf2,0x53,0x19,0x3e,0x9a,0x0c,0x1f,0xc6,0x29,0x43,0x46,0x23,0x58,0xea +.byte 0x49,0x49,0x15,0x46,0x8e,0x63,0x30,0x1f,0x3e,0x2a,0xa0,0x18,0xfd,0x28,0xc5,0x32,0x77,0x75,0xac,0x6e,0x5d,0x39,0xa9,0x44,0xce,0xfe,0x39,0xa6,0xec,0xde,0x69,0xde,0xfa,0xc8,0x40,0x44,0x34,0x29,0x15,0x19,0xa7,0xbe,0xd6,0x5b,0xfd,0x1f,0x7b,0xb9,0x88,0xf1,0x14,0xcf,0x42,0xc5,0xa7,0xa7,0x0e,0x6b,0x6e,0x86,0xb2,0x7c,0x23,0x8e +.byte 0xf6,0xae,0xde,0x3c,0xd7,0x26,0x5e,0xde,0x31,0x94,0xc1,0x19,0x65,0x55,0x03,0x73,0xba,0xdc,0x69,0x95,0x9c,0x9d,0x8e,0x59,0xd8,0x51,0x61,0x9f,0x8f,0xf4,0x29,0x43,0x4b,0x6a,0x75,0xb3,0x4b,0x9d,0xcc,0x46,0xd2,0x6e,0x00,0x49,0x4f,0xf0,0xac,0x80,0x55,0xc0,0x0c,0xbf,0x18,0x52,0x75,0x76,0x3b,0xac,0x92,0x83,0x69,0x1b,0xb4,0x15 +.byte 0xe5,0x9e,0xde,0x10,0x30,0x30,0x0e,0x85,0xc7,0xf9,0xae,0xbc,0x9e,0xaf,0x4b,0xee,0x27,0x6b,0xa5,0x6d,0xe4,0x8e,0xed,0xdd,0x95,0xaa,0x85,0xe2,0xf5,0x38,0x15,0x50,0xd3,0xcd,0x2c,0x88,0x6c,0x2b,0x14,0x37,0x74,0x2d,0x6d,0x30,0xec,0x96,0x78,0xae,0x80,0xb3,0xd9,0x84,0xc1,0xd6,0x71,0x90,0xe4,0x8d,0x3a,0x7c,0x9c,0xc4,0xf5,0xa0 +.byte 0x20,0x7e,0xa2,0x0e,0x75,0x7c,0x25,0x7a,0x7e,0x2b,0x2e,0xdb,0x12,0x23,0x73,0x6a,0x8e,0xe3,0xd7,0x47,0x94,0xfb,0xcc,0xe4,0x5a,0x8c,0xfb,0xdc,0x46,0xb3,0x4a,0x42,0x15,0xe0,0xaf,0x6e,0x81,0x72,0x72,0x04,0x52,0x09,0xc5,0x8b,0x6e,0xdd,0x7d,0xff,0x27,0xa8,0xc1,0x94,0xb5,0x33,0x59,0xc2,0x7d,0x59,0x6c,0x3c,0xaa,0xd9,0xd8,0x05 +.byte 0x43,0x7e,0x8a,0x47,0xdd,0x76,0x36,0xe3,0x05,0x49,0xd1,0x8f,0xdf,0x45,0x46,0x63,0xff,0x17,0xb4,0x52,0xc8,0xee,0x4d,0xf5,0x74,0x65,0xc6,0xca,0x19,0xfd,0xb9,0x51,0xc8,0xc9,0x96,0xd4,0x06,0xd4,0x09,0x1e,0xab,0x6d,0x1b,0x26,0x61,0x80,0x5b,0xa8,0xcb,0x62,0x92,0x5a,0x1a,0x8e,0xa4,0xb7,0x25,0x19,0x96,0x63,0xd5,0xc3,0xc9,0xdc +.byte 
0x04,0x83,0x62,0x31,0xe3,0x76,0x00,0x4d,0xf8,0xb3,0x98,0xae,0x4d,0x1a,0x38,0xe3,0xa1,0x27,0x52,0x87,0xbe,0x2c,0x93,0x45,0xd1,0xab,0x56,0xc6,0xf5,0xbc,0xb5,0xe6,0x9c,0xe1,0x1b,0x37,0x42,0x08,0xe7,0x71,0xb5,0xa4,0x67,0xf9,0x48,0xd4,0xc4,0x10,0x25,0x53,0x9c,0x03,0xfc,0x6d,0x5e,0x62,0x5e,0x6d,0x56,0xbc,0x78,0x11,0x0a,0x6d +.byte 0x1b,0x7a,0xdc,0x62,0xb5,0x58,0x86,0x15,0x71,0xff,0x11,0x33,0x94,0x2b,0xa6,0xc7,0x68,0xd5,0x68,0xda,0x5b,0xd5,0xb7,0x38,0x6c,0x1c,0xf4,0x07,0x39,0xef,0x1f,0x72,0x0a,0xb3,0x12,0x13,0x25,0x86,0xd3,0xf8,0x9f,0xb5,0x40,0x58,0xe7,0x5e,0x9f,0xa0,0xbc,0xd7,0xab,0x4f,0xf3,0x94,0xcf,0x0f,0x5a,0x4c,0x98,0xb4,0x70,0x35,0x62,0xee +.byte 0x33,0x24,0x72,0x31,0xd4,0x06,0xd9,0xb4,0x1c,0x1e,0x0f,0xa7,0x48,0xc7,0x75,0x45,0x40,0x02,0xd0,0x60,0x32,0x29,0x4d,0x61,0x7a,0xee,0x65,0x35,0x2b,0xe5,0x50,0xac,0x82,0xdb,0xf7,0x9c,0x8f,0x82,0xe4,0xf0,0xbd,0xdb,0x00,0x3d,0x3a,0x3d,0xa2,0xc3,0x2d,0x0e,0x51,0x20,0xdb,0xdb,0x8d,0x15,0x03,0xbd,0xcb,0xcb,0x24,0x81,0xc5,0xdb +.byte 0x05,0x39,0x48,0xb8,0x3c,0x93,0x35,0x10,0xef,0x19,0xba,0x09,0x9e,0xff,0xf9,0x3f,0x0c,0xdc,0x96,0x98,0x32,0x26,0x76,0xe7,0xfa,0xaa,0xdf,0xdc,0xb9,0x15,0x44,0x42,0x9a,0x8c,0x6c,0x88,0xea,0x43,0x63,0xb5,0x79,0xb6,0x50,0x30,0x78,0xea,0x70,0xba,0x33,0x36,0x8f,0x8c,0xe5,0x78,0xfd,0xbc,0xc0,0xbd,0xde,0x3a,0x3d,0xe6,0xe6,0x57 +.byte 0x0f,0x29,0xf2,0x82,0x05,0xf2,0x5c,0xfd,0x33,0xc1,0xb2,0x2e,0xc2,0xc0,0x42,0xa2,0xc8,0xa5,0xf9,0x70,0x05,0xff,0x7b,0x8d,0xb9,0x68,0xc3,0xf6,0x74,0x00,0xcd,0x9d,0x70,0xfa,0x62,0x34,0xe5,0x05,0xe8,0x5f,0x53,0x9b,0x69,0x01,0x86,0xb9,0x1d,0x68,0x80,0x89,0x51,0x52,0x0d,0xe8,0x28,0xa1,0xdd,0x62,0x2b,0xf3,0x53,0x74,0xaa,0x98 +.byte 0xdb,0x7e,0x74,0x44,0xeb,0x25,0xe7,0xde,0xc4,0x29,0x14,0x11,0x7b,0xc6,0xef,0x14,0xe4,0x04,0xd0,0xf4,0x11,0xca,0xdc,0xdc,0xe6,0x3f,0x9a,0xc9,0xe2,0x0e,0x67,0x30,0x78,0x65,0x94,0x5a,0xa1,0x24,0xd6,0x90,0x2f,0x1c,0x13,0x46,0xf5,0xb5,0xf9,0x74,0x56,0x3e,0xd5,0x1b,0x09,0xb3,0x04,0xbe,0x89,0x00,0xbd,0xe0,0xba,0x13,0x05,0xd1 +.byte 0x98,0xa7,0x93,0x09,0xc5,0x96,0x46,0xb5,0x5a,0x05,0xac,0x1e,0x66,0x03,0xf0,0xaa,0x3d,0xc2,0x54,0xa3,0xc4,0x2b,0x0d,0xa3,0xe4,0x92,0xd6,0xd0,0x44,0xa6,0x37,0x30,0xa5,0xac,0xc2,0xc8,0x58,0x2a,0x2c,0x18,0x68,0x8d,0x9b,0x4f,0x99,0xd0,0x55,0x41,0xf4,0x84,0x3c,0x69,0xda,0x3c,0x6d,0x43,0xb3,0x85,0x15,0x1f,0xdb,0x58,0x0b,0x71 +.byte 0x33,0x24,0xbb,0x21,0x43,0x19,0x16,0xeb,0x83,0xde,0xe5,0xb7,0x68,0x9e,0xb9,0xd9,0xf6,0x2e,0xae,0xdd,0x88,0x2c,0x18,0xd7,0xc3,0x72,0x8b,0xbe,0xaf,0x8d,0xfd,0xcd,0x2f,0x8e,0x3e,0x2b,0xa4,0x20,0x11,0x9d,0x00,0x4f,0xea,0xf0,0xaa,0x2d,0xf3,0x9d,0xfd,0x11,0x7b,0xac,0x2c,0x66,0x74,0x03,0xe5,0xcc,0x70,0x9f,0xfb,0xb7,0x5a,0x16 +.byte 0xc3,0x05,0x61,0x7c,0x8c,0x73,0xcc,0x9c,0x6a,0x2f,0xee,0xae,0x85,0xc9,0x51,0x91,0x13,0xa4,0x09,0x82,0x4d,0x62,0x09,0x24,0x25,0x35,0x1f,0x82,0x88,0xbb,0xdd,0x16,0x5e,0x8d,0x98,0x5f,0x07,0x49,0x32,0x96,0xb7,0xee,0x85,0xb0,0x7b,0xfd,0xf5,0x35,0x4b,0xa9,0xd4,0xee,0xf2,0x37,0xd1,0xfe,0x62,0xf5,0x52,0x13,0xb4,0xb2,0xce,0xc4 +.byte 0xe0,0x09,0x78,0x48,0xd5,0xc6,0x5d,0x36,0x1b,0x90,0x3a,0x6a,0x3c,0x21,0x50,0xf0,0x0a,0xe9,0x46,0x24,0x45,0xc1,0x5e,0x76,0xa3,0xf9,0x70,0xb8,0x62,0x4d,0x0e,0x92,0x87,0x4a,0x6a,0xf9,0x46,0x91,0x64,0xfe,0x7f,0x53,0x24,0x7e,0xc7,0x3e,0xb0,0x37,0x1a,0xc8,0xd6,0x33,0x0b,0x5f,0xa5,0x30,0x03,0x0e,0x85,0x3d,0x7b,0xc1,0xa1,0x18 +.byte 
0xb3,0x8c,0xfe,0xca,0x3e,0x71,0xd8,0x92,0x46,0x49,0x60,0x54,0xd9,0x7b,0xf7,0xc3,0x99,0x2f,0xb5,0x79,0xcc,0x32,0x40,0x7d,0x3d,0x0b,0xc6,0x6f,0x04,0xd9,0xf1,0xdd,0x64,0xf5,0xc4,0x60,0x14,0x04,0x5c,0x3a,0xa4,0xda,0xdc,0xad,0x8f,0xc2,0x44,0x37,0x96,0x63,0x00,0xf7,0xb1,0xc0,0x7c,0x8c,0x12,0xb5,0x3a,0xec,0xc0,0x16,0xd8,0x24 +.byte 0xe9,0xc0,0xc4,0xfa,0xb1,0x85,0x5b,0xe3,0x62,0x24,0xa1,0x75,0x92,0x82,0x04,0x59,0x10,0x50,0x4b,0x51,0x51,0x3e,0x39,0xba,0x6d,0xa0,0x65,0x2d,0xfc,0x23,0x1c,0x9d,0x69,0x22,0xe7,0x15,0xfa,0xba,0x76,0xbf,0x53,0x62,0xb0,0x0d,0x0d,0x5d,0x55,0x00,0xbc,0x58,0x01,0xed,0x37,0x53,0xb9,0xa6,0x0d,0x71,0xab,0xec,0x42,0xbf,0x3b,0x52 +.byte 0xfd,0xae,0xe9,0x6d,0x65,0x07,0xf3,0xd9,0x32,0x66,0xc1,0x66,0x1a,0x18,0x73,0x86,0x01,0xaf,0x1d,0xd1,0xd0,0xcf,0xb1,0xea,0x54,0x23,0xdf,0xf2,0x4d,0x7d,0xc7,0xfe,0xfe,0x7d,0x1d,0x2c,0x1b,0xb6,0xa7,0x7a,0x9e,0x90,0x3a,0x3b,0xb0,0x6c,0xb0,0xd2,0xd1,0xd0,0x6a,0x94,0x4c,0x84,0x1c,0x45,0xae,0xda,0x16,0xa9,0x2e,0x63,0x19,0x26 +.byte 0xf6,0x74,0xd3,0x6f,0x9b,0x9c,0x0c,0xb8,0x85,0x9f,0xeb,0x99,0xbc,0xab,0xff,0xc3,0x75,0x86,0xe5,0x3a,0xa0,0xf9,0xfc,0x6b,0x3d,0x5a,0xad,0x46,0x7f,0x17,0x0e,0x94,0xb7,0xa4,0x43,0x61,0x54,0x76,0x29,0x78,0xe4,0x41,0x91,0xbe,0xa5,0x36,0x39,0xdf,0xdc,0xcc,0x8e,0x42,0x40,0x08,0x51,0x26,0xb0,0x53,0x5d,0xb4,0x7a,0x18,0x8e,0xb3 +.byte 0xae,0xf2,0xe0,0xef,0x63,0x51,0x3a,0xbe,0x4c,0x2d,0xce,0xc7,0xe2,0x1b,0xc2,0x40,0xf3,0x82,0x61,0xf0,0x1b,0x05,0xdd,0x1e,0xae,0xed,0x87,0x2c,0xe5,0xad,0xc7,0xec,0xb5,0x63,0xf7,0x3a,0xf9,0xb7,0xd8,0x4e,0xa7,0xef,0xac,0x6d,0x9c,0x27,0xd9,0xcc,0x66,0xf4,0x75,0x40,0x94,0x8b,0x78,0x4f,0x61,0x4f,0x31,0x49,0x5c,0x96,0x72,0x58 +.byte 0xcf,0x55,0xb2,0x66,0x16,0x29,0x27,0x24,0x39,0xc3,0x64,0xb1,0xdf,0x69,0x87,0x85,0x46,0xe3,0xd0,0x82,0x53,0x1a,0xc2,0xf1,0x3a,0xab,0xdf,0xe5,0x29,0x17,0xdd,0xfe,0xbf,0xf9,0x3d,0x7a,0xfb,0xe7,0x74,0x49,0xa9,0xef,0x61,0x93,0x4c,0xfa,0x30,0xea,0x65,0xa7,0x61,0x32,0x88,0x74,0x12,0xc1,0x91,0xf1,0xc2,0x1f,0x38,0x6a,0xfd,0x0d +.byte 0xc8,0x6f,0x87,0xe6,0x15,0x55,0x26,0x13,0x86,0x13,0xb9,0x01,0x98,0x34,0x1c,0x2d,0x1d,0x30,0xae,0x7d,0x8e,0x07,0x7d,0x4d,0xe9,0xfd,0x58,0x18,0xc3,0xa6,0x8e,0x87,0x98,0x33,0xcc,0x80,0xd7,0x70,0x07,0x6a,0x4a,0x97,0xef,0x56,0xf3,0x9d,0xf9,0xef,0x6f,0xa8,0x71,0x7f,0x61,0x07,0x1d,0x9d,0x51,0x06,0x86,0x4a,0x35,0x9e,0xab,0x2c +.byte 0x66,0x8d,0x61,0x62,0xbd,0xed,0x6c,0x76,0x7c,0x67,0xe0,0xe1,0x6e,0x90,0x74,0xb1,0xa6,0x26,0x0d,0x01,0x1f,0xe9,0xb4,0x30,0x9a,0x7e,0x37,0xd1,0xea,0x97,0x9a,0x0f,0x9e,0x8d,0x52,0xd4,0x96,0x36,0x5b,0x6f,0x40,0xbb,0x9e,0x44,0xb4,0x6e,0xee,0x15,0x70,0xef,0x66,0x81,0xf5,0xb4,0xe7,0x69,0xb0,0x40,0x44,0xdc,0x70,0x1e,0x4d,0x3c +.byte 0x9b,0x19,0x2a,0x97,0xbd,0xb2,0xd2,0x9b,0x98,0xac,0x36,0xf1,0x05,0x48,0xdc,0x5d,0x21,0xfb,0x17,0xe3,0x9c,0x3c,0xbf,0xfd,0x1d,0x39,0x1e,0x5b,0x2a,0xa2,0xb3,0x7d,0x4f,0xdf,0x3a,0x41,0x7a,0x31,0x01,0xc2,0xe5,0xd0,0x06,0x50,0x29,0x05,0xce,0xb8,0x28,0xb7,0xdd,0x83,0xc8,0xaa,0x39,0x78,0xc7,0x7d,0x9e,0xcd,0x9a,0x07,0x71,0x7e +.byte 0x20,0x92,0x82,0xce,0x49,0x90,0xce,0xef,0x53,0xa7,0x48,0x2a,0x69,0x86,0xa1,0x5e,0x35,0xe8,0x7d,0x10,0xb8,0x5e,0xa6,0x9a,0x69,0x6f,0x32,0x75,0xf3,0x4a,0xee,0x9c,0x06,0x5c,0xdd,0x84,0x7e,0x38,0x00,0x67,0x39,0x42,0xed,0x72,0xda,0xe3,0x6b,0x5a,0xf4,0xc9,0x80,0x3e,0x0e,0xda,0x39,0xfa,0x83,0x2c,0x60,0x69,0x87,0x85,0x05,0xfc +.byte 
0xf4,0x2b,0xd4,0x0a,0xad,0x86,0xca,0xd5,0xf0,0x92,0x1f,0x43,0x3c,0x0e,0xac,0x99,0xf3,0x67,0xa3,0x41,0x6d,0xb9,0x29,0x70,0x57,0x62,0x9f,0x45,0x91,0x72,0xe5,0x53,0xcc,0x89,0x80,0x3f,0xbc,0x1c,0x66,0x21,0xdd,0x90,0x2b,0xa4,0xca,0x2f,0xf0,0x0f,0x9f,0xd0,0xe9,0x28,0xe2,0xd9,0x36,0xaf,0xf9,0x01,0x81,0xce,0xb4,0xe7,0x71,0xfd +.byte 0x92,0xf8,0x56,0x2e,0xc3,0xc8,0x8b,0x54,0xc8,0xc7,0x40,0x79,0x27,0x06,0x18,0x4a,0x7b,0x88,0x3f,0xd6,0x4f,0xd4,0x66,0x1e,0x1f,0x9a,0x14,0x1a,0x0a,0x98,0xc7,0xd6,0x25,0x83,0x37,0x8a,0x5d,0xb2,0x88,0x39,0x68,0x7b,0x1f,0x4e,0x0a,0xed,0x11,0x1a,0x77,0x9b,0xcb,0xb6,0x7d,0x5c,0x36,0xac,0x07,0x07,0x9f,0x05,0xcf,0x90,0x8f,0x3f +.byte 0x4b,0xc5,0xf9,0x42,0x90,0xb4,0x42,0x26,0xa1,0x2c,0x66,0xc6,0xb8,0x98,0x80,0x8a,0xbb,0x9b,0x41,0xe4,0x44,0x8c,0x5e,0x56,0x33,0xe3,0xba,0xcf,0x31,0x8e,0x28,0xd7,0xc5,0xd1,0x3b,0x68,0x47,0x10,0xae,0xda,0xc3,0xbd,0x20,0xe7,0xac,0xe2,0xe1,0xe0,0x7a,0x4b,0x83,0xb1,0xab,0x72,0xf4,0xc4,0xe7,0x0d,0x02,0xaf,0x5b,0x74,0xac,0xda +.byte 0x9d,0xce,0x26,0x1f,0x79,0x05,0x67,0x7e,0xc4,0x98,0x3f,0xde,0xa6,0xf3,0xfe,0x59,0x65,0x88,0xfb,0x14,0x3a,0x43,0x91,0x04,0x1a,0x78,0x7e,0x08,0xba,0x55,0x50,0xc7,0x65,0xd3,0x8e,0xda,0x0a,0xee,0x8e,0x11,0xa9,0xf6,0x9e,0xd3,0x23,0x97,0x05,0x0c,0x98,0x2a,0x36,0x25,0xec,0x5e,0x0b,0xf9,0x31,0x80,0x00,0x8a,0x70,0xf1,0xaa,0x7c +.byte 0x73,0x02,0x98,0x8d,0x42,0x27,0x53,0xf1,0x83,0x37,0xd0,0x2d,0xfa,0xc7,0x4b,0xa5,0xb3,0xc9,0xb8,0xd4,0x56,0x94,0x5a,0x17,0x2e,0x9d,0x1b,0x46,0xaa,0xb6,0xd9,0x2a,0x3a,0x6c,0xaf,0x24,0x59,0xfd,0x08,0xc5,0xca,0x0c,0x79,0x3f,0xe7,0x91,0x8d,0x9d,0x59,0x91,0xd8,0x5f,0xda,0x6d,0x35,0x7b,0x52,0x47,0x35,0xf9,0x81,0x86,0x2c,0xee +.byte 0x1a,0x14,0xc5,0x1f,0xb6,0x85,0xb5,0x74,0xe9,0xb7,0x4f,0xde,0xcd,0x93,0x2d,0xf3,0x10,0xbe,0x34,0xfa,0xca,0x15,0x9f,0x02,0x9d,0x19,0x72,0x7c,0xd6,0xfd,0x81,0x43,0x49,0xb5,0x2b,0x52,0x31,0xd6,0x2c,0x28,0x2e,0x83,0x6d,0xd3,0x0f,0x6e,0x03,0x65,0xf0,0x8a,0xdd,0x0a,0xec,0x58,0x10,0x45,0x5d,0xac,0xda,0xf5,0x32,0x5d,0x18,0x26 +.byte 0xcc,0x2e,0xcf,0xd3,0x41,0x2d,0x1d,0xba,0xdf,0xd8,0x96,0x8f,0x18,0x0f,0xa7,0xec,0x8e,0x6e,0x84,0x2c,0xd6,0x1f,0x4e,0x76,0xfe,0xf3,0x14,0x27,0x4b,0x5b,0x3d,0x7c,0x1c,0x59,0x46,0x97,0x1b,0x59,0x5a,0x2d,0x57,0x80,0x17,0x98,0x7d,0x92,0x5d,0x2f,0x98,0x53,0x10,0x59,0x8e,0x7f,0x55,0x64,0x15,0x62,0x2c,0x16,0x0b,0x8d,0x48,0x54 +.byte 0xaf,0x96,0x17,0xa9,0x8e,0x2c,0xcf,0x41,0x8c,0x8a,0x37,0x55,0xe4,0xf9,0x20,0x3b,0x21,0x5c,0x86,0x8d,0x3f,0xa6,0x5e,0x43,0xf3,0x3b,0xf7,0x7c,0x27,0x88,0x8e,0xa5,0x15,0xca,0x0e,0x9e,0x85,0x30,0x17,0x0d,0xcf,0xf0,0x82,0x87,0xd6,0xe8,0xd2,0xad,0xe9,0x4d,0x3f,0xc9,0x58,0x19,0xf9,0x99,0x4d,0xf9,0x6b,0x1b,0xd3,0xf9,0xdd,0x52 +.byte 0xd1,0x3c,0x64,0x46,0xfd,0x4f,0x2e,0x63,0x39,0xd8,0xe4,0xeb,0xfc,0x07,0xf1,0xa5,0xff,0x84,0xa8,0x92,0xfe,0xbc,0xc5,0x36,0x91,0x2b,0xec,0x2c,0xad,0xf0,0xac,0xc5,0xb0,0xad,0x8a,0x0d,0x6a,0xd9,0x29,0x7a,0xb0,0x87,0x0c,0xaf,0xda,0x75,0x84,0x25,0xbe,0xee,0x0d,0xfd,0x4c,0xf5,0x2d,0x46,0xe9,0x17,0xb9,0x9d,0x3d,0x4b,0x8f,0x3a +.byte 0xe9,0x49,0xb6,0x32,0x99,0x27,0xe2,0x4d,0xff,0x2f,0x2e,0xd5,0x69,0x52,0x56,0x20,0x0a,0xbf,0x62,0x14,0x34,0xfb,0xbf,0x95,0xe8,0xfe,0xb1,0x9f,0x43,0x30,0x02,0x03,0x9e,0xa8,0xe2,0x68,0x64,0xdd,0x37,0xfc,0xb9,0x0f,0x85,0x8c,0x36,0x45,0xdb,0x7c,0x8b,0x97,0x50,0xc3,0x75,0xa1,0xcf,0xf4,0xc2,0x46,0xd8,0xa1,0x8c,0xab,0x8d,0x3a +.byte 
0xde,0xe7,0x9e,0xd2,0x1e,0x2d,0x8b,0xe4,0x31,0xe3,0x12,0x3f,0x9f,0x0b,0x2c,0x95,0x75,0x8d,0xf1,0x24,0xb9,0xdf,0x1e,0x64,0x35,0x45,0x2a,0xc2,0xf9,0x96,0x5d,0x10,0x64,0x32,0xae,0xe9,0xf8,0x71,0xd4,0x2d,0x6b,0xc6,0xde,0x08,0x1e,0x5d,0x51,0xf1,0xe7,0xfd,0x3c,0x22,0x43,0x59,0x82,0x83,0x13,0x75,0x36,0xef,0x81,0xe4,0xcf,0xa8 +.byte 0xb8,0x30,0x16,0x44,0xae,0x55,0x06,0xdd,0xb9,0x60,0x3f,0x75,0xc6,0xd1,0x73,0xa9,0xea,0xc9,0x64,0x2b,0x8a,0xde,0x44,0x4b,0x3d,0xc3,0x31,0x12,0x84,0x9a,0xe3,0xda,0x24,0x82,0x99,0x00,0x6d,0x8e,0xb8,0x26,0x82,0xa6,0xc2,0x37,0x6c,0x2a,0x1d,0xcf,0x6d,0x18,0xc7,0xee,0x27,0xca,0xe7,0xad,0x95,0xed,0x7d,0xe0,0xe0,0x6f,0x45,0xc3 +.byte 0x8a,0x2f,0x08,0x49,0x7e,0x09,0x9e,0xc1,0xb7,0x1e,0x8f,0x57,0x61,0xf8,0x3e,0xea,0xd7,0x47,0xfb,0xd0,0xda,0xaa,0x04,0xf9,0x06,0xbb,0xa3,0x80,0x68,0x89,0xb0,0x7f,0x18,0xf3,0xd2,0xeb,0xee,0x48,0x30,0x6a,0x24,0xc8,0x71,0x43,0xc3,0x50,0xcc,0x85,0x68,0xf5,0xca,0x44,0x34,0x43,0xaa,0x2e,0x4f,0x02,0x1b,0x23,0x4f,0xe9,0x07,0x02 +.byte 0xa2,0xfa,0x24,0x57,0x70,0x4e,0x1a,0x78,0x03,0xa2,0xdd,0x53,0x50,0x82,0x05,0xb1,0x0f,0xcb,0x9e,0x2e,0x58,0x04,0x62,0xc8,0xac,0x71,0x31,0x56,0x0f,0xc7,0x70,0x32,0x53,0xda,0x51,0xc3,0x15,0x78,0x82,0xb6,0xe8,0x6e,0x32,0xeb,0x39,0xab,0xba,0x67,0xcc,0xbc,0x99,0x58,0x88,0xc4,0x60,0x0d,0x0b,0xc1,0xfa,0x6f,0x40,0x85,0x04,0xdf +.byte 0x5f,0x17,0x69,0xf1,0xbd,0x44,0x97,0xc8,0x62,0x19,0x49,0x1f,0x23,0xcb,0x3d,0x17,0x04,0xf2,0xbd,0x58,0x15,0xa6,0x37,0x3a,0x3f,0x77,0x98,0x32,0x40,0x8a,0x72,0xf0,0x41,0x0b,0xad,0x88,0xba,0xd3,0xae,0xdc,0x3b,0x9a,0x37,0x89,0xa5,0x09,0xe5,0xbb,0xf2,0xf8,0x5d,0xa5,0xed,0xe8,0x39,0x7b,0xed,0x2b,0x90,0xd6,0x6c,0xd3,0xfa,0x69 +.byte 0xa7,0xca,0x09,0x83,0x15,0x8d,0xd8,0xe3,0x81,0x03,0x4e,0x2d,0xd8,0x96,0x3b,0x4b,0x18,0x91,0xac,0x5f,0x22,0xe6,0x9d,0x4b,0x09,0xaf,0xf0,0xdf,0x16,0xa2,0xf1,0x2c,0xd9,0x35,0x8a,0x6e,0x85,0x7a,0xbc,0xc7,0x10,0xd1,0x5f,0x8a,0x53,0x9c,0x8e,0xbc,0x8c,0x15,0xb3,0x8a,0xb0,0x0b,0x74,0x40,0x2a,0x5f,0x46,0x71,0x1c,0x0b,0xee,0x08 +.byte 0xae,0x17,0x26,0x1e,0xcf,0xbf,0x3d,0xa0,0x5e,0x3a,0xdb,0x39,0x6b,0x4a,0x82,0x53,0x02,0xf4,0xa2,0x15,0x5c,0xb6,0xdb,0x20,0x30,0xa2,0x7d,0xcb,0x9a,0xf7,0x88,0x69,0xb5,0xc8,0xe6,0xcd,0x9e,0xa4,0xaf,0x27,0x0e,0x61,0x41,0xcd,0x8e,0x71,0x83,0x11,0xce,0x5e,0x6c,0xaf,0xa4,0x50,0x81,0xb6,0xf2,0x36,0x05,0xbb,0x36,0x4e,0x4a,0x1b +.byte 0x09,0x9f,0xca,0x1b,0x12,0xb0,0x01,0xc0,0xbf,0x7e,0x3f,0x81,0x60,0x9f,0xfd,0x56,0x81,0x54,0x99,0x2b,0x7f,0x1e,0xb1,0xbf,0xd4,0xb7,0xe1,0x7c,0x71,0xf9,0x00,0x72,0x5f,0x10,0xab,0x60,0x03,0x9d,0x13,0xf1,0xba,0x48,0x93,0x1c,0x1d,0x11,0x04,0x40,0xf6,0xde,0x3b,0xef,0x6c,0x47,0xb3,0x0d,0xcf,0x53,0xbd,0x45,0x7e,0xd7,0x8c,0x34 +.byte 0xd0,0xcb,0x85,0x4b,0x1e,0xd1,0xc5,0xfd,0x5b,0x1a,0x18,0x8a,0x27,0xe3,0x16,0x3c,0x25,0x12,0xf2,0xf1,0xa1,0x40,0x53,0x68,0x27,0x2c,0x81,0x0e,0x20,0x12,0xe3,0xde,0xe2,0x9f,0x08,0x75,0xc0,0x25,0x79,0xf0,0xc4,0xaa,0x10,0xad,0x41,0x3f,0x0b,0xc7,0xb2,0xe0,0x50,0xde,0xec,0x24,0x09,0xeb,0xb5,0xd3,0xbc,0xd3,0xdf,0x44,0x6d,0xc8 +.byte 0xf1,0x79,0xf8,0x33,0xb7,0x75,0x09,0x18,0x04,0x59,0x0f,0x15,0x5e,0xf9,0xca,0xe0,0xa9,0x2a,0xe1,0x1b,0xf0,0x49,0x5f,0xca,0xa3,0x80,0xd5,0x9b,0x1e,0xc1,0x1f,0x98,0x18,0x0a,0x24,0xc3,0x3f,0xfb,0x43,0xfd,0xa3,0x01,0x59,0x50,0xea,0x21,0xe0,0x92,0xfd,0xe1,0xd5,0xe4,0x38,0x24,0x88,0xf3,0xb0,0xc9,0x79,0xfd,0x4e,0xd3,0x3e,0xbf +.byte 
0xc6,0xb8,0x9e,0x7f,0xab,0x65,0x79,0xd9,0xb9,0x83,0x38,0xe1,0xf7,0xd0,0x37,0x04,0xb3,0x0c,0x48,0x82,0x74,0xe1,0x0c,0x80,0x13,0x59,0xc4,0x72,0xf9,0x2d,0x88,0x06,0x46,0x08,0x7a,0x6b,0xb4,0xfc,0x5f,0x63,0x31,0x2f,0x4f,0xfd,0x4b,0x1f,0x8e,0x21,0x3c,0x67,0x83,0xdd,0xa9,0x65,0x68,0xc6,0xd0,0xb8,0x1d,0xcd,0x60,0xc5,0xb9,0x3b +.byte 0xea,0xe9,0xc7,0xa5,0x1a,0x98,0x8a,0x87,0xb7,0x73,0x29,0x3a,0x6a,0x3a,0x75,0xbf,0xa4,0x79,0x64,0xcb,0x94,0x68,0x93,0x56,0x55,0x1e,0xd5,0x61,0xda,0x87,0xe1,0x28,0xf0,0xa5,0x64,0x9a,0xd7,0xa0,0x91,0xfd,0x46,0x20,0x6c,0x87,0x1f,0xe8,0x9e,0x7e,0x95,0xc4,0x60,0xdb,0xf4,0xe2,0x3e,0xb2,0x6a,0x4a,0xe7,0x46,0x3f,0xca,0xf3,0x72 +.byte 0xb5,0xe8,0x06,0x3a,0x1b,0xeb,0xcb,0x81,0x46,0x44,0xf6,0x97,0xa0,0x79,0xe4,0xa4,0x8a,0xba,0x5e,0x1b,0x6d,0xf4,0xcf,0x7c,0x12,0x7a,0xec,0xdd,0xf6,0xc8,0xab,0x5f,0x30,0xb3,0xf9,0x8e,0x31,0xfd,0x51,0x95,0x8b,0xa1,0xe9,0xe8,0x2d,0xec,0x86,0x12,0x4a,0xf8,0x8b,0xa5,0xdd,0xb2,0xe4,0xad,0xdd,0xcb,0xf5,0xcd,0x9c,0x9f,0x0a,0x42 +.byte 0x5f,0x83,0x9d,0xa6,0x4f,0xbe,0x11,0x75,0x3c,0xde,0x67,0x6b,0x95,0xcd,0xcf,0xdc,0xfd,0x1f,0x1a,0x14,0x01,0x27,0x68,0xaf,0x9b,0x82,0xd6,0xae,0x29,0x8a,0x1f,0xc8,0xf1,0x1f,0xb8,0xa9,0xa2,0x1d,0x81,0xbb,0x19,0xda,0x06,0xe3,0x34,0x7b,0xce,0x99,0x3c,0x5b,0x0c,0x9b,0x8b,0x35,0xc0,0x6c,0x88,0xef,0xeb,0x9f,0x64,0xe3,0xc3,0xbf +.byte 0x37,0xd7,0xf6,0xdf,0xad,0x28,0xf4,0xd7,0x19,0xb0,0xf2,0xa7,0xd4,0x71,0xbc,0xd3,0xa3,0x09,0x5c,0x1a,0x45,0x30,0x2d,0x53,0xa5,0x19,0x2f,0xb0,0x5d,0xae,0x04,0x28,0xe6,0x16,0x3e,0x75,0x9f,0xcc,0x76,0xc4,0xc2,0xa0,0xfb,0xff,0xdd,0x4c,0xa3,0x8b,0xad,0x05,0x73,0x26,0xf0,0xef,0x48,0xd5,0x25,0x22,0x90,0x78,0x21,0xfd,0xc6,0x23 +.byte 0x14,0xbc,0xed,0x13,0x29,0x76,0x17,0xa6,0x93,0x09,0x6e,0xa7,0x42,0xdd,0x11,0x9e,0x05,0xa3,0xb7,0x48,0x84,0x85,0xf8,0x4e,0xed,0x3d,0xdb,0xfc,0x68,0xd2,0xec,0xec,0x69,0x2b,0x60,0x38,0xd1,0x99,0x44,0xf9,0x60,0xd3,0x5a,0x9e,0xe4,0x26,0x9d,0x12,0xf8,0x6a,0x53,0xde,0x76,0x78,0xa7,0x68,0xb0,0xb4,0xdc,0x33,0x7b,0x8a,0x73,0xa0 +.byte 0xa5,0x5f,0x8f,0x81,0x0e,0x51,0x06,0x13,0x6b,0x56,0x16,0x91,0x1f,0xf5,0x6b,0x68,0xe6,0x8b,0x69,0xda,0x0a,0x9c,0xb1,0x74,0x8f,0x1c,0xb3,0xbf,0x52,0x59,0xaa,0xb1,0xb6,0x3a,0x81,0xc2,0x04,0x54,0x12,0x46,0xa2,0xd5,0x21,0xdf,0xe0,0x57,0x1f,0xe8,0x36,0x56,0x87,0xbf,0xcb,0x7d,0x06,0x6c,0xd5,0xc9,0x4e,0xca,0x47,0x47,0x11,0x91 +.byte 0x7a,0x14,0x13,0x5d,0x5d,0x46,0xd5,0x3a,0xe4,0xa4,0x4d,0x99,0x3a,0x54,0x99,0x62,0xb4,0x70,0xa0,0xf5,0x8a,0xda,0x05,0x75,0xf1,0xa5,0xa1,0x5d,0x9d,0xc4,0x7f,0x83,0x8a,0x5b,0x09,0x54,0x0e,0x69,0x28,0xef,0x66,0xfb,0xe4,0xc4,0xe4,0xc4,0xda,0xb0,0xda,0xe2,0x19,0x33,0x3c,0x76,0xa0,0x35,0xdc,0x31,0x4e,0x40,0xfe,0xb8,0x20,0x26 +.byte 0x8f,0x6f,0x7d,0x02,0x54,0x86,0x1d,0xca,0xa6,0x10,0xa6,0x89,0x87,0x3a,0x5a,0xd5,0x3d,0x0f,0xb5,0x81,0x7d,0xab,0xb6,0xc6,0x36,0x87,0xce,0xd7,0xe4,0xc3,0x9e,0xc2,0x9c,0xf6,0x75,0xd5,0x9a,0x69,0xd2,0x13,0x89,0x5a,0xe9,0x29,0xc9,0xf5,0x6e,0xcc,0x05,0x87,0x0a,0x61,0x49,0xd7,0xa5,0x76,0xd0,0xaf,0x96,0xe0,0x2f,0x91,0xf4,0x45 +.byte 0x70,0x5a,0xdc,0x9f,0x07,0x7f,0x86,0x02,0xa4,0x83,0x8d,0x4a,0x6d,0xfc,0x1b,0xd8,0x9b,0xc2,0x42,0x4f,0xcb,0xdf,0xcb,0xe0,0x55,0xb4,0x8f,0xf7,0x27,0x73,0xd9,0x7e,0xf8,0x3a,0x5c,0x4f,0x29,0x64,0xd8,0x39,0xfa,0xf2,0xc4,0x6b,0xeb,0x55,0xc3,0x13,0x22,0x15,0xdf,0xc5,0x91,0x6d,0xd7,0xf3,0x11,0x34,0x08,0xce,0xe5,0xbd,0x16,0x14 +.byte 
0x60,0x14,0x8a,0xed,0x4d,0x38,0x98,0x15,0x5d,0xee,0x70,0xff,0x05,0xd2,0x74,0x3a,0x5f,0x78,0x1a,0x70,0x61,0x2a,0x42,0x4a,0xf3,0x15,0x6f,0x9e,0x33,0xca,0xb8,0x46,0x22,0x64,0xd6,0x24,0xe8,0x10,0x1a,0x89,0xab,0x74,0xdf,0x56,0x35,0x41,0x57,0xe1,0xd9,0x4b,0x67,0x60,0x89,0x6f,0xbf,0x73,0xac,0x6b,0xf9,0x78,0x3f,0xbc,0xf3,0x2a +.byte 0xb5,0x8c,0x1f,0xda,0xe7,0xe2,0xac,0x60,0xbf,0x41,0x96,0xbb,0xd5,0x35,0x9c,0x56,0xe7,0xfd,0x95,0xc7,0x4d,0x32,0xa1,0x07,0x34,0xbc,0x99,0xca,0xcc,0x42,0x71,0xfb,0xec,0x5c,0x1e,0xf9,0x8b,0xde,0x43,0x65,0x84,0x16,0x52,0x0a,0x5e,0x92,0x20,0xd8,0x26,0x4b,0x97,0x71,0xde,0xd2,0x1f,0x2e,0xd1,0xb2,0xb6,0x29,0x6a,0x6d,0x41,0x00 +.byte 0x20,0x3d,0x03,0xf8,0x43,0x7b,0x57,0x87,0x4e,0xf1,0x8e,0x6f,0xd3,0xf4,0x6c,0x6c,0x29,0xf6,0x99,0xe3,0xd3,0x1d,0xd3,0x26,0x21,0x3b,0x02,0xa2,0xc1,0x06,0xcf,0x31,0xec,0x7f,0xc6,0x80,0xbc,0xab,0x86,0x01,0xff,0x11,0x8a,0x24,0xfd,0x1b,0x41,0x49,0xd4,0xbe,0x15,0x34,0x82,0xc5,0x02,0x51,0x67,0x5c,0x41,0x8e,0xbf,0x94,0x12,0x15 +.byte 0x64,0xea,0x00,0x0c,0x51,0x40,0x57,0x66,0x1e,0x6d,0x3e,0x41,0x8e,0x84,0xdf,0x71,0xb8,0xd7,0xfa,0x12,0x17,0x22,0x17,0x05,0xdc,0x82,0xfd,0x7c,0x5e,0xfa,0x62,0x23,0xa8,0xbe,0x14,0xdc,0x84,0x42,0xf0,0x90,0xc5,0xb0,0x68,0xbe,0x64,0x74,0xc3,0xa5,0xd1,0x10,0xcf,0xe3,0xd1,0x09,0x98,0x3b,0xb9,0x19,0xf2,0x9b,0x5d,0x90,0x99,0x3d +.byte 0x30,0x67,0x55,0x34,0x50,0x78,0x3b,0xd2,0x70,0xb1,0xd2,0x91,0x4e,0xfa,0x98,0x7d,0x93,0xad,0x7f,0xb1,0x89,0xb0,0x61,0x4c,0x95,0x3f,0x51,0x95,0xd7,0xc6,0x87,0x7a,0xc5,0x53,0xb6,0x6d,0x61,0xec,0xbe,0x40,0x1f,0xa5,0x7f,0x73,0x4a,0x78,0xd2,0x58,0x1e,0x41,0x8e,0x9a,0x08,0x49,0xce,0x39,0x52,0xf9,0xd1,0xcd,0x41,0xb6,0x39,0x99 +.byte 0xfa,0xfb,0x1c,0x38,0xe1,0xe5,0xe1,0xd6,0x16,0x0f,0xc8,0x12,0x0b,0x88,0xdc,0x00,0xd4,0x7b,0x24,0x69,0x16,0x27,0x37,0xa3,0xd5,0x39,0x27,0x34,0xda,0x23,0x24,0x50,0x13,0xd8,0x02,0x48,0x14,0xd7,0xc9,0x28,0x1b,0xba,0x66,0xa8,0xc8,0x9a,0x7b,0xed,0x92,0x5b,0x78,0x46,0x79,0x5a,0xd1,0xf2,0x75,0xf0,0x98,0xd3,0x9f,0x4c,0x72,0x51 +.byte 0xed,0xe5,0xce,0x83,0xac,0xe1,0xc8,0x2b,0x7f,0x77,0x6a,0x70,0xdd,0x80,0x88,0x62,0x58,0x94,0x15,0x72,0x53,0x34,0x48,0x17,0xb2,0xe8,0x4a,0xab,0x2d,0x4e,0xef,0x93,0xb7,0xba,0xd1,0x1c,0x53,0x69,0xd5,0xac,0xa1,0x61,0x7c,0x44,0xec,0x81,0x72,0xcc,0xe8,0x6f,0x5d,0x67,0x1f,0x65,0x9a,0x34,0xf5,0x95,0x89,0x1c,0x2e,0x54,0x42,0xc0 +.byte 0x85,0x79,0xb0,0xfa,0x44,0x0d,0x28,0xc4,0x20,0x2f,0x2e,0x85,0x73,0xfb,0xf6,0x44,0x0e,0xbc,0xab,0x4f,0x42,0x5c,0xdb,0x1f,0x11,0x6f,0x9a,0x23,0x75,0x70,0x78,0x1a,0xd2,0xb8,0x83,0x72,0xf5,0xf6,0x40,0x48,0x3f,0xc8,0xd5,0xe3,0x2c,0x08,0x5c,0x0c,0x2a,0xb0,0x8e,0x69,0xe6,0xdf,0x4b,0x4a,0x95,0x9c,0x4c,0x5e,0x09,0x24,0xc3,0xd0 +.byte 0x4c,0x20,0x0c,0x9a,0xce,0x95,0x53,0x6a,0x7b,0x54,0x0a,0x7e,0x73,0xa7,0x95,0xe7,0x7c,0x67,0x9d,0x05,0xbc,0x26,0x3a,0xa1,0x43,0x99,0x7a,0xee,0x04,0xcf,0x94,0x02,0x36,0x26,0xb3,0x81,0x74,0x22,0xee,0x1e,0x9e,0xe2,0x82,0xd4,0xe0,0xca,0xf2,0xec,0xd2,0x9e,0xf8,0x3f,0x9f,0xc4,0x5b,0xe8,0xfc,0xbd,0x93,0xaa,0xc3,0x2f,0xce,0xf2 +.byte 0x32,0xa9,0x23,0xf3,0xe1,0x06,0xae,0x7d,0x87,0xe9,0xe7,0xe0,0xc1,0x7c,0x74,0x9c,0xdf,0x86,0x6d,0x5c,0x8a,0x51,0x45,0x9d,0x43,0x49,0x87,0x45,0x75,0xfb,0x40,0x55,0xab,0x9a,0x52,0xf1,0x32,0x5e,0xde,0x8b,0x52,0x50,0x9f,0xb8,0x7a,0xe5,0x1c,0x40,0x4f,0xc7,0xb1,0x29,0x90,0xcc,0x98,0x99,0xa0,0x4e,0x1c,0x43,0x6e,0x91,0x61,0x9c +.byte 
0xf7,0xa7,0xf7,0x43,0x89,0x15,0x8c,0x56,0x22,0x9d,0x66,0xac,0x71,0x19,0xdc,0xb9,0xf8,0xd3,0xaf,0x2e,0xd7,0x7b,0xc3,0xe4,0x25,0x0d,0x2c,0xaf,0x15,0x8c,0xea,0x2b,0xdb,0x8c,0x71,0xff,0x55,0x29,0x11,0x35,0x11,0xef,0xb0,0x97,0xb2,0x95,0xab,0xeb,0x4a,0x40,0x1c,0x92,0xc4,0x13,0x36,0x74,0x53,0x78,0x51,0x6c,0xca,0x37,0xcb,0xda +.byte 0x5e,0x6b,0x8c,0x69,0xc5,0xd0,0xf9,0xdb,0xbe,0xd9,0x30,0x42,0x16,0xcf,0x40,0x63,0x87,0x10,0x28,0x7d,0xae,0xa9,0x8c,0x14,0x99,0xe1,0x4f,0x11,0x98,0x7e,0xe9,0x14,0x9c,0x2e,0xe2,0xed,0x20,0x15,0x7c,0xb5,0xf4,0xc9,0x16,0x30,0x8d,0x7c,0x61,0x45,0xf4,0x23,0xf5,0xdb,0x81,0x8f,0x6b,0x41,0xaf,0xa9,0xf8,0x51,0xbe,0xc4,0x5d,0x8c +.byte 0xda,0x5e,0x07,0x62,0x7c,0xc6,0xd1,0xae,0x91,0x5e,0x05,0xa8,0xc6,0xc5,0xfc,0xb7,0x12,0x2e,0x7f,0x85,0xef,0xbd,0x2b,0x56,0x57,0x32,0xad,0x3d,0x97,0x5b,0x26,0xcf,0xd3,0xe7,0x48,0x4e,0x9b,0x15,0x98,0x77,0xb4,0x3e,0xf1,0x3e,0x1c,0x21,0xb0,0x98,0xe2,0x69,0xee,0xd8,0x29,0x10,0x93,0xd5,0xc9,0x71,0x8f,0x28,0xbd,0xe3,0xd9,0x54 +.byte 0xf3,0x72,0xb6,0x85,0xe9,0x2b,0xdc,0x96,0x52,0x53,0x5c,0x61,0x54,0x96,0x4a,0xf5,0x3f,0xee,0x53,0xc3,0x63,0xc9,0x67,0x14,0xdf,0x3a,0xfe,0x46,0x8a,0xa6,0xec,0x06,0x0c,0xea,0xb8,0x82,0x49,0xb5,0xed,0x94,0xf2,0xac,0x76,0xd5,0x87,0x79,0x15,0x4f,0xa1,0x34,0x90,0x8e,0x7b,0x02,0xf7,0x02,0xb0,0x07,0xa5,0x7c,0x6b,0xc2,0x34,0x84 +.byte 0xd4,0xaa,0xbf,0x32,0x81,0xf7,0xed,0x1f,0x61,0xd7,0x6e,0x40,0xa0,0xdc,0x4c,0xb5,0xb7,0x36,0x3a,0x87,0x09,0x82,0xd5,0x5a,0xc8,0x1f,0xe6,0x77,0xa6,0xaa,0xcf,0x3c,0x7b,0x23,0x46,0x58,0x95,0x7f,0x84,0xba,0x4a,0x05,0x0b,0x36,0xdb,0x58,0xf9,0xa4,0x2b,0x24,0xd4,0x8a,0xbc,0xb2,0xb7,0x04,0xac,0x64,0x0e,0x88,0x25,0x9a,0x69,0xe7 +.byte 0x87,0x70,0x0b,0xa6,0x43,0xe9,0xb2,0xbb,0x4e,0x4c,0x10,0x19,0x44,0x4d,0x12,0x4c,0x58,0x2a,0x49,0xe2,0x01,0xd2,0x65,0x23,0xee,0xe9,0xca,0x0b,0xa1,0x28,0x02,0x8d,0xcf,0x37,0x06,0xbc,0x5d,0x35,0xba,0xec,0x97,0x95,0xcc,0xfe,0x7b,0xc9,0x1c,0x0d,0x89,0x4e,0xe1,0x8d,0x9b,0x5e,0x5b,0xb9,0x6c,0x24,0x73,0x9a,0x62,0xd7,0xc5,0xfa +.byte 0x54,0xeb,0x05,0x22,0xd9,0xe7,0xc4,0x68,0x88,0x20,0x43,0xd9,0x14,0x47,0xd7,0xa5,0xd0,0xce,0x10,0x77,0xe8,0x5c,0x85,0x39,0x99,0x3f,0x72,0x88,0x4f,0x22,0x15,0x87,0xa0,0xa3,0x47,0x10,0x81,0x64,0xff,0x94,0x77,0x5d,0xce,0x6d,0xd8,0x29,0xb1,0x9c,0x8e,0xce,0xa8,0x39,0x4f,0xfc,0x36,0x3c,0x50,0xb2,0xf1,0x08,0x66,0x1a,0xf0,0x22 +.byte 0x65,0x1f,0x4d,0x17,0xd3,0x63,0x10,0x64,0xd1,0xc6,0x5a,0x3e,0x82,0x72,0x0c,0x48,0x5e,0x07,0x9c,0x07,0xa0,0x40,0x60,0xab,0x74,0x9a,0x00,0xdf,0xd7,0x7d,0xd4,0x11,0x4e,0xce,0x5a,0xaf,0x12,0x4f,0xe7,0x12,0x36,0x1a,0x12,0x11,0x16,0xb7,0xad,0x4b,0x28,0x84,0x7b,0xd8,0x30,0x0d,0x85,0xb8,0x76,0xde,0xa3,0x78,0x8c,0xb7,0x7c,0xbc +.byte 0x97,0x33,0x53,0x95,0xf8,0x14,0x5f,0xf8,0x0d,0xc1,0x6b,0x79,0xa2,0x42,0x49,0xab,0xae,0x8e,0x78,0xf3,0x51,0x01,0xcc,0x20,0x36,0x80,0xbd,0x32,0x0b,0x1b,0xd2,0xcd,0x27,0x52,0x69,0x1b,0x4a,0x37,0xba,0x31,0xe4,0xc2,0x03,0x8d,0x00,0x48,0x4b,0xcd,0x39,0x2e,0xec,0x94,0x2e,0xe0,0x81,0xfd,0x94,0xd9,0x86,0x39,0x23,0x87,0x3c,0x2f +.byte 0x25,0xe1,0x5b,0x22,0xe0,0x2e,0x37,0x6d,0x9b,0x97,0x9c,0x94,0x37,0x01,0x26,0xb8,0xb1,0x73,0x7c,0xfc,0x0a,0x64,0xe7,0x54,0xf1,0x0f,0x71,0xa1,0xd6,0xc7,0xc8,0xb4,0x86,0x2d,0xfe,0x30,0x8b,0xca,0xb2,0x18,0x21,0xc0,0xc7,0x7d,0x60,0xcf,0x2e,0x25,0xb0,0xa4,0x1a,0x28,0x19,0xa9,0xa9,0x15,0x32,0x5e,0x21,0x89,0x3a,0x99,0x5f,0x50 +.byte 
0x86,0x37,0x3b,0x10,0xb8,0xa5,0xad,0x8e,0xbf,0xfc,0x8c,0x85,0xf1,0x76,0x5c,0xe7,0x4d,0xac,0xe7,0x21,0xb3,0x45,0x87,0x3b,0x05,0xc8,0x41,0xf4,0x99,0x83,0x28,0x40,0x6b,0x30,0x37,0x31,0xd2,0xb3,0xdd,0x43,0x3b,0x3f,0xec,0x50,0x58,0x7d,0x20,0xc6,0xb2,0xa9,0x3c,0x22,0x38,0xea,0x16,0x32,0x01,0xc4,0xb0,0x9f,0x7d,0x12,0x91,0x82 +.byte 0x0c,0xd8,0x36,0xfc,0xa4,0xec,0x06,0xb2,0xc2,0xce,0x9b,0xa4,0x53,0x71,0x77,0xdd,0xc3,0xfc,0x34,0x6f,0xd9,0x5c,0xfc,0x36,0xdd,0x63,0x19,0x06,0xfb,0x3c,0xf3,0x3f,0x82,0x28,0x6d,0x00,0xf9,0xfd,0x8d,0x6b,0x79,0x06,0x8a,0xe7,0x6f,0xcc,0x39,0x12,0x80,0x71,0xcb,0x71,0xb3,0xb6,0xa4,0xa8,0xbe,0x61,0x9d,0x1f,0x48,0xa2,0x15,0xa1 +.byte 0xb5,0xf5,0x16,0x70,0xc5,0x39,0xce,0x43,0xa3,0x09,0xe5,0xf4,0x8b,0x77,0x18,0x5e,0xa0,0x77,0xa3,0xa4,0x17,0x2c,0x3e,0x50,0x73,0x2f,0xaa,0x5d,0x58,0x5e,0xdc,0xec,0xaf,0xca,0x6e,0x57,0x80,0xa3,0xd5,0x94,0x30,0x7c,0x11,0x75,0xc4,0xbb,0x9d,0x18,0xc1,0x5a,0x58,0xc7,0x04,0x56,0xb1,0x3a,0x21,0x55,0x02,0xea,0xad,0x58,0x19,0x72 +.byte 0xdc,0x7d,0x0e,0x41,0x62,0x1b,0x5c,0x48,0x97,0x3f,0xed,0xd7,0x4e,0x30,0x1f,0xf5,0xde,0xc5,0x23,0xf2,0xd7,0x22,0xde,0x2f,0x3e,0x80,0x06,0x81,0xf6,0x24,0xb7,0x91,0x09,0x56,0x91,0x00,0x1a,0xea,0xaa,0xa6,0xc2,0x8b,0xc9,0x78,0xd7,0xde,0xf6,0x87,0xb1,0x04,0xcc,0xbb,0xc1,0xc6,0x48,0x43,0xc8,0x03,0xb2,0xdd,0x70,0xc0,0xe3,0xf5 +.byte 0xc0,0xf5,0x13,0xd5,0x11,0x41,0x7f,0x1a,0xdc,0x48,0xf5,0xd6,0x1b,0x0a,0x84,0xd2,0x84,0xcd,0x10,0x4f,0x0a,0xd7,0xcb,0x41,0x61,0x1c,0xcc,0x5c,0xa9,0xbd,0x6e,0x6a,0xf3,0x81,0xd8,0xaa,0x3a,0xff,0x39,0x90,0x8e,0x33,0xe6,0x58,0x13,0x5f,0xec,0x58,0x74,0x35,0xe0,0x06,0x38,0x0f,0xd0,0xbf,0x8d,0xf7,0x26,0x99,0xea,0xdd,0xfb,0xdf +.byte 0x5b,0xcc,0xf1,0x3d,0x9b,0x84,0x8b,0x5b,0xe8,0xc4,0xc6,0x3e,0x0a,0x55,0xec,0x73,0xf7,0x70,0xb1,0xc8,0xfa,0xf8,0xd6,0x72,0x2c,0x6d,0x8d,0xc1,0xa3,0xb2,0x9a,0xe7,0x80,0x6d,0x09,0xa6,0x76,0x06,0x71,0xf9,0x95,0x9a,0xa9,0x2f,0x4b,0x7c,0xad,0x64,0x01,0x01,0x91,0xe4,0x87,0x1d,0xe1,0x46,0xf5,0x4a,0x96,0xc6,0x58,0xd9,0xe0,0xa9 +.byte 0x2f,0x80,0x1e,0xd6,0xe9,0xa6,0xeb,0xfe,0x5a,0xb6,0xd3,0xe8,0x76,0xd2,0x51,0xc6,0x68,0x34,0xc9,0xed,0x76,0x29,0x7e,0x63,0xb1,0x09,0xdf,0x23,0x47,0x41,0x2f,0x70,0x46,0x4d,0xbb,0x36,0xc8,0x84,0xe9,0x58,0x20,0x6b,0x04,0xb2,0xa4,0x1c,0x4d,0xe0,0xa5,0xa2,0x59,0xc9,0xed,0x63,0x25,0x5f,0x3f,0x24,0x18,0x59,0x29,0xe3,0x79,0xbd +.byte 0x35,0x50,0xee,0x81,0x59,0xff,0xd4,0x0e,0x62,0xd3,0x52,0x30,0x81,0xa2,0xe6,0x9e,0xc3,0xc9,0x7a,0x10,0x57,0x36,0x27,0xb7,0x3c,0x61,0x38,0x89,0x70,0xa0,0xc5,0xdf,0x78,0x05,0xa5,0x81,0xe2,0x8a,0x93,0xda,0x7c,0xaf,0xbf,0x6d,0x42,0x09,0x1b,0x43,0x9d,0xf9,0x26,0x87,0xc3,0x84,0x6c,0xb7,0x25,0x31,0x50,0x00,0xd8,0x13,0xc0,0xc0 +.byte 0x6c,0x21,0x82,0x6d,0xf9,0x2f,0xef,0x40,0xe8,0xf8,0xae,0x4d,0x9e,0x1d,0x4a,0xda,0xa0,0x0d,0x77,0x36,0x8b,0xed,0xaf,0x6e,0x2a,0x3d,0xa8,0x36,0xe4,0xff,0x37,0xc2,0xa3,0x11,0x5e,0x68,0x58,0xa8,0xa3,0x19,0xf3,0xc1,0x33,0xea,0x39,0x49,0xfe,0x51,0x87,0xb6,0x31,0x6a,0x61,0x47,0xe7,0xb1,0x46,0xde,0x5a,0xf7,0x93,0x06,0xa7,0x72 +.byte 0xa9,0x2e,0x9e,0x2e,0xc9,0x7f,0xe1,0xb2,0x86,0xb4,0xc9,0xff,0x3b,0xf7,0xaf,0xef,0x91,0x47,0xc2,0xfa,0x42,0x0a,0x4e,0xbb,0x10,0x0d,0xea,0xa4,0x11,0x54,0xa9,0x53,0xde,0xc4,0x01,0xde,0xc7,0x2d,0x1f,0x18,0x40,0x79,0xd1,0x44,0x7d,0x51,0x1d,0xf6,0xdc,0x6f,0xad,0xa2,0x5d,0xd9,0xbe,0x5d,0x11,0x57,0xb7,0x68,0x0d,0x96,0xad,0xb3 +.byte 
0x32,0xf7,0x99,0xcc,0x0e,0x03,0xa2,0x79,0x9b,0x63,0xce,0xee,0xf9,0x0c,0xfd,0xfa,0x9a,0x82,0xc9,0x43,0xd3,0xd5,0x23,0xfa,0xac,0x75,0xbe,0x61,0x85,0x18,0xb6,0x75,0x72,0x8d,0x17,0xdd,0xde,0x3f,0x6d,0xb4,0xe8,0x47,0x09,0xe1,0xa7,0xe0,0x4c,0xce,0x93,0x7b,0xc3,0xa3,0x3f,0xc0,0x81,0x21,0x6f,0xe8,0xce,0x68,0x61,0xde,0x1a,0x58 +.byte 0x48,0x7f,0xb4,0xae,0xfd,0x7c,0x80,0x63,0x43,0x5a,0xfc,0xf9,0xf9,0x4d,0xb4,0x8c,0x85,0x27,0x12,0x4f,0x7d,0xe8,0x69,0xc3,0x7d,0x57,0x63,0x0d,0x5f,0xd2,0x85,0x4e,0x0c,0x9a,0x0d,0x1c,0x4d,0xdf,0x3f,0x9a,0x16,0x2f,0x34,0x43,0xc3,0xf0,0xf1,0x16,0x16,0xd2,0x9f,0x2e,0x78,0xd8,0x3c,0x63,0xa0,0x7e,0x02,0x8e,0x65,0xd2,0xb0,0x61 +.byte 0xb0,0x1d,0x7a,0x8f,0xf7,0x30,0x45,0x05,0xf7,0x15,0xc3,0x69,0x24,0x98,0xc3,0x74,0x20,0x16,0x09,0x57,0x39,0x16,0x68,0x23,0x33,0x62,0x4c,0xf5,0xd6,0x34,0xe3,0xad,0x7a,0x14,0x64,0x8c,0x2b,0x48,0x96,0xf9,0x85,0x39,0x19,0x73,0x27,0x04,0xa6,0x55,0x66,0x15,0x8c,0xf1,0x47,0xcd,0x53,0xaf,0x31,0x3a,0xd9,0xfa,0xf9,0xac,0xbd,0xb8 +.byte 0x27,0xe0,0xaa,0xa5,0x62,0x85,0x9f,0xbb,0x4e,0xaf,0xa5,0x72,0x42,0x98,0xa6,0x7f,0xa1,0xb6,0xac,0x17,0xc2,0x2c,0xf3,0xd6,0xc0,0x14,0x4b,0xb3,0x86,0x88,0x89,0x81,0x83,0x7d,0x9d,0xf7,0xe3,0xe4,0x27,0xba,0xa8,0x03,0xb4,0xe3,0x97,0x74,0x1c,0x0d,0xab,0xb4,0x6e,0xc6,0x9e,0x58,0xdd,0x15,0x95,0x2f,0xa6,0xd6,0xaa,0x5a,0x96,0x71 +.byte 0x69,0xca,0xe0,0x5f,0xd2,0x3c,0x66,0x1b,0x58,0x25,0xd6,0xec,0xc0,0x46,0x3e,0x56,0xd0,0xe1,0x36,0x44,0x56,0xc0,0xf2,0x15,0x48,0x9e,0x07,0xce,0x5d,0xb9,0xd4,0x4e,0xcc,0x31,0x26,0xaa,0xdb,0x6a,0x87,0x98,0x0e,0x37,0xfc,0xc5,0x91,0x28,0x1b,0xf8,0x70,0xbf,0x30,0x71,0xbe,0xa0,0x81,0x1e,0x30,0x33,0x37,0x37,0xc8,0x07,0x08,0x9b +.byte 0x8f,0xe4,0x27,0x9f,0x90,0x67,0xb4,0x96,0x08,0xd7,0x30,0x9e,0xa6,0x53,0x39,0xd1,0x9b,0xde,0x02,0x35,0xf3,0xb1,0x19,0x7b,0xd2,0x28,0x5a,0xc3,0x1f,0x69,0x0e,0x48,0xbf,0xa3,0xb4,0x55,0xd1,0x10,0x3d,0x30,0x71,0xc6,0x82,0x2d,0xb8,0x6f,0xe6,0x99,0x6b,0xef,0x9f,0x86,0xed,0x93,0x13,0xb6,0xb0,0x87,0x91,0x77,0x4a,0x00,0xe4,0x5f +.byte 0x4c,0x7d,0x41,0x3b,0xc9,0xda,0x99,0x6b,0xff,0xec,0xef,0x05,0x3c,0xc6,0x0d,0xec,0x68,0x12,0x44,0x31,0xac,0xc9,0x0b,0x9c,0xf5,0xea,0xed,0xda,0x88,0xec,0x6e,0x6e,0x73,0xda,0x85,0x52,0x69,0xa1,0x13,0x52,0xcf,0xc3,0x4d,0x95,0x88,0xec,0x1f,0x53,0x81,0x6f,0xac,0x53,0x60,0x48,0x20,0x9a,0x4d,0x88,0x2c,0x4b,0xb0,0x69,0x5f,0x07 +.byte 0xf9,0xa7,0x2c,0x9a,0x13,0x91,0x86,0xa2,0x98,0x20,0xa9,0x80,0x1e,0xaa,0x8e,0xbc,0x3c,0x3d,0x51,0x34,0x3d,0x5b,0x80,0xe4,0x39,0xfe,0xc8,0xb1,0x6d,0xfe,0x36,0x9d,0x9b,0xde,0x22,0x39,0x41,0xe9,0xff,0xda,0x67,0x67,0xd4,0xeb,0x60,0x44,0xd5,0xc1,0x74,0xcd,0xa0,0x98,0x06,0x34,0x76,0xf8,0xe5,0x0d,0xc8,0x52,0xca,0x83,0xd2,0xdd +.byte 0xf2,0x12,0x36,0x7d,0x3e,0x7f,0xbd,0xa6,0xd8,0x1e,0xc0,0x9d,0x67,0x2a,0x33,0x87,0x86,0x79,0x7a,0x70,0x3a,0x63,0x0b,0x74,0x77,0x89,0xce,0x8f,0x5a,0x3b,0xf3,0x2e,0x52,0x4d,0x1d,0xc6,0xc3,0xc8,0x69,0x98,0xdc,0x81,0x45,0x99,0xfd,0xcd,0x6b,0x6d,0x05,0x33,0x40,0xde,0xb3,0xbd,0x4a,0x27,0xc2,0x9e,0x8b,0xf1,0x4c,0xac,0x92,0x82 +.byte 0x55,0x04,0x79,0xe7,0x28,0x74,0x5b,0x70,0xdc,0xc0,0x4f,0x0c,0xcf,0x3a,0x7f,0x08,0xcc,0x2e,0x1d,0xfd,0x8d,0xd9,0x5c,0xe2,0xa7,0x98,0xc1,0xe8,0x4b,0x96,0xbe,0x27,0xd6,0xfd,0x0a,0x59,0x30,0x33,0x85,0x41,0xc5,0x63,0xab,0xe7,0xda,0x26,0xbd,0xce,0xe7,0x9d,0x50,0xd7,0x2d,0x67,0x7a,0xa1,0x05,0x2b,0x74,0x60,0x5e,0x6c,0x04,0x2b +.byte 
0xba,0xe6,0x2d,0x25,0xc9,0x00,0xd0,0xf0,0xa5,0x4f,0x22,0x59,0x34,0xb8,0x43,0x6b,0xb7,0x67,0x25,0x99,0xff,0x75,0x17,0xb1,0x13,0x7e,0x34,0x1d,0x42,0xa3,0x6b,0xb5,0x9d,0xfe,0xa1,0x71,0x0d,0x90,0x81,0x58,0xfc,0xc7,0x85,0xe6,0xbd,0xc2,0xcc,0xc9,0xc9,0x23,0x6e,0xd6,0xbe,0x4a,0x61,0xd4,0xf5,0x9e,0x37,0x6a,0xb1,0x8b,0x91,0x59 +.byte 0xe1,0x3e,0xac,0x87,0x54,0xa6,0xf9,0xf5,0x90,0xd2,0x7c,0xba,0x4b,0x37,0x33,0x1b,0x88,0x5e,0xbd,0x78,0x3f,0xed,0x43,0x40,0x4f,0x16,0x59,0x29,0xbc,0x27,0x98,0x87,0xfe,0x62,0x56,0x93,0x21,0x0a,0xca,0xc1,0x21,0x99,0xb3,0x32,0xbb,0x5a,0x79,0x40,0xab,0xea,0x00,0xf8,0xe9,0x90,0x0d,0x59,0xbd,0x6e,0x7f,0x74,0x01,0x50,0x67,0x3a +.byte 0x8e,0x24,0x1d,0x6c,0xc8,0xd6,0x93,0xca,0x71,0x95,0xec,0xac,0x78,0xe9,0x1f,0x38,0x0d,0xa2,0xe5,0x32,0x90,0xa2,0xaf,0xef,0x15,0x06,0xd6,0x52,0xa4,0xd2,0x94,0x0f,0xbd,0x86,0x81,0x82,0x12,0x9b,0x3a,0xc4,0x0b,0xdf,0x8a,0x5f,0xc6,0x3b,0xb4,0x13,0x9b,0xeb,0xed,0x2d,0x06,0x46,0xa3,0xbe,0xbb,0xe1,0xe1,0x93,0xa1,0xab,0x46,0xf3 +.byte 0xd0,0xd9,0xce,0xb6,0xfb,0xd0,0xd5,0xb6,0xde,0x0c,0xed,0x90,0x18,0x6c,0x1e,0x46,0xb0,0x36,0xa7,0xf1,0x29,0xbe,0x9a,0xa0,0xcf,0xed,0xd6,0xaf,0xb8,0x89,0x9b,0x83,0xa8,0xa0,0x8d,0x26,0xaf,0x8f,0x48,0x66,0xfc,0x22,0x1a,0xc0,0xcf,0xf8,0x90,0x57,0x7e,0x25,0x5f,0xe4,0x0c,0x68,0xd2,0xaa,0x59,0x09,0x2f,0x6d,0x3f,0x80,0x8d,0xe0 +.byte 0xfa,0x25,0xb0,0xe0,0x85,0xe9,0x13,0x39,0x3d,0x1f,0xed,0xd1,0x94,0x9b,0xb5,0xc2,0x65,0xda,0xec,0x7a,0x1f,0x2f,0xe2,0x0a,0x42,0x09,0xbd,0x79,0x7d,0xcb,0xb8,0x4a,0x02,0x2b,0x72,0xaf,0x33,0x85,0x72,0x1b,0x18,0x0c,0xa3,0xec,0x39,0x0e,0x30,0x21,0x41,0xf8,0x2e,0xc7,0x8e,0x5c,0x4c,0xda,0x22,0x49,0x8c,0xa7,0xfb,0x89,0x76,0x2e +.byte 0x45,0x90,0x6c,0xeb,0x70,0x78,0x6d,0x6e,0xee,0x12,0x6c,0xb9,0xb9,0x8d,0xe7,0xf3,0x4d,0x86,0xc4,0x58,0x49,0x55,0xa6,0x86,0xaf,0x39,0x03,0x21,0xfa,0xa7,0xdd,0x51,0x80,0x79,0x6d,0x5b,0xa5,0x58,0x0f,0xfd,0x57,0xb3,0x83,0xe6,0x0d,0x25,0xec,0x55,0xdc,0x0a,0x6f,0xbc,0x7d,0xfd,0x94,0x16,0xdd,0x60,0x9f,0x2a,0x4b,0x6c,0x82,0x03 +.byte 0x4b,0x44,0xbb,0x84,0xdc,0xcb,0x97,0x8e,0x58,0xe7,0xc1,0x79,0xa9,0xf3,0x53,0x78,0x1f,0xf1,0x3e,0xdd,0x94,0x24,0x6d,0xb1,0xd2,0x99,0xbc,0xa1,0xbe,0x7d,0xdd,0xff,0xa8,0x5d,0xd2,0xc2,0xba,0xad,0x60,0x6b,0x40,0x5d,0x7b,0x99,0xd2,0xea,0x45,0x66,0x80,0x6c,0x47,0xf2,0xeb,0x94,0xb8,0xe8,0xe8,0xa0,0x46,0x05,0xe1,0x4f,0x40,0x23 +.byte 0x34,0xdf,0x91,0x63,0xae,0xc9,0xe7,0x32,0x20,0x9a,0x95,0x1e,0xcd,0x5a,0x60,0xe1,0x3d,0xe0,0xf1,0x16,0x3d,0x6e,0x8b,0x96,0x23,0xe0,0xaa,0x1d,0x1a,0xde,0xed,0xc6,0x63,0xb5,0x46,0x8b,0x78,0x71,0x9a,0x14,0x88,0x79,0x61,0x68,0x6b,0xcf,0x80,0xd8,0x9c,0xaa,0xfb,0xb1,0xc0,0xf3,0x39,0x07,0x26,0x56,0x80,0xba,0x9d,0xf5,0xe7,0x95 +.byte 0x99,0xac,0x90,0xea,0xe7,0xe1,0xc9,0x0d,0x40,0x94,0x83,0x58,0xd2,0xc3,0x2b,0xce,0x1e,0xae,0x2a,0xa6,0xfa,0xc7,0x89,0x44,0xcb,0xe2,0x9e,0x74,0x33,0xaa,0x70,0xe5,0x28,0x3a,0x51,0x74,0x53,0xe2,0xfb,0x7c,0x47,0x76,0x22,0xdf,0x46,0xa6,0x01,0x17,0xef,0x88,0x43,0x46,0x3f,0x1a,0x26,0x0c,0xad,0xf4,0x31,0x55,0xf2,0xe7,0xc9,0x35 +.byte 0x6f,0x7c,0x0c,0x5c,0xfd,0x43,0xa4,0x6c,0x6c,0x74,0xf0,0xa4,0xec,0x1d,0x83,0x97,0xc1,0x6c,0x9c,0xd7,0x97,0x90,0x7c,0x07,0x88,0xc0,0xb4,0x79,0x2c,0x7a,0x9c,0x93,0xa2,0x15,0x6c,0xd2,0xa9,0x45,0xa5,0xc1,0x16,0xfe,0x72,0xf4,0x01,0x32,0xe4,0x51,0xdd,0xdb,0x50,0xe3,0x61,0x4e,0x29,0x1e,0x27,0x10,0xe9,0x5e,0x30,0x2b,0x30,0x27 +.byte 
0x99,0xff,0x92,0x23,0x04,0x8d,0x28,0x68,0x28,0xd3,0x0f,0xec,0xbb,0xf9,0xfb,0x44,0x1c,0xaa,0x8b,0x38,0x95,0x67,0x1e,0xf5,0x42,0xc9,0xec,0x05,0xeb,0x94,0xe5,0x1c,0x8a,0x2a,0xef,0x3b,0x74,0x46,0x89,0x4f,0xd5,0x6f,0xa0,0xe5,0x74,0xae,0x24,0x8d,0x81,0xae,0x9d,0x3c,0x3e,0x3d,0x41,0x54,0x8f,0xd9,0xc2,0x98,0xf4,0x84,0xeb,0x30 +.byte 0x6a,0x06,0x67,0x11,0x2d,0xb0,0x55,0x70,0x26,0xdf,0x19,0x5f,0x81,0xe9,0x39,0x69,0x3a,0xd6,0x09,0xa4,0x40,0x22,0x1f,0x5c,0xbf,0xd5,0xa6,0xea,0x69,0x99,0x0d,0xea,0x70,0xed,0xfe,0x3a,0xba,0x23,0x8b,0xab,0x08,0xfe,0xfb,0xe9,0x1a,0x88,0x80,0x13,0x45,0x9c,0xca,0x2e,0xda,0x4a,0xc8,0x5d,0x15,0x52,0x87,0x36,0x9b,0x87,0x8a,0x76 +.byte 0x5d,0x31,0x24,0x4a,0xcb,0xf5,0xd3,0xd3,0xc1,0xec,0xde,0x1e,0x48,0x99,0xd5,0xcb,0x93,0xf7,0xca,0x2d,0xa4,0x66,0x5e,0xa4,0xcf,0xc6,0x15,0x20,0x10,0xb1,0xe2,0x8e,0xb9,0x44,0xa7,0xc3,0x54,0x14,0x86,0x08,0xb7,0x89,0x52,0xd5,0x72,0xc5,0x62,0x4d,0x82,0x96,0x23,0xcf,0x6e,0x52,0x3a,0x92,0x53,0x48,0xa2,0xa5,0x9d,0xa4,0xcc,0x32 +.byte 0x45,0x5a,0xdf,0xe2,0xbe,0xce,0x28,0xc8,0xb1,0xb7,0x0f,0x6a,0x38,0x28,0x14,0x66,0x55,0x7a,0xab,0x35,0x56,0xd0,0xc7,0xe5,0xa1,0x8a,0x84,0xf7,0xc5,0xa9,0xdb,0x2a,0x45,0xe9,0x34,0x2d,0xf2,0xed,0x2b,0xa9,0x9e,0x49,0x1b,0x23,0x10,0xeb,0x0e,0x01,0x46,0x6f,0x7a,0x50,0x09,0x5f,0xc3,0xb6,0x1e,0x2f,0x1a,0x3e,0x89,0x32,0xaa,0x5a +.byte 0xaa,0xef,0x23,0x45,0xdc,0xb5,0x7e,0x5f,0x87,0x77,0xde,0x50,0xab,0xbf,0x9e,0x62,0xa8,0xe0,0xf0,0xc8,0x4a,0xf1,0x4e,0xaf,0xe4,0x50,0x8a,0xfe,0xc9,0x68,0xdd,0x19,0x1d,0xc6,0x54,0xe5,0x38,0x0a,0x6f,0x36,0xe4,0x85,0xe8,0xab,0xc4,0x06,0xef,0x07,0x29,0xce,0xea,0x9d,0x2e,0x22,0x97,0x18,0x7e,0x59,0x89,0x92,0x31,0xc5,0x87,0x50 +.byte 0xa8,0x23,0x22,0x58,0x47,0x27,0x1c,0x89,0x5f,0xec,0x94,0x1d,0xb2,0xc8,0x61,0x1e,0x0a,0x80,0xd3,0xe9,0xbf,0x65,0xb9,0x66,0x32,0x56,0xde,0xd2,0x13,0xee,0xea,0xc4,0xc9,0xbf,0x4c,0xb7,0xa4,0x1c,0xc0,0xbf,0xcf,0xa4,0x58,0x1f,0x98,0x1d,0x25,0x4e,0x51,0xd9,0xbe,0x89,0x32,0xdb,0x7a,0xa6,0x39,0xa9,0xbf,0xed,0x65,0x6b,0x92,0xc4 +.byte 0x8d,0xcd,0x63,0x18,0x65,0x44,0x95,0xcf,0x17,0x72,0x8f,0x27,0x79,0x83,0xda,0xe3,0xe7,0xd9,0xca,0x57,0xff,0xa3,0x15,0xbf,0xb6,0xd8,0xc2,0x8c,0xe8,0xdb,0x8c,0xdc,0x54,0x6a,0xc8,0x57,0x6e,0x24,0xc3,0x3c,0x1f,0x33,0xdd,0x68,0xbd,0x7a,0xa3,0xbc,0xa9,0x9a,0xe8,0xfc,0x97,0xa5,0xbe,0x59,0xfb,0x77,0xcd,0x22,0xc6,0x3d,0x95,0x21 +.byte 0xcb,0xf7,0x8d,0xc1,0x77,0xc6,0xe0,0x06,0xb2,0xdb,0xec,0x54,0x19,0xad,0x02,0x25,0xe0,0x0f,0xda,0x4c,0xa5,0xf2,0x47,0x3f,0xc9,0xa0,0x91,0x21,0x39,0xe9,0x74,0x2a,0x9a,0xc1,0x57,0x86,0x3c,0x32,0x27,0x4c,0xc2,0x2d,0x50,0xbd,0x7a,0x04,0x9c,0x45,0x0d,0x7e,0x06,0x1d,0x3e,0xc1,0x6f,0x06,0x7f,0xd4,0x71,0xd3,0x5c,0x66,0x74,0xa7 +.byte 0x33,0x75,0x64,0xa8,0x7d,0xc0,0x23,0xda,0xb0,0x6d,0x12,0xbe,0x83,0x98,0xe7,0x65,0x38,0x4d,0x39,0xc3,0xd7,0x33,0xfb,0x58,0x64,0xfc,0xde,0xd7,0xbf,0x9e,0xdb,0xcc,0x7a,0x35,0xac,0xdf,0x13,0x08,0xbc,0x0a,0x55,0x82,0x5f,0xc3,0x74,0xc5,0xb2,0xdb,0x89,0xdc,0x9c,0x60,0xfa,0x02,0x1c,0xba,0x5b,0x7e,0x0f,0xb1,0x0f,0xad,0x43,0xe1 +.byte 0xe1,0xbe,0x1e,0x06,0x05,0x0f,0x39,0x80,0x3d,0x7d,0xbe,0x8f,0x38,0x25,0x46,0x5e,0xea,0x47,0x36,0x65,0x4c,0x3c,0x6c,0xd6,0xaa,0x46,0xaa,0xb0,0x95,0x1d,0xff,0x67,0x6c,0x70,0x9d,0xec,0x3d,0x3d,0x4c,0x2f,0xd9,0x2b,0xb0,0xbd,0x8c,0x6a,0xca,0xac,0x0c,0x53,0xa1,0xda,0xd8,0xc1,0x3c,0xaa,0xcc,0x50,0x85,0x41,0xa1,0xa7,0xe9,0x7f +.byte 
0xf7,0xa8,0x28,0xb1,0x5f,0xd6,0x77,0xc9,0xb5,0xae,0x33,0xa7,0x2d,0x16,0xe0,0x13,0xe8,0xd4,0xf9,0x4e,0x62,0x2e,0xc2,0x9a,0xf3,0x83,0xe0,0x45,0x43,0x68,0x40,0x5a,0x56,0xf3,0x31,0xc8,0x5b,0x46,0x0b,0x38,0x1f,0xa5,0xff,0xe6,0xa1,0x81,0xc0,0x91,0xe5,0x5a,0x63,0x8f,0x47,0x9a,0xe7,0x26,0x0d,0x78,0x8d,0x11,0x7d,0xc8,0xd4,0x9f +.byte 0xc1,0xf7,0x8f,0x93,0xfa,0x2f,0xb5,0xfd,0x6d,0xa4,0x34,0xcf,0x3c,0x6c,0xf6,0x64,0xae,0x5c,0x60,0xa2,0xb4,0xcc,0x18,0x3e,0x08,0x8e,0x36,0x88,0xab,0xc3,0xea,0x53,0x4f,0x1c,0x9e,0xe6,0xef,0x2d,0x9c,0x78,0x4a,0x3a,0x5a,0x60,0x8e,0xf7,0xeb,0x0b,0x36,0xb1,0xbb,0x59,0xe2,0x5e,0x64,0x60,0xe5,0xd6,0x3d,0x2a,0xe1,0x1b,0x03,0x40 +.byte 0x8d,0xde,0x2e,0xd0,0x76,0x0a,0x6b,0x63,0x2a,0x53,0x2d,0x39,0xe0,0x53,0xee,0x7d,0xc4,0x8a,0x39,0xc5,0xda,0xfc,0x31,0x7e,0xa2,0x1b,0x11,0x1d,0x8a,0x8e,0x66,0xf4,0x00,0x17,0xd3,0x78,0x1b,0x94,0xad,0xcf,0xdd,0x56,0xce,0xaf,0xf6,0x34,0xe4,0xb6,0x47,0xe0,0xda,0x1b,0x36,0x4f,0x86,0x26,0xc1,0x65,0xec,0x85,0x8c,0xa9,0xfe,0x96 +.byte 0x75,0x0d,0xe3,0xeb,0x9a,0xa6,0x3f,0xb3,0x10,0x03,0x85,0x24,0xf2,0xb5,0xcd,0x69,0x7d,0xba,0xa2,0x5c,0x8a,0x6d,0x45,0xf4,0xc8,0x4f,0x69,0x8e,0xd4,0x69,0x82,0x42,0xfd,0x00,0x59,0xfd,0x20,0x7a,0x63,0x58,0x56,0x30,0x21,0x73,0xbd,0xd4,0x49,0x84,0x3f,0x51,0x0e,0xfb,0xd3,0xfc,0x93,0x17,0x7f,0x23,0x75,0x25,0xea,0x78,0x79,0xf7 +.byte 0xec,0x22,0xef,0x86,0x91,0x0a,0x90,0x10,0x71,0x3b,0xb8,0x8e,0xb7,0xc9,0xd1,0x26,0x98,0x7d,0x1a,0xab,0x74,0x3e,0x5f,0x10,0xa8,0x47,0xdf,0xc9,0x0a,0x03,0xbb,0xe2,0xbb,0x34,0xbe,0x87,0x1a,0x3e,0x13,0x4b,0xd5,0xdd,0x53,0xb7,0x65,0xb4,0x16,0x38,0xd3,0xfd,0x01,0xde,0xe8,0xba,0x1d,0x33,0x5b,0x7b,0x9b,0x9f,0xfb,0xe7,0x8d,0x82 +.byte 0x21,0x78,0x9e,0xb2,0xf5,0x16,0x37,0x88,0x47,0x9d,0x1a,0x2c,0xfe,0x6a,0xac,0xde,0x3e,0xc4,0xa8,0xed,0x64,0x46,0xdd,0x05,0x07,0x60,0xef,0x99,0x96,0xf0,0x84,0x27,0x38,0x58,0xe5,0xc0,0x53,0x7d,0x07,0xe3,0xa5,0x31,0xb5,0x8a,0xe7,0x50,0x94,0xbb,0x29,0xf9,0x58,0x13,0x91,0x5b,0x54,0x77,0xf6,0x91,0xb8,0x75,0x05,0x3d,0x70,0x3e +.byte 0x07,0x95,0x7d,0x37,0xbd,0x1d,0x29,0x4d,0x33,0x07,0x13,0x2b,0x54,0x70,0x9c,0x31,0xf1,0xcd,0x2d,0x28,0x09,0x43,0x90,0x24,0x8c,0x82,0xb0,0x08,0x71,0x08,0x97,0x7e,0x1a,0xbc,0x82,0xd8,0x31,0x0a,0x13,0xe9,0x22,0xf0,0x8d,0x2b,0x91,0xe5,0x2e,0x34,0x56,0x97,0x86,0xc9,0xbd,0x45,0x1e,0x32,0x03,0xcb,0xa1,0x29,0x00,0x81,0xd4,0x6e +.byte 0x5d,0xbc,0x0f,0x01,0x8d,0x5c,0xb9,0x80,0xcc,0xfe,0x0d,0xa3,0xef,0x8e,0x85,0x59,0x37,0xf7,0x64,0xa7,0xe5,0x2a,0xd5,0x44,0xee,0x91,0xcf,0x6c,0xf5,0x0a,0x9b,0xc7,0xdf,0xb6,0x02,0x2d,0xa4,0xf1,0x22,0x2a,0x97,0xfe,0x1d,0xb7,0x4c,0xc7,0x4f,0x2f,0x0b,0x38,0xd2,0xbf,0xfe,0xe3,0x94,0x55,0xae,0x85,0x0c,0x34,0x59,0x67,0x23,0x7b +.byte 0x4a,0x87,0xd9,0xd2,0xca,0xd5,0x38,0xd2,0x9d,0x05,0x2e,0xd8,0xe3,0x26,0x51,0xa4,0x14,0x66,0xfb,0x38,0x40,0x18,0x3b,0xda,0x43,0x85,0xc9,0xf5,0xf4,0xe7,0x22,0x82,0x45,0xa1,0xdf,0x98,0xa0,0xab,0x5f,0x7a,0x50,0x84,0x75,0x7a,0x70,0xa6,0x3b,0x04,0x20,0xed,0xa8,0x68,0x6d,0x3f,0x43,0xf8,0xb8,0xac,0xc7,0x32,0xa0,0xff,0x47,0xd5 +.byte 0xb3,0x92,0x6a,0x15,0x5a,0xf1,0x7c,0x32,0x30,0xda,0x1e,0x5d,0xab,0xcc,0xd0,0x3a,0xdc,0xcf,0x70,0xd8,0x4d,0xa3,0x50,0xac,0x50,0x42,0x53,0xc6,0xe0,0x3a,0x26,0xdc,0x77,0x30,0x31,0x59,0xa1,0xfc,0x4d,0x48,0x00,0x0d,0xe0,0x66,0xb3,0x9b,0xd3,0x38,0x45,0xbb,0x0c,0x57,0xc5,0x78,0xee,0x8c,0x96,0xea,0xa2,0x16,0xa3,0x12,0xb1,0x06 +.byte 
0xd0,0x2a,0x70,0xf7,0xce,0x42,0xae,0x17,0x64,0xbf,0x13,0xa0,0xe9,0x62,0x57,0x1d,0x55,0x78,0xfa,0x72,0x19,0x58,0x15,0xea,0xe5,0xdf,0x72,0x0e,0xc6,0xd3,0xb4,0x3d,0x60,0xee,0x32,0x2a,0xce,0xdc,0xad,0xd0,0x34,0xe6,0xb4,0xcf,0xce,0x5a,0x4a,0x9f,0xaf,0x01,0xb3,0x2a,0xed,0x46,0xa0,0xad,0xaa,0x62,0x8b,0xa4,0xf7,0x4b,0xce,0x32 +.byte 0x35,0x29,0x1e,0x7a,0xda,0x74,0xf8,0xe5,0xda,0x52,0x66,0xaf,0x3d,0x1a,0xff,0x42,0xc0,0xcc,0xb1,0x32,0x36,0x10,0x44,0x34,0x6a,0x16,0xc2,0x5b,0x9a,0x35,0x3f,0xd2,0x29,0xc5,0x76,0x3c,0x24,0xc7,0x2b,0x92,0xae,0xe0,0xe2,0x04,0x6c,0x3b,0x97,0xda,0xfd,0x49,0x43,0x6d,0x35,0xf5,0xc3,0xc1,0x93,0xf8,0x2f,0x25,0xef,0x3e,0xd8,0xf2 +.byte 0xc0,0xb3,0xb5,0x71,0x01,0xe0,0x07,0x11,0xd5,0xf1,0xd3,0x54,0x59,0x93,0x77,0x2e,0x77,0xdc,0x57,0xd7,0x9b,0x0a,0xe2,0xde,0x29,0x04,0x81,0xa1,0x81,0x6f,0x94,0x86,0x39,0xd7,0x29,0x69,0x3f,0xfa,0xe4,0x02,0x01,0x85,0x04,0x21,0xd3,0x17,0xf5,0x68,0x85,0x6e,0x74,0x15,0x56,0xe6,0x5e,0x12,0x1c,0x0d,0x2f,0x7a,0x8d,0xe1,0xc8,0x47 +.byte 0x7b,0xdc,0x35,0x64,0xf1,0x00,0xc0,0x7b,0xd8,0x2c,0x8c,0x60,0x10,0x53,0x11,0x2c,0x5c,0xa2,0xb6,0x05,0xa3,0xcd,0x14,0xb6,0xd0,0x36,0xe9,0x74,0x78,0xc3,0x84,0x6b,0x51,0xa9,0xf9,0xf1,0x05,0xe2,0xd4,0xa3,0x57,0xec,0xb1,0x5e,0xd5,0x75,0x64,0xe3,0xb0,0xf9,0x8f,0x88,0x60,0xdf,0x8e,0x75,0xf9,0x32,0xfc,0x58,0x5b,0x4b,0x17,0xdb +.byte 0x41,0x04,0x6f,0x17,0x7a,0xf8,0xd0,0x47,0x8e,0xeb,0xd1,0xf9,0xa6,0xa8,0x52,0x7e,0x07,0x6b,0x5b,0x4d,0xb9,0xda,0x91,0x40,0x51,0x25,0x67,0x4b,0xf1,0x95,0x12,0x07,0xa9,0xa5,0x33,0x96,0x92,0x5e,0xb4,0x0e,0xf0,0x85,0x2e,0x70,0xd8,0xaf,0xae,0x9a,0x3d,0x0c,0xb0,0xee,0xe1,0x80,0x5a,0xb9,0x17,0xe6,0x00,0xa8,0x82,0xd0,0x9b,0xf5 +.byte 0xe3,0xa0,0x12,0xc4,0x15,0xd6,0x5e,0x57,0x5c,0xd2,0xb9,0xa7,0x8e,0xfd,0x09,0xc3,0xd2,0x66,0xfd,0x86,0xb4,0xdc,0xa3,0xc2,0xfe,0x16,0x86,0xc4,0x98,0xa3,0x2e,0x4c,0xc9,0x2c,0xd6,0x87,0x83,0x1b,0x6f,0xe2,0x44,0xd6,0x72,0x94,0x1d,0xba,0xaf,0x34,0x1f,0xf2,0x40,0x40,0x33,0x24,0x63,0xc1,0x26,0xef,0xbc,0x0f,0x3b,0x3c,0x65,0x2b +.byte 0xa7,0xc7,0xdf,0x96,0x67,0xab,0x92,0x0e,0x04,0x8c,0x82,0x9e,0xbe,0x52,0x61,0x40,0xdf,0x77,0x00,0xc5,0x01,0x9a,0xe9,0xde,0xe1,0xe2,0x45,0xb8,0xed,0x94,0xd5,0xf0,0x28,0x29,0xef,0x0d,0x91,0x07,0x9b,0xfe,0x69,0x78,0x26,0xd7,0xf9,0x51,0xf1,0x9c,0xf2,0xbb,0x83,0x2d,0x79,0x1e,0xff,0x97,0x13,0xdc,0x28,0x93,0x26,0x7c,0x54,0x52 +.byte 0xc0,0x92,0xeb,0x4a,0xa2,0xe3,0x01,0xfc,0x07,0xb9,0x26,0x11,0x03,0xe0,0x19,0xa8,0x9c,0xff,0x3a,0x95,0x26,0x3a,0x17,0xf1,0x7d,0x6a,0x6a,0xb2,0xb5,0x5a,0x07,0x43,0x2b,0xb7,0xdd,0x19,0x14,0xe0,0x05,0x91,0xc5,0xee,0x49,0x35,0x7b,0x1a,0x2d,0x34,0xda,0xa2,0x45,0x7e,0x0d,0x64,0x98,0xb6,0x2e,0x47,0xaa,0x6c,0x73,0x66,0x55,0x01 +.byte 0x27,0xb0,0xa9,0x13,0xa6,0xe0,0x74,0x38,0xb3,0x97,0xfe,0xaf,0xdc,0xc0,0x6a,0x4f,0xd8,0xdb,0x07,0x62,0x61,0x05,0xbb,0xa0,0xa8,0xc5,0xb3,0x89,0x13,0xbb,0x09,0x01,0x6f,0x09,0xcb,0x47,0x62,0x46,0xf0,0x4b,0xf0,0xb7,0x7c,0x39,0x8d,0xe5,0x7b,0x64,0x49,0x32,0x93,0x1e,0x94,0x0a,0x98,0xe0,0xca,0xc6,0x67,0x5b,0xdf,0x88,0x0a,0x26 +.byte 0x83,0x77,0xc3,0xd0,0x11,0x66,0x3d,0x25,0x91,0x61,0x80,0xfc,0x9c,0x50,0xfb,0xe8,0x81,0x6f,0xd8,0xfa,0x77,0x78,0x4c,0x2b,0x44,0xd0,0x92,0x52,0xa4,0x50,0x50,0x7e,0xa2,0xb9,0xe7,0x79,0x33,0x95,0xfe,0x29,0x1c,0x1d,0x43,0x9d,0xa7,0x12,0xfe,0xa1,0x45,0xf4,0xd9,0x1c,0x7e,0x5a,0x67,0x99,0x7f,0x22,0x7c,0xa3,0xb1,0x2d,0xb7,0x1d +.byte 
0x6b,0xf6,0xb4,0x94,0xf2,0xd1,0x5c,0x28,0x56,0xe9,0x4f,0x21,0x81,0x96,0x37,0x7c,0x25,0x74,0x0f,0xf9,0xc5,0xf5,0xc6,0xe8,0x8f,0xbb,0xfb,0xe4,0xaf,0x23,0xac,0x4c,0x20,0x35,0x7d,0xb4,0x4a,0xde,0x90,0xec,0x16,0x30,0x95,0x1b,0x79,0xf6,0x77,0xfe,0x80,0x10,0xba,0xd2,0x49,0xda,0xca,0x9e,0x6b,0x63,0x2f,0x24,0x38,0xf9,0xee,0x20 +.byte 0x38,0x5c,0xeb,0xf5,0xbc,0x07,0x7a,0xeb,0xde,0xc4,0x97,0xcf,0x48,0x9b,0x80,0x40,0xfa,0x81,0xf5,0x24,0xa7,0xf3,0xf7,0x16,0xe9,0xba,0xae,0x9f,0xde,0xa1,0x00,0x34,0x74,0x36,0x9f,0x47,0xce,0xcf,0x35,0xdb,0x30,0x7e,0x72,0x81,0xc5,0xe1,0x59,0x07,0x3e,0xc7,0x5b,0x7b,0xd3,0xc6,0xeb,0x4e,0x71,0x9c,0xeb,0x41,0x37,0xd9,0x9e,0x34 +.byte 0x0b,0xc1,0x9c,0xf7,0xfd,0x56,0xb0,0xd6,0xa6,0xe4,0x1d,0xdf,0x43,0xc6,0xf3,0x26,0x0f,0x01,0x07,0x29,0x57,0x9c,0x8f,0xe1,0x31,0xc9,0xa6,0x98,0x0f,0x0e,0x27,0xfd,0xa0,0x59,0xdf,0x92,0x7b,0x0a,0x4c,0x42,0x4b,0x03,0x98,0x2a,0xea,0xcb,0xd8,0x0f,0x6d,0x19,0x0b,0x22,0x69,0x8b,0xaa,0x3b,0xc8,0x41,0x66,0x81,0xc3,0xaa,0x64,0x6d +.byte 0x44,0xdd,0xb9,0xe2,0xc4,0x47,0x6d,0xdf,0x61,0xe0,0xf3,0x26,0x40,0x23,0x2f,0xf9,0x2a,0xb3,0xfa,0xe2,0xe8,0x36,0xc0,0xd9,0x89,0xb0,0x05,0x47,0x36,0x20,0x3b,0x03,0x0c,0xd1,0x46,0x9b,0xc9,0x65,0xfa,0x14,0xba,0x68,0x49,0xfc,0x2a,0xb9,0x04,0x47,0xbb,0x64,0xe1,0x7f,0x5a,0xd3,0x70,0x19,0x0f,0x14,0x09,0xc0,0xbe,0xc3,0x9b,0x2f +.byte 0xd1,0x05,0x90,0x56,0x09,0x47,0xb3,0xc5,0x08,0x6f,0x89,0x59,0x8c,0xf3,0xd4,0x1c,0xaf,0x68,0x00,0x32,0x58,0xe2,0x66,0x55,0xe2,0xc3,0x46,0x73,0xfd,0x4b,0x63,0xc5,0xdd,0x48,0xa8,0x14,0xe9,0x07,0x94,0x8f,0x51,0x6e,0x2d,0x7c,0x62,0x97,0x73,0xa5,0x42,0x7d,0xad,0x43,0xcb,0x65,0x56,0xf0,0x23,0x28,0x72,0xdb,0x1f,0xcf,0x34,0x9a +.byte 0x62,0x06,0x8d,0xc9,0x86,0x40,0x6d,0xee,0x58,0x72,0x02,0xbb,0xce,0x33,0x6a,0xe4,0xcb,0x46,0x25,0xda,0x2f,0x8d,0xc9,0x8e,0xfe,0xcf,0xbb,0xfc,0xb0,0xe8,0xec,0xf2,0xf9,0xff,0x5d,0x70,0x9e,0x2e,0x22,0x0e,0x9a,0x4d,0xb8,0x26,0x7a,0x48,0x3f,0xba,0x5c,0xcd,0x10,0xf4,0x6d,0x89,0x3d,0x5d,0x87,0xd4,0x69,0xb8,0x4a,0x20,0xc6,0xf8 +.byte 0x03,0x6c,0x60,0x1e,0x9c,0xc6,0xe3,0x39,0x9b,0xa1,0x16,0x64,0xed,0xc6,0xd7,0x54,0xfd,0x8d,0xa0,0x2f,0xcf,0xc6,0xde,0x43,0xe4,0xc5,0xb7,0xd6,0x00,0xaf,0x95,0x7a,0xc6,0xde,0x26,0x59,0x39,0xb0,0x12,0x6b,0xe1,0x3c,0xa9,0x09,0xb6,0x15,0xb0,0x62,0xad,0xa9,0x11,0x4f,0x86,0xde,0xc6,0xe8,0x32,0x46,0x78,0xeb,0x60,0x81,0x6b,0x8f +.byte 0xac,0x80,0xbf,0xa4,0xc4,0xb7,0x5f,0x3b,0x2f,0xf8,0xe4,0x05,0xcf,0xbf,0xa3,0x14,0x6f,0x16,0xbc,0x6c,0x4e,0x31,0xd7,0x79,0x09,0xcf,0x9c,0x58,0xa3,0x0b,0x1a,0x31,0x4b,0xda,0xcb,0x11,0x35,0xb1,0xf5,0xbb,0xfb,0x00,0x46,0x6d,0x70,0x5e,0x4a,0x85,0x19,0xdf,0xb5,0xd0,0x03,0x2e,0x5d,0x01,0x95,0x4e,0x5a,0x59,0x99,0x24,0xac,0x3f +.byte 0x2d,0x64,0xaf,0xef,0x40,0x16,0x2a,0xcc,0x6a,0x6c,0x0f,0xe3,0x45,0x15,0x74,0x3d,0xea,0xdb,0xa7,0x3f,0xd2,0x50,0x4d,0xc7,0xc6,0x19,0x36,0x84,0xf4,0xbd,0x09,0xff,0xe7,0xf3,0xc0,0xa5,0x34,0x49,0x8a,0xfe,0x83,0xcd,0xe4,0x80,0x7d,0xe3,0xff,0xc9,0x8a,0xb9,0xd6,0x34,0x01,0xd1,0x47,0x16,0x5e,0x7c,0x16,0xf5,0x7c,0xf8,0xb5,0x53 +.byte 0x26,0x84,0x89,0x73,0xf3,0x7f,0x9c,0xb0,0x2f,0x07,0x9e,0xf2,0x12,0xdf,0xba,0xc0,0x15,0xd0,0x3a,0x59,0x9d,0xde,0x67,0x5e,0x1c,0x2b,0x4b,0x84,0xb8,0x89,0xfb,0x62,0x90,0xe9,0x89,0xd9,0xdb,0xb7,0x21,0x4a,0x9f,0xbd,0xc0,0x02,0x01,0xda,0xb3,0x4c,0x9d,0xfb,0x46,0xa1,0xd0,0x3c,0xf5,0x27,0x6f,0x70,0xb5,0xa9,0x74,0xdc,0xa0,0x76 +.byte 
0xb7,0x3a,0x53,0x18,0xdd,0x80,0x5e,0x43,0xb5,0x35,0xe4,0x0e,0x26,0x27,0x0a,0xab,0xe8,0x4d,0x2e,0x89,0x20,0xc3,0xff,0xe4,0x7f,0x03,0x2c,0x5f,0x25,0xc7,0x70,0x53,0x27,0x4c,0xc8,0xb9,0xb1,0x81,0x10,0x7a,0xa2,0x65,0xe4,0x0b,0x65,0x8e,0x3d,0x2f,0x96,0xa0,0xa5,0x7b,0x4f,0x09,0xe9,0x9d,0x10,0x06,0xf7,0x18,0xad,0x2d,0x7f,0xb8 +.byte 0x8f,0x08,0xa7,0x2c,0xda,0x82,0xbe,0x5c,0xd6,0x1d,0xb6,0xe2,0x9b,0xa2,0xfc,0x18,0x8c,0x8d,0xf7,0x81,0xf4,0xc6,0x1e,0xcb,0xe5,0x73,0xa6,0x74,0x06,0x20,0xf3,0xa9,0xcb,0x80,0x01,0x55,0x7e,0xc0,0x6a,0x1f,0x5a,0x5b,0xb1,0x56,0x5d,0xd8,0x2a,0xd5,0xf5,0x57,0xe8,0x48,0x6c,0xfb,0x9e,0x93,0xa7,0x0e,0x13,0x2b,0x68,0xc5,0x6b,0x17 +.byte 0x43,0xb0,0x58,0x04,0x65,0x3d,0x46,0x57,0xa7,0x3d,0x99,0xb8,0xa1,0x48,0x17,0x44,0x67,0x2a,0x0d,0x44,0x87,0x9f,0x63,0xd7,0x92,0x56,0x7b,0xab,0xd3,0x6a,0xbd,0x4f,0xc0,0xc3,0xd2,0xee,0xd1,0x3d,0xd1,0x18,0x2e,0x6a,0xf5,0x3b,0x67,0xa0,0x0a,0xf3,0x11,0x49,0xc5,0x4b,0xef,0xcf,0x00,0xfd,0x22,0x8f,0xa0,0x9c,0x99,0x32,0x2f,0x58 +.byte 0xf9,0x97,0x98,0x13,0x4a,0x88,0x50,0xcc,0x58,0x1e,0x27,0x02,0x34,0x7d,0xec,0xf6,0x88,0x3a,0x74,0xb5,0x34,0x6d,0x6f,0x52,0x2d,0x20,0x02,0x70,0x22,0x27,0xdf,0x7a,0xff,0x30,0x36,0x66,0x1a,0xa0,0x51,0xc3,0x75,0x9a,0x06,0xe5,0x3f,0x6c,0x74,0x0d,0x15,0xa2,0xb6,0xe5,0xcd,0x55,0x4d,0xea,0x65,0x8f,0xbb,0xb2,0xd4,0x95,0x73,0xa4 +.byte 0xcd,0xb9,0xc8,0x82,0x60,0x49,0xe9,0x36,0xc9,0xb1,0xe9,0xcb,0x52,0xae,0xa7,0x7a,0x64,0xab,0x75,0x84,0x03,0x4b,0x37,0xf7,0x07,0x75,0xf7,0x1c,0x32,0x19,0xb6,0x8b,0xca,0x7c,0x43,0x15,0xe8,0xec,0x57,0x89,0x1d,0xe2,0xa0,0x80,0xc5,0xb6,0x02,0x29,0xfd,0xda,0xe0,0x14,0x93,0xb4,0xb3,0x44,0x2e,0x17,0x2f,0xed,0x3b,0x38,0x6e,0x8f +.byte 0xe0,0x3d,0xc6,0x77,0xe9,0xa7,0x76,0xcb,0x98,0x2d,0x08,0x61,0xcf,0x1b,0x25,0x3f,0xfb,0x1d,0x99,0xb1,0x5a,0x3c,0x53,0x96,0x4e,0x09,0x11,0xf6,0x5b,0x09,0x31,0xe1,0xad,0xb0,0xaf,0x7b,0xec,0xf9,0xa8,0x68,0xb7,0x93,0x57,0xf7,0x17,0x77,0x87,0x2b,0xdb,0x00,0x28,0xc6,0x48,0xac,0xff,0xcd,0x26,0x4a,0x8a,0x76,0x9a,0x2a,0x1d,0x37 +.byte 0x4c,0x70,0x4f,0xf6,0x52,0xe3,0x7a,0x78,0x94,0x5b,0x0b,0x50,0xb4,0x48,0x03,0xcd,0x78,0xd0,0x5d,0x89,0x6d,0x76,0xaf,0x9d,0x67,0xc3,0x75,0x6f,0x6a,0x2d,0xe2,0xb7,0x58,0x51,0x10,0x0d,0xef,0xa0,0x1a,0x74,0x28,0x3a,0x97,0x19,0x4f,0x3c,0x8a,0x86,0x3d,0xe4,0x66,0x3d,0x57,0xb4,0x66,0xb3,0x0b,0x4f,0x57,0x57,0x34,0x2e,0xc7,0x0c +.byte 0x11,0xdf,0x3c,0xb4,0x9f,0xe1,0xd5,0x27,0x41,0x08,0xec,0xca,0x18,0x88,0x48,0x5e,0x88,0x55,0x89,0x71,0xe6,0xa5,0x90,0x7c,0x3b,0xe5,0xf3,0x2a,0xd7,0xf5,0x0b,0x3d,0xbb,0x47,0xad,0xd7,0x78,0x41,0xa8,0xef,0xd4,0x36,0x31,0xd1,0xe4,0x9c,0x87,0x9e,0xb1,0x11,0x0e,0xff,0x8f,0x4d,0x79,0x65,0xc4,0x83,0x75,0x33,0xc9,0x89,0xe2,0xc3 +.byte 0x41,0x68,0x11,0xe7,0xe4,0x58,0xb9,0xf1,0xee,0x06,0x48,0x4d,0xc3,0xc7,0x76,0x60,0x42,0x94,0x8f,0x0d,0xb9,0x53,0x46,0x78,0x06,0x97,0x94,0x36,0xf4,0x3e,0xf3,0xdd,0x5b,0x46,0xe1,0x9d,0x3f,0x9e,0x78,0x00,0x9e,0xe7,0xcb,0x9e,0xc8,0x30,0x87,0x4a,0x52,0x91,0xd5,0xe2,0xa3,0x65,0x98,0xb2,0xc9,0x6c,0xfb,0x4e,0x54,0x5a,0x9f,0x57 +.byte 0x2c,0x4a,0x76,0xe4,0x97,0x88,0xd5,0x6a,0x0e,0x6c,0x7c,0xef,0x78,0x2a,0x7c,0x26,0xa3,0x25,0xf6,0x33,0x82,0x46,0x6d,0x91,0x0d,0xe4,0x83,0xec,0xf1,0x24,0xf8,0x0a,0x34,0xec,0xfc,0x7e,0x47,0xda,0x9a,0x17,0x1b,0x33,0xd0,0xf1,0x70,0xe4,0x0b,0xc7,0x70,0x58,0x1d,0x76,0x20,0x89,0xce,0x4f,0xd1,0xcb,0x3b,0x26,0xd1,0x98,0xd9,0x51 +.byte 
0xb1,0xd0,0xaa,0x4a,0xd5,0x10,0xf2,0xae,0xaa,0x14,0xa7,0x72,0x99,0x3d,0xc8,0xbf,0xfb,0xec,0x6a,0x14,0xdd,0x97,0x7b,0x2f,0x16,0x96,0x0f,0x41,0xb8,0x33,0x15,0x1b,0xa2,0x6a,0x7e,0x64,0x0d,0xab,0xe7,0x62,0xf5,0x6c,0x56,0x69,0x09,0x46,0x32,0x24,0x60,0x4e,0x21,0xc7,0x5b,0xee,0x0a,0xe2,0x94,0x7c,0x20,0xe2,0x06,0xa0,0xa2,0x36 +.byte 0xa0,0x7d,0xb5,0x37,0x2a,0xee,0x20,0x25,0x4c,0xba,0x9a,0x06,0x4c,0x07,0x9b,0xea,0x55,0xac,0x2a,0xf7,0xb9,0x5c,0x23,0xac,0x43,0xda,0x9d,0xad,0x76,0xe2,0x5f,0xe0,0x27,0xaf,0x0a,0x5e,0x3d,0x54,0x84,0xfc,0x19,0x75,0x8c,0x62,0x4d,0x37,0x17,0x1a,0x90,0x55,0xb8,0x7e,0xa1,0xad,0x31,0x1a,0xc0,0x91,0x96,0x51,0xa9,0x5f,0xbb,0xb9 +.byte 0x95,0xbf,0xe2,0xd5,0x7e,0x31,0xba,0xc4,0x1e,0x63,0x98,0xd3,0xe2,0x7d,0x87,0xa5,0x46,0xe3,0xae,0xe1,0xe8,0x4e,0x74,0x29,0x0e,0x4b,0x10,0xa8,0x7f,0x3a,0xe5,0x60,0x0f,0x49,0x6a,0xcd,0x3d,0x5a,0x8e,0xf1,0x48,0xd0,0x80,0x7b,0xa3,0x7f,0x06,0x47,0x2b,0x60,0xf2,0x17,0xc3,0xe1,0x26,0x1e,0xb7,0x0f,0x2b,0x7c,0xc7,0xb8,0x3a,0x4f +.byte 0xad,0x05,0x97,0x88,0x93,0x82,0x8e,0x06,0x77,0x44,0xd1,0x65,0xfd,0x18,0x48,0xd6,0x88,0xcd,0x5c,0xbd,0xe4,0xaa,0xea,0xf1,0xed,0x16,0x5f,0xb3,0x58,0xe2,0x69,0x82,0xbe,0x9e,0xfc,0xcb,0xf6,0x17,0xa9,0x70,0xeb,0x08,0xd7,0x06,0x86,0xf6,0x5a,0x43,0x68,0x7b,0xcf,0xa3,0xfa,0x26,0x5e,0xe5,0x42,0xd3,0x5a,0xc8,0x1c,0x3b,0x8d,0x2d +.byte 0xf1,0x45,0xb0,0x97,0x90,0x0b,0xe7,0x2d,0xab,0xd7,0xd8,0x8a,0x16,0xf9,0x5f,0xa6,0xcf,0xc5,0x60,0x2c,0x34,0x5a,0x2e,0x2b,0xb9,0xb4,0x9c,0xa7,0x09,0x77,0xd2,0x3f,0x8c,0xf3,0xf6,0xf7,0xe0,0x27,0x79,0xc3,0x4e,0x61,0x7d,0x09,0x50,0x05,0x01,0x35,0x1b,0x33,0x54,0x6f,0x90,0x9a,0x19,0xcd,0x86,0x45,0x23,0xcd,0x6f,0x1b,0x62,0xc5 +.byte 0xce,0x4e,0x8e,0xff,0xe7,0x12,0x32,0x85,0x9a,0xc4,0x11,0x83,0xcf,0x78,0xd7,0x41,0x99,0x64,0x20,0xa6,0x69,0xdd,0xe3,0x53,0x98,0x6b,0xc7,0x98,0x51,0xc5,0xf8,0x3e,0xa3,0x5f,0x0d,0x78,0x2f,0xa7,0x05,0xff,0xe5,0x3a,0x0f,0x7c,0x09,0x58,0x3f,0xaa,0x0d,0x9a,0x9d,0x8d,0xe7,0xbf,0x6b,0x7d,0xfe,0x3a,0x4f,0x5c,0x50,0xb2,0xe7,0xc5 +.byte 0xa5,0x13,0xde,0xc8,0xe8,0x59,0xac,0xb0,0xdd,0xc0,0x81,0xa7,0x0b,0x78,0x32,0x23,0x76,0x85,0x11,0xef,0xe3,0x88,0x6f,0x7f,0xa9,0x09,0x7b,0x0c,0x6f,0x34,0xb2,0x67,0x5e,0xd6,0x11,0xad,0xd7,0x3b,0xf2,0xbb,0x66,0x5b,0xde,0x22,0xfc,0x55,0x26,0xa1,0x89,0x80,0x2e,0xb8,0xf3,0x3c,0xf8,0x1e,0xba,0x99,0x1c,0x24,0x33,0xb4,0xe6,0x17 +.byte 0x2b,0x9c,0x80,0xe5,0x9b,0x58,0x54,0x70,0xcd,0x15,0x81,0xcd,0x51,0x48,0x75,0x24,0x27,0xf5,0x30,0x79,0xc1,0x16,0xff,0x89,0x70,0x12,0x74,0x07,0x9d,0x39,0xf2,0x9c,0xc6,0x89,0x8d,0x94,0x41,0x01,0x04,0xf5,0x16,0x99,0xf3,0xf0,0xd1,0xf5,0x6d,0xd3,0x11,0x19,0x29,0x36,0xfb,0x41,0xf9,0x32,0xb9,0x0f,0x13,0xaf,0xac,0xfb,0x30,0x75 +.byte 0x62,0x8c,0x04,0x5b,0xf1,0xce,0x52,0x9b,0xbe,0x8c,0xf9,0x86,0x5d,0x7d,0xc1,0x8e,0x41,0x76,0x42,0x63,0xd7,0x74,0x8e,0x2c,0x46,0xa1,0x0a,0x51,0xb5,0xec,0xe9,0x91,0x56,0xbc,0xdc,0x32,0xfc,0x10,0xb5,0xca,0x5b,0x4b,0x72,0x99,0x07,0xff,0x01,0x11,0x2c,0xa4,0x60,0xf5,0x6b,0xd4,0xa8,0x96,0x21,0xee,0xbe,0x14,0x8f,0x69,0x99,0xdc +.byte 0x43,0x7f,0x13,0x3d,0x17,0x1e,0xa3,0x1b,0x21,0x23,0x26,0x7e,0xff,0x80,0x6b,0x66,0x3e,0xb2,0x48,0x1a,0x77,0x3c,0x50,0xe2,0xca,0x4d,0xc6,0xdb,0xfd,0xd1,0x23,0xcc,0xcb,0x01,0x25,0xc0,0x62,0x8d,0xe5,0x9c,0xb7,0x13,0x97,0xf5,0x49,0x01,0x19,0x45,0x45,0x83,0x17,0xff,0x8e,0x94,0x8c,0xb0,0xc0,0xaf,0x46,0x62,0x0e,0x62,0xb7,0x8c +.byte 
0xd5,0xcf,0xb9,0x82,0x6e,0x8a,0xb9,0x22,0xbc,0x30,0xf9,0x65,0xc2,0x7f,0xce,0x6b,0x4d,0xad,0x87,0xcb,0x23,0xab,0x57,0x36,0x6a,0xb7,0x8c,0x63,0x17,0x60,0x13,0xa1,0x1f,0x3d,0xa4,0xd4,0xab,0x5d,0x97,0xc7,0x18,0xaf,0xf8,0xae,0x13,0x64,0x2a,0x19,0x34,0xe2,0x28,0x28,0x4f,0x32,0x2a,0xd8,0x43,0x79,0xaf,0x1e,0x56,0xfc,0x97,0x51 +.byte 0x67,0x8c,0x63,0x80,0x32,0x63,0x71,0x5c,0x78,0x00,0xeb,0xfd,0xa2,0x96,0x58,0x21,0x36,0x13,0x02,0xe5,0xa4,0xb7,0xcd,0x5a,0x30,0xa0,0x5b,0x7b,0x23,0xa4,0xcc,0x54,0x64,0x6f,0x6d,0x9b,0xaf,0xea,0x49,0x69,0x9e,0x2f,0x51,0x5c,0xe7,0xa3,0xa3,0xb8,0xac,0xed,0x47,0x23,0x7a,0x37,0x38,0xe3,0x15,0x98,0x6f,0x50,0x6c,0x8d,0xa7,0xe6 +.byte 0xa8,0x39,0xcc,0x63,0x08,0xeb,0x8f,0x8c,0xfd,0x83,0xaa,0x34,0x75,0x19,0xc0,0xf4,0xd6,0x25,0x18,0x94,0x9d,0xa1,0x7e,0xc8,0x6b,0x19,0x76,0xc0,0x8d,0xaf,0x51,0xe5,0x7c,0x8a,0x98,0x17,0x80,0x90,0xc0,0xb6,0xed,0x5c,0x8f,0x33,0x56,0xba,0xce,0xbe,0x83,0x87,0x5d,0x51,0x2e,0x64,0x84,0xa6,0x9d,0x49,0x27,0x5b,0x92,0xe0,0xe7,0xac +.byte 0x37,0x3d,0x22,0x5e,0x25,0xe7,0xca,0x2f,0x5d,0x2f,0xa0,0xd5,0xcb,0xe9,0xac,0x84,0x5b,0x19,0x72,0x1c,0x2c,0x0a,0xd1,0xb7,0x73,0x24,0x8a,0x0f,0xe0,0x07,0xd8,0x49,0x4d,0x23,0x1b,0xac,0xb8,0xd1,0x42,0xd4,0xdf,0xf8,0x4d,0x85,0xa2,0x37,0x30,0x46,0x38,0x88,0x55,0x1d,0xea,0x37,0x54,0x8c,0x43,0xb0,0xed,0x01,0x53,0x75,0xe6,0xf7 +.byte 0x9b,0xe6,0x10,0x91,0x6e,0x80,0x11,0xf9,0x96,0x29,0x4f,0x08,0x77,0x2b,0x7e,0xdb,0x5b,0x14,0xbd,0x77,0x37,0xe8,0x36,0x07,0x4a,0xe4,0xd8,0xa2,0x4e,0x38,0xea,0xeb,0xc2,0xd6,0x43,0x59,0x20,0x0c,0x12,0x31,0x6c,0x27,0xc5,0x7b,0xfc,0xfc,0x54,0x94,0x1d,0x5f,0x82,0x73,0xd7,0x1f,0x43,0x3a,0x73,0xc4,0xf3,0xb3,0xbb,0x53,0xfe,0x22 +.byte 0xc0,0xa4,0x7e,0x2b,0x84,0x1b,0xef,0x6d,0x83,0x9d,0xb3,0x8b,0x2a,0x6c,0xea,0x1e,0xfa,0x77,0x01,0x35,0xd2,0x5b,0xc4,0xd3,0xe7,0x1e,0xca,0x73,0x8b,0xb9,0x1f,0xfb,0x67,0xf2,0xdd,0x03,0xe6,0xca,0xfe,0x3b,0x61,0xd7,0xb5,0x96,0xe0,0x85,0xc2,0x23,0xa7,0xea,0x38,0xbf,0x6e,0x29,0x9e,0x8e,0x18,0xd4,0xbf,0x16,0x73,0xf9,0x18,0xef +.byte 0xc9,0xaf,0x6c,0xe2,0xdc,0xa4,0x58,0x9c,0xf5,0x6d,0x4a,0xc8,0xb4,0x8f,0x16,0x02,0xb7,0x65,0xd3,0x32,0x3b,0x83,0xfe,0xf3,0xc7,0xba,0x68,0xf4,0x95,0xa4,0xf6,0x33,0x57,0x43,0xbe,0xae,0x83,0xa9,0xe4,0x0d,0x0b,0x23,0xaa,0xbc,0x15,0x53,0x18,0x4d,0xb4,0x35,0xe3,0x8e,0x86,0xfe,0xe4,0x98,0x5d,0x63,0x23,0xce,0x44,0xea,0x4d,0x64 +.byte 0x86,0xf8,0x06,0x8f,0xc0,0x73,0xa6,0x6d,0x04,0x53,0x47,0x95,0x0f,0x6d,0x6c,0x01,0x1c,0x3f,0x7b,0x83,0xe4,0xc2,0x40,0xb8,0x97,0x26,0x9e,0x35,0xb0,0x76,0xee,0xe4,0xc7,0xd8,0xaa,0x22,0x83,0x96,0xe1,0x34,0x7b,0x78,0x31,0xee,0xd3,0x9a,0x50,0xd4,0x05,0xfd,0xd6,0x15,0xca,0x83,0x2f,0x49,0xfd,0x00,0x23,0x82,0x39,0xac,0x46,0x7a +.byte 0xe4,0xb5,0xcc,0xee,0xbb,0xaa,0x98,0x82,0xb5,0x27,0x45,0xd5,0x96,0x6e,0x89,0x01,0x1e,0x30,0xe4,0x1c,0x3a,0x65,0xcc,0x9f,0xda,0x38,0xf0,0x4c,0x68,0xfa,0xe5,0xf2,0xe2,0xce,0x34,0xc2,0x15,0xfd,0x21,0xf6,0xe2,0x33,0xbd,0xef,0xfd,0x49,0x15,0xdc,0x38,0x3b,0x24,0xba,0x3a,0x80,0x35,0x60,0xbe,0x50,0x17,0x38,0x3e,0xe2,0x96,0x84 +.byte 0x01,0x41,0x6c,0xb2,0x0b,0xc6,0xff,0xce,0xb3,0x37,0xa2,0x46,0x27,0x33,0x8e,0x04,0x44,0x8a,0x7c,0x64,0x0e,0xbc,0xed,0x74,0x4f,0x40,0x58,0xf4,0x8c,0xf8,0xd9,0x92,0xa9,0x0b,0x18,0x7c,0x93,0x95,0xca,0xa7,0x3e,0x1d,0xad,0x68,0x80,0xd9,0xdb,0x81,0x78,0x50,0x37,0x49,0xbc,0x64,0xc2,0x52,0x5c,0x70,0x7e,0x0a,0x26,0x7e,0xc6,0xbf +.byte 
0xd2,0x7f,0x05,0x55,0x7a,0x5a,0x3e,0x9e,0xe3,0x8b,0xf5,0x95,0x2b,0xd8,0xb4,0xb8,0xc6,0x5d,0x91,0xb8,0xc7,0x7c,0xe1,0x75,0xf2,0x43,0x6b,0x73,0xb7,0xb1,0x10,0xf2,0xa7,0x1e,0xab,0xaf,0xc9,0xc0,0x3b,0xab,0xbe,0xf7,0x4a,0x43,0x9c,0xca,0x3d,0x00,0x5b,0x02,0xf8,0xa2,0x4f,0x57,0x81,0xb0,0xde,0x1e,0xd1,0x60,0xbe,0x6c,0x0d,0xe6 +.byte 0xcd,0x51,0xb6,0xc7,0x00,0x52,0x37,0x4f,0xfc,0xee,0xe2,0x43,0x5c,0x61,0x76,0xed,0x80,0x72,0x38,0x26,0x94,0xfe,0x28,0x06,0xfb,0x62,0xa6,0x21,0x9b,0x53,0x60,0x1b,0xf0,0x56,0xae,0xba,0x6b,0x52,0x27,0x2a,0xd5,0xed,0x11,0x92,0xa2,0xe2,0xab,0xdd,0x05,0x38,0x38,0xae,0xeb,0x72,0xcb,0x6c,0xa5,0x2a,0x73,0xc5,0xfc,0xb0,0x36,0x83 +.byte 0xd6,0xe6,0xda,0x6b,0x38,0x72,0x5e,0x8d,0xaf,0x11,0x5f,0x5b,0x89,0x58,0x21,0x36,0xf6,0x7d,0x42,0x48,0xdc,0xce,0xaa,0x94,0xf0,0xc3,0xc5,0x2c,0x08,0x2a,0x36,0x35,0x25,0x95,0xc4,0x11,0x09,0xea,0x7a,0xbc,0x2e,0xc6,0x0a,0x5b,0x4f,0x86,0xeb,0xc2,0x38,0x71,0x48,0x8c,0x63,0x79,0x3b,0xe4,0xba,0x14,0x44,0x31,0x28,0x4f,0x9d,0xb4 +.byte 0x26,0xa6,0x3b,0xea,0x3f,0xcb,0x30,0x6c,0x02,0x13,0xdb,0x4c,0x9c,0x76,0xc8,0xd8,0x01,0x52,0x3d,0x2f,0x51,0x70,0x15,0x91,0xec,0x8f,0x80,0xed,0x88,0xb7,0xfa,0x91,0x2c,0x10,0xcd,0x3b,0x92,0x85,0xe7,0xe8,0x11,0xfa,0x50,0x15,0xe2,0xdf,0xf7,0xbe,0xa4,0x2d,0x13,0x75,0xa6,0x00,0x25,0x8d,0xe1,0xb6,0x9b,0xbb,0x64,0xfb,0x5c,0xde +.byte 0x97,0xcc,0x00,0x51,0xd6,0xac,0x67,0xc3,0x91,0x1e,0x56,0x36,0x2b,0x43,0xed,0x8c,0x67,0x7b,0xf6,0x54,0x6f,0x91,0x44,0x28,0x93,0x60,0xac,0xca,0xb9,0x91,0x7e,0xeb,0x49,0xd8,0xfc,0x12,0x6c,0x40,0x9d,0x0a,0x4d,0xb4,0xab,0xe6,0xad,0x5b,0x8e,0x2d,0x3e,0x53,0xa1,0x88,0xf7,0x41,0x71,0xa7,0xff,0x05,0x46,0x04,0x34,0x1f,0x12,0x89 +.byte 0x92,0xc1,0xf9,0x26,0x16,0x23,0xb6,0x59,0x82,0xdc,0xa7,0xb8,0xa4,0x8a,0x0f,0x1d,0x7d,0x8f,0x44,0xe8,0x4f,0x70,0xbb,0xdb,0x8d,0xe6,0x7e,0x9d,0xd9,0x44,0x10,0x41,0x6c,0x3f,0xb7,0xe8,0x6f,0x39,0x93,0xe1,0xde,0xb8,0x6c,0xba,0x99,0x95,0xb7,0xc8,0xb2,0x2a,0xcd,0x81,0x53,0xc3,0xb5,0x2a,0x8a,0xd6,0x62,0x1e,0x74,0x4d,0xde,0xfa +.byte 0xff,0x7b,0xed,0x11,0x1e,0x44,0x3e,0x93,0x1c,0xae,0x7c,0x5c,0xed,0x52,0x75,0x5e,0x0a,0xf3,0x95,0xce,0x47,0x86,0x1b,0x7f,0x17,0x09,0x12,0xcc,0x08,0xca,0x16,0x11,0xf1,0xa1,0x39,0x78,0x89,0x5c,0x11,0x25,0xc7,0x39,0x5f,0x97,0x74,0xbc,0xa9,0x2a,0x25,0x5d,0xdd,0x93,0x0d,0x8c,0x74,0x07,0x1e,0xd9,0x9f,0xc1,0x38,0x9c,0xbf,0xe0 +.byte 0x42,0xad,0xb2,0xe7,0xb1,0x84,0x82,0xb4,0x56,0xbe,0x3c,0x42,0xb0,0xce,0x2c,0x94,0xb7,0xe6,0x78,0xc8,0x04,0x06,0x58,0x15,0x3e,0xdc,0xf6,0x9a,0x58,0xc3,0xe3,0x85,0x16,0xc8,0x84,0xba,0x8f,0xbc,0x94,0xa7,0x44,0x04,0x29,0xc4,0xd8,0xec,0x63,0xc4,0x47,0x58,0x22,0x02,0x08,0x20,0x44,0x39,0x52,0xa5,0x33,0xfe,0x1c,0x30,0x27,0x92 +.byte 0xbf,0x42,0x44,0x4c,0x3f,0x3d,0x00,0x7b,0x21,0xef,0xbb,0x25,0x75,0x4c,0xb2,0xe7,0x66,0xc9,0xc1,0xfb,0x1e,0x13,0x04,0xd0,0xcb,0x69,0x51,0x9d,0x9a,0xb0,0xb0,0xec,0xb0,0x12,0x24,0x84,0x57,0x9f,0xef,0xb4,0x19,0x50,0xa6,0xf5,0x03,0xa3,0x93,0x0f,0x77,0xaf,0xe0,0x4c,0xa5,0xd3,0xb0,0xd8,0x5e,0xc3,0x78,0x94,0xd5,0x6e,0x48,0x58 +.byte 0x7a,0x93,0xb1,0x62,0x60,0xea,0xa1,0xba,0x7a,0x86,0x6e,0x87,0xe9,0x97,0xe0,0x7c,0x1e,0xb6,0x63,0x94,0x76,0x5f,0x9c,0x95,0x65,0x00,0xd4,0x14,0x0e,0x4c,0x87,0xe7,0xcd,0x9e,0xb1,0xe2,0x13,0x1b,0xb1,0x8a,0x83,0xaa,0xaa,0x34,0xcd,0xb2,0xf6,0x7f,0x12,0xb0,0x79,0xff,0x1e,0x04,0xc8,0x9a,0xfc,0x41,0x88,0xbb,0x28,0x42,0xeb,0x45 +.byte 
0x47,0x8b,0xcb,0x57,0x03,0xcd,0xe5,0x9a,0x84,0xea,0x0a,0xb5,0x0c,0xb8,0x30,0x33,0xd6,0xde,0x66,0xa8,0x57,0xf9,0x76,0x4f,0x0f,0x8f,0x53,0x56,0x57,0x91,0xd4,0x55,0xf5,0x78,0xde,0xa6,0xa2,0x59,0xc8,0xb0,0xf2,0xb9,0xfa,0x6d,0x4a,0x70,0x86,0x3d,0x24,0x1b,0xc6,0xb8,0x06,0xf5,0xea,0x09,0x63,0x9b,0x1e,0x61,0x18,0x85,0xba,0x08 +.byte 0x20,0xaa,0x33,0x66,0xcf,0xa7,0xff,0xf5,0x30,0xfe,0xf8,0x39,0xd3,0x88,0x9a,0x5b,0x3f,0x55,0xa6,0x00,0x4c,0x57,0x0d,0xd1,0xa4,0x0c,0xe7,0x8a,0x95,0xd8,0x64,0xc7,0x93,0x51,0x84,0xa6,0x41,0x2c,0xfc,0xb0,0xfb,0x99,0x9a,0xcd,0x2c,0x62,0x3a,0xca,0x43,0x15,0xf2,0x5a,0x22,0x25,0xa4,0x91,0xa3,0x7c,0x42,0x69,0xc1,0x67,0xe3,0xf5 +.byte 0xd4,0x92,0x54,0xbd,0xb3,0x57,0xe5,0x19,0xca,0x1b,0x9c,0x19,0x79,0x9d,0xbf,0x89,0xfc,0xaa,0x72,0xcd,0xcb,0xc5,0xbc,0xdd,0x0c,0x7c,0x31,0x42,0xb0,0xc2,0x76,0xe5,0x8b,0x9b,0x7c,0x92,0x13,0x20,0x5c,0xdc,0x94,0xfc,0xa1,0x90,0x34,0x27,0x88,0x9f,0xe5,0x97,0x5f,0xc3,0xa3,0x83,0xca,0x8b,0xf8,0xac,0x36,0x33,0x47,0xc6,0x20,0x2f +.byte 0x04,0x2d,0x13,0xc1,0x3c,0x07,0x6e,0xf0,0xe2,0x3d,0x32,0x5c,0x50,0x41,0xf2,0x92,0x3f,0x25,0x2c,0x80,0x34,0xa5,0x90,0x2b,0x97,0x6e,0xd1,0xa2,0xa6,0xf4,0x4a,0xe0,0x20,0xd9,0xb9,0x2b,0x66,0xe5,0x06,0x73,0x97,0xfe,0x80,0x70,0x28,0xf9,0xb6,0xae,0x93,0x27,0x7a,0x65,0xff,0x23,0xc1,0x78,0x18,0x92,0xc9,0x0b,0x05,0x82,0x93,0xbc +.byte 0x73,0x3f,0x98,0xe9,0xa0,0x6d,0x20,0x8d,0x13,0xb1,0xf0,0x7e,0xe4,0x07,0x21,0x7d,0x6d,0xea,0x03,0x59,0xf8,0x29,0xc0,0xc8,0x7d,0xce,0xd1,0xf8,0x67,0x82,0x7f,0x84,0xe8,0x77,0xa9,0x9c,0xa2,0x34,0xdf,0xa9,0xac,0xec,0x6d,0x54,0xe5,0x0f,0xcb,0xdb,0x86,0xbc,0x01,0x44,0x91,0x3b,0xc8,0x85,0x4e,0x1d,0xe4,0x74,0x19,0xc6,0x39,0x2e +.byte 0xdf,0xf2,0x8f,0x3a,0x7f,0xe3,0x1e,0x55,0x45,0xcb,0x7e,0xde,0xcd,0xa6,0x1c,0xef,0x20,0xf7,0x07,0x31,0x94,0x9a,0x3d,0x04,0xd7,0x5e,0x65,0x20,0x6a,0x4d,0x31,0x1e,0x6f,0x89,0x40,0x45,0x1f,0x37,0xc1,0x7e,0x07,0xd5,0xa6,0x38,0x4a,0xf1,0x39,0xae,0x72,0x26,0x60,0xb0,0xb5,0xc7,0xd3,0x9a,0xaf,0x57,0x12,0xe9,0x34,0x28,0x8b,0xaf +.byte 0xd8,0x62,0x24,0x58,0xe2,0xcd,0xa2,0x9e,0x74,0x23,0x2d,0x52,0xc7,0x09,0xe5,0xb5,0xf5,0xc1,0xd3,0xa3,0x19,0xe5,0x1d,0x8d,0x0c,0xdf,0x13,0x8d,0xa4,0xa7,0xc1,0x41,0xea,0x9e,0x6d,0x61,0xd4,0xa4,0x74,0xe5,0xf8,0x5f,0x9e,0xfd,0x6d,0xf6,0x6e,0x87,0x0f,0xb5,0xa3,0x82,0xac,0x64,0xb4,0xda,0x07,0x49,0x51,0xc2,0xfd,0xcb,0x55,0xa3 +.byte 0x59,0x34,0xdf,0xa1,0xd6,0x90,0x62,0x43,0x1a,0xf9,0xae,0x85,0x5c,0x11,0x40,0xb2,0xbe,0xa5,0x03,0x04,0x4f,0xec,0x2c,0x58,0x2d,0xe9,0xda,0xcf,0xaa,0x2f,0xcf,0x60,0xc3,0x2c,0x6c,0x81,0x4d,0xf2,0x71,0x41,0xe4,0xae,0x4c,0xfa,0x8e,0x05,0x10,0xff,0x40,0xfa,0xea,0x96,0x78,0x6e,0xfc,0x35,0x35,0xec,0x84,0xf6,0x1d,0x24,0x60,0xcd +.byte 0x96,0x21,0x21,0xa7,0x32,0x90,0x3d,0x51,0x72,0x13,0xa4,0x9b,0x7e,0x94,0x3a,0x9d,0x97,0xf6,0x68,0xd8,0x08,0x42,0x54,0x7a,0xbb,0x9a,0x95,0x83,0xac,0xb8,0xb4,0x68,0xe3,0x31,0xdb,0xe2,0x32,0x8b,0x7d,0x57,0x62,0x1d,0x61,0x81,0xa1,0x36,0x7a,0x25,0x00,0x72,0x24,0x4c,0xa7,0x96,0x3b,0xa5,0x82,0xba,0x8e,0x89,0x1e,0x1b,0x8e,0xf4 +.byte 0xab,0x91,0x85,0x7a,0x32,0x4a,0x47,0x9f,0xce,0xd2,0x51,0x77,0xcd,0xc9,0x02,0x54,0xf2,0x7b,0xcb,0xb8,0x83,0xe0,0xe0,0x1b,0x4a,0xa2,0xe0,0xd9,0x15,0xb6,0x02,0x19,0x75,0xa6,0xba,0xa6,0x98,0xd9,0x61,0x74,0xc6,0x48,0xa5,0x59,0x3d,0xc8,0x47,0xc9,0xe8,0x6b,0xbb,0x6d,0xcf,0x0e,0x8d,0x6b,0x58,0x8b,0x7d,0x4e,0x0b,0x3d,0x67,0xc4 +.byte 
0x8e,0x78,0x59,0x40,0x88,0x82,0x33,0x27,0x2c,0xfe,0x2a,0x6c,0xe4,0x80,0xee,0x5a,0xd4,0x5f,0xc8,0xf7,0x82,0x02,0x67,0xfd,0xcb,0x55,0x3e,0xd8,0x41,0xb3,0xce,0x93,0xfe,0xe7,0x56,0xf5,0x63,0xba,0xfa,0x2e,0x79,0xfc,0x11,0x5d,0xb0,0xc6,0x32,0x54,0xed,0x71,0x9b,0x15,0xce,0x62,0x09,0xd4,0x28,0x7f,0x7b,0xa1,0x50,0x5b,0x46,0x24 +.byte 0x0e,0x40,0xa2,0xe2,0x7d,0x93,0xa6,0x2b,0x0b,0x9b,0x40,0x25,0xc9,0xca,0x7a,0x01,0x8b,0x7d,0x68,0xeb,0xd7,0x84,0xc1,0x9d,0xf9,0xfb,0xd0,0x1a,0xec,0xef,0x6b,0x4c,0x78,0x31,0x62,0x8e,0x9d,0xdc,0x78,0x8f,0xcb,0xf8,0xf9,0x41,0xdc,0x9f,0x6d,0x0a,0x27,0x67,0xce,0xbd,0xeb,0x87,0xb3,0x26,0xf3,0x51,0xe1,0xd6,0xd1,0x57,0x46,0xfe +.byte 0x21,0xb9,0x88,0x7c,0xdd,0xa2,0x49,0x71,0x24,0xfb,0xc4,0xc0,0x6a,0x6b,0x05,0x7f,0x80,0xb0,0x09,0x3b,0x9e,0x6c,0x59,0x31,0x3e,0xac,0x7a,0x2e,0x5c,0x04,0x03,0xa3,0x6e,0xf5,0x66,0xee,0xc2,0x9b,0x65,0x88,0x06,0xbf,0xf5,0xe3,0x23,0x73,0x38,0x88,0x99,0xf1,0x64,0x68,0xdf,0x7d,0x04,0x06,0x72,0x92,0x0b,0x62,0x5d,0x12,0x1e,0x4e +.byte 0xff,0x60,0x35,0xe3,0x0f,0xd9,0x8c,0xac,0x38,0x5b,0x91,0xc1,0x51,0xbb,0xa5,0x19,0x7d,0xfb,0x79,0xfa,0x42,0x3b,0xaa,0xf8,0xd3,0x0f,0xc3,0xf2,0xb2,0x68,0x91,0xae,0x28,0x83,0x4f,0x75,0xbd,0x20,0x5f,0x20,0xba,0xc2,0x75,0x85,0x74,0x23,0xf3,0x36,0x33,0x99,0x9c,0x64,0x4c,0xd1,0x5d,0xbd,0x06,0x46,0xbd,0x49,0xf0,0x86,0xc0,0xcb +.byte 0x1b,0xbd,0xec,0x98,0x5b,0xb1,0x80,0xba,0x12,0x42,0x22,0x09,0x9a,0x62,0x3c,0xa8,0x33,0xbf,0xce,0x92,0xd4,0x07,0xef,0x34,0x33,0x8f,0x67,0x1d,0x25,0x60,0xeb,0xd3,0xe4,0x31,0x63,0xa8,0xab,0xe3,0xab,0x70,0x50,0xd8,0x44,0x9f,0x39,0x51,0xd2,0xb9,0x4b,0x16,0xe4,0xfa,0xc5,0x47,0xf3,0xae,0xb5,0xfe,0x7d,0x5d,0x43,0x28,0xa6,0x3d +.byte 0xcf,0x71,0x23,0x6d,0x8e,0xd7,0x74,0xa4,0x86,0x9f,0x92,0x86,0x3c,0x1e,0x51,0xd4,0xe0,0xe6,0xd5,0xc4,0x53,0x3c,0x96,0x55,0xb9,0xac,0x63,0x5b,0xee,0x5a,0x03,0x84,0xb9,0x43,0x2c,0x0f,0x6d,0xbb,0xb5,0xca,0xf0,0x4f,0x3e,0x8b,0x3b,0x14,0x01,0x0e,0x81,0x0d,0xe6,0x62,0xa9,0x34,0x4e,0x03,0xc9,0x85,0x9f,0xc8,0x4f,0x52,0x3f,0x84 +.byte 0x1b,0xab,0x7e,0xaf,0x93,0x22,0xe2,0x0d,0x41,0x79,0x50,0xb2,0x17,0xa7,0x9a,0x80,0xd5,0x65,0x40,0x3b,0x56,0x9b,0xc9,0x00,0xcf,0x03,0xf1,0xff,0xcd,0x72,0x27,0xdb,0x74,0x94,0x70,0x02,0xdc,0x3a,0xee,0x00,0xcc,0x08,0x0a,0xab,0x40,0x87,0x24,0xaf,0x7d,0x67,0x18,0xd0,0x7c,0xeb,0x91,0x1f,0x7e,0x9e,0x41,0x7b,0x39,0xf2,0xfe,0xaf +.byte 0xb7,0x6c,0x58,0xe0,0xdb,0xf7,0xf1,0x23,0x0b,0x98,0x08,0xfa,0xde,0xfa,0xf9,0x24,0x23,0xd1,0x7f,0x69,0xd3,0xb1,0x82,0x68,0x03,0x06,0x86,0x7a,0xf4,0x90,0x8d,0xa5,0xbd,0xbe,0x14,0x2f,0xa2,0x5e,0xaf,0x5c,0x1e,0x07,0x68,0x19,0x5a,0xd3,0x53,0x7d,0xe8,0x13,0x6b,0xe3,0x02,0x49,0x0d,0xd2,0x96,0x56,0xae,0x67,0x8a,0x27,0x61,0xa0 +.byte 0x60,0x20,0x2c,0xb4,0x5d,0xdf,0xc3,0x24,0x50,0xa9,0xbc,0x3d,0x5c,0xf3,0x2e,0xb6,0xba,0x71,0xf0,0x04,0x43,0x84,0x4d,0x80,0xe9,0xa5,0xdd,0xb3,0x1e,0x5e,0x56,0x32,0x1a,0xd4,0xe3,0x10,0x57,0x35,0xa8,0xf1,0xe5,0x96,0xc1,0x27,0xef,0xcc,0x21,0x71,0x10,0xd1,0x07,0x7e,0xb3,0xab,0x95,0x64,0x86,0xaf,0xc9,0x15,0xe6,0x98,0x5e,0xb1 +.byte 0xbd,0xde,0x99,0x38,0xfc,0x8d,0xb2,0x5a,0xa4,0x44,0x5b,0x74,0x31,0x31,0x07,0x93,0xf5,0x86,0x78,0xc5,0x82,0x26,0xfc,0x95,0x1f,0x33,0xd8,0xfe,0x70,0x42,0x2a,0xa7,0x3a,0xb1,0xb2,0x63,0xd6,0x5b,0x54,0x9c,0x54,0x45,0x4f,0x1b,0x4a,0xc2,0xb4,0x0e,0x99,0x48,0xde,0x8d,0xa6,0x5d,0xd3,0xdc,0x31,0xa4,0x2b,0x0d,0x44,0x6e,0x1a,0x10 +.byte 
0x3f,0x6c,0xa0,0xab,0xcb,0xb4,0xf6,0x18,0xba,0x11,0xd4,0xd4,0x70,0xc4,0xab,0x04,0x4c,0xe7,0xe9,0x53,0xe5,0xd9,0xe7,0xeb,0x21,0xa2,0x2c,0xc4,0xc6,0xc3,0xe7,0x73,0xd9,0xd3,0x84,0xb0,0x12,0x94,0x3b,0xfd,0xd9,0x32,0xba,0xe3,0x37,0xc1,0xb9,0x4d,0xea,0x3e,0x3d,0x31,0x4e,0xa0,0xe7,0x73,0x9d,0x4e,0x26,0xd1,0xdf,0xe6,0x26,0xcd +.byte 0xd7,0x17,0xd7,0x28,0x2c,0x04,0xe9,0x55,0xd5,0x70,0xaf,0xab,0xc1,0x07,0xbc,0xc4,0xd2,0x89,0xdc,0x22,0x59,0x19,0x0e,0xd8,0x8b,0xdd,0x46,0x7f,0xe4,0xad,0xa5,0x70,0xd7,0x18,0x51,0x30,0xd7,0xbc,0x26,0x45,0xe7,0xea,0xce,0xc7,0xf2,0xca,0xb1,0x9c,0x57,0x1e,0x10,0x5f,0x44,0x8d,0x3d,0xe8,0x55,0xa1,0x22,0x68,0x97,0xe8,0x03,0x9c +.byte 0x8b,0x63,0x81,0xd9,0xcd,0x4c,0x6c,0xe3,0x68,0xc9,0x35,0xee,0x94,0x13,0x25,0x0b,0x12,0x61,0xbd,0xee,0x6f,0xc7,0xe8,0xb5,0x01,0x7a,0x9e,0xd0,0x5a,0x46,0xc6,0x19,0x1b,0xc2,0xf1,0x2d,0xaa,0x53,0x29,0xcf,0x23,0x1a,0x4d,0x94,0x0a,0x50,0x64,0xf5,0x3b,0x52,0x55,0xac,0xa5,0x21,0x15,0x47,0xd9,0x14,0x8c,0x7f,0x4d,0x79,0x6b,0xc1 +.byte 0x43,0x0a,0xf2,0x42,0xd2,0xb0,0x95,0x19,0x99,0xdd,0x1d,0x8e,0x84,0x8c,0x7e,0x59,0x69,0x93,0x86,0xae,0xf1,0x67,0x35,0x55,0x7c,0x5b,0x38,0x11,0x56,0xec,0x6c,0xbb,0xe8,0xc0,0x54,0xec,0x5f,0x65,0x13,0xe3,0x86,0xa0,0xb1,0xc1,0x5e,0x34,0x4f,0xdd,0x4d,0x00,0xc6,0x29,0x05,0x78,0x64,0x8c,0x19,0xb0,0xfc,0x8a,0xb2,0xc7,0x86,0x57 +.byte 0xa2,0xdd,0xed,0x43,0xc1,0x7f,0xab,0x89,0x19,0xe8,0xa6,0xf5,0x7a,0x15,0xfe,0xd5,0x4f,0x53,0xde,0x78,0x42,0x76,0xf7,0x8a,0x54,0xe8,0x37,0xfd,0xee,0x82,0x20,0xd5,0xe2,0x32,0xb9,0x32,0x67,0xc7,0xff,0xdc,0xf0,0x40,0x07,0x28,0x55,0x16,0x56,0x84,0xe9,0x17,0x25,0x17,0x8e,0x10,0xef,0x9f,0xed,0x33,0x83,0x6d,0x9e,0x87,0x82,0xb8 +.byte 0xa9,0x6b,0xcb,0xe5,0x04,0xfb,0x87,0x51,0x05,0x1a,0x64,0x64,0x51,0x34,0xa3,0x61,0x4a,0xe3,0xa6,0x35,0xa5,0xc9,0xe3,0xde,0xb0,0xcf,0x5f,0x68,0x49,0xbc,0x98,0xf9,0x0b,0x82,0xde,0xb1,0xf9,0x77,0x16,0x7c,0x1f,0x80,0x0c,0xfc,0xbb,0x6d,0x8e,0x92,0x93,0x00,0xc2,0xa5,0xbe,0xde,0x55,0x09,0x9d,0x83,0xa5,0x6c,0x0a,0xb5,0xc4,0x53 +.byte 0xde,0xbc,0x07,0xca,0x0f,0x43,0xea,0x50,0x25,0xee,0x51,0x3b,0xfb,0x7a,0xcf,0x31,0x8a,0x19,0x1c,0xa2,0x2d,0x72,0x79,0x81,0xc6,0xb8,0xe6,0xe1,0xd8,0x3e,0x0f,0xc0,0xae,0x73,0x40,0x30,0x15,0xaa,0xe3,0x72,0xc3,0x36,0xc1,0x42,0x11,0xc5,0x3f,0xf5,0x69,0x78,0xea,0x95,0x54,0x36,0xe8,0x7e,0x9c,0xad,0xbd,0xcd,0x19,0xfe,0x4a,0x04 +.byte 0xb4,0x54,0x14,0x98,0x58,0x6f,0x06,0x8f,0x8c,0x95,0xa8,0xc9,0xe8,0xc4,0x2b,0x03,0xaa,0x42,0x75,0x74,0xa2,0x63,0xdb,0xca,0xd1,0xf0,0x60,0xc3,0x63,0x84,0xfb,0xd7,0x5a,0x7b,0xca,0x45,0x8d,0x14,0xdc,0xf8,0x71,0x40,0x71,0xbb,0xa1,0x1a,0xd3,0x8c,0xfb,0xf6,0xf7,0xfc,0x82,0x72,0x50,0xc9,0xe3,0xc5,0xe2,0xb1,0x57,0xb1,0x24,0x3e +.byte 0x11,0x4d,0x96,0x1c,0x3a,0xe1,0xb6,0xb7,0x0e,0x55,0x35,0x6c,0xd8,0x2b,0xe3,0x78,0xcd,0xac,0x8f,0x24,0x70,0xc6,0x35,0x5b,0x6e,0x75,0x7a,0xf1,0x7d,0x87,0x53,0xcf,0x0a,0x24,0xb6,0x6a,0xfd,0xef,0x90,0x07,0xcf,0xde,0x30,0xbc,0x8c,0xec,0xda,0x6f,0x45,0xad,0x92,0xb6,0x8d,0x6b,0xb8,0x8e,0xdc,0xe5,0xbf,0x57,0x67,0x5e,0x2f,0x4d +.byte 0x5d,0xee,0x38,0x0a,0xaf,0xeb,0x62,0x84,0x2b,0x4c,0x30,0x7b,0x91,0x99,0x40,0x6f,0x09,0x2b,0x36,0xcd,0x04,0xeb,0x7c,0x8d,0xa5,0xbd,0xd6,0xb0,0xfc,0x27,0xcf,0x6b,0xdd,0xe1,0x94,0xbc,0x21,0xc6,0xc9,0x55,0x24,0xd4,0xa1,0x6f,0x1e,0xa2,0x81,0x31,0x22,0xb7,0x75,0x9e,0xa7,0x01,0x26,0x01,0x6c,0x12,0x91,0x02,0x87,0x40,0x5c,0x91 +.byte 
0x1f,0x0c,0x55,0x07,0x12,0xa7,0x48,0xdd,0xed,0xb6,0xfe,0x38,0x05,0xbc,0xe1,0x2e,0x3b,0x89,0x4f,0x98,0x65,0x22,0x93,0xda,0x09,0x9f,0x04,0x90,0x66,0x81,0xd1,0x56,0x27,0x8b,0x26,0x99,0xbe,0x93,0x08,0xf1,0xfb,0x80,0x5b,0xaa,0xc4,0x96,0x88,0x93,0xb6,0x01,0xae,0xf6,0x69,0xaa,0x6f,0x4d,0xde,0x2f,0xc7,0x24,0xbf,0xe9,0xb8,0xeb +.byte 0xcd,0xb2,0x0a,0x50,0x5c,0xd2,0x0b,0xfc,0x57,0x3b,0x96,0xf8,0xd9,0xbe,0xd2,0xb5,0x16,0xac,0x7c,0xe4,0x2f,0x46,0x93,0x86,0x48,0x91,0xfa,0xae,0xca,0x05,0x9e,0xfe,0x6e,0xae,0xa5,0x58,0x94,0xc0,0x58,0x1e,0xc5,0x69,0x28,0xe0,0x99,0x12,0x83,0xcf,0x35,0xe4,0x72,0x7d,0x4e,0x8b,0x66,0x56,0xb3,0xa6,0x2a,0x72,0x06,0x03,0x45,0xd1 +.byte 0x95,0xc9,0x93,0xb7,0xf4,0x8a,0x83,0xce,0x17,0x8b,0xf0,0x8e,0x8f,0x4a,0x68,0x55,0xd8,0xfc,0x54,0x8d,0xb5,0x62,0x17,0xa8,0xe6,0x18,0x03,0x53,0x04,0xb8,0xbe,0xd2,0xd0,0x7a,0x84,0xe1,0x39,0x31,0xc5,0x74,0xf2,0x64,0x1c,0x3b,0xd5,0x52,0x9b,0x81,0x8a,0x8f,0x36,0xc8,0xab,0x3d,0xe1,0xa8,0x2a,0xf2,0x84,0x9a,0xca,0x0c,0xcf,0xc9 +.byte 0x45,0x54,0x06,0xe8,0xd2,0x62,0x61,0x4d,0xeb,0x0b,0x38,0x4e,0x43,0x59,0x85,0x3a,0xe4,0xa3,0x25,0x15,0xc2,0xb5,0x7b,0x5e,0x2f,0xe6,0xc1,0x5d,0x2a,0xb7,0x57,0xb8,0x7e,0x61,0x51,0xc3,0x81,0x53,0x45,0x8a,0x6e,0x4c,0x89,0x84,0x2a,0x6b,0xca,0x15,0xff,0x97,0xfc,0x1f,0x8a,0x44,0xbd,0xcd,0x5e,0x32,0x6b,0x5f,0x78,0x7b,0xdf,0xdd +.byte 0x9d,0x2f,0x21,0xf2,0x14,0x40,0x5f,0x5a,0xd5,0x21,0x27,0x3d,0x0b,0x9f,0x9f,0xb0,0x8e,0xab,0x9e,0x68,0x96,0x02,0xfd,0x4d,0xcc,0x03,0xf0,0x03,0xfb,0x4c,0xac,0xfa,0x00,0x3b,0xea,0x1a,0x53,0x80,0x77,0xec,0x53,0xc3,0x3c,0x6c,0xf8,0xa5,0x3e,0x52,0x34,0xd4,0xa1,0x52,0xb8,0xd6,0x19,0x8c,0xdf,0x85,0x27,0x61,0x22,0xe7,0x43,0xeb +.byte 0x85,0xc0,0xbe,0x58,0xe6,0x60,0x81,0x4c,0xc6,0xbb,0xc0,0xbf,0x63,0x39,0x9d,0xad,0x2e,0xa8,0x2a,0x83,0x3d,0xfa,0xdb,0x0b,0x98,0x16,0x78,0x18,0x43,0xc7,0x17,0x82,0xb8,0xec,0x32,0x45,0x75,0x0c,0xc1,0x4c,0x84,0xbf,0xce,0x83,0x3b,0xb4,0x91,0xf4,0x0d,0x5d,0x83,0xf6,0xd6,0x10,0xab,0xc6,0x26,0x9b,0x68,0x59,0xec,0x48,0x4b,0x1d +.byte 0x35,0x2a,0x5b,0x23,0x83,0x22,0x8e,0x7d,0xfa,0xce,0xde,0xb1,0xd9,0x78,0xf6,0x9e,0x08,0xba,0xfb,0xda,0xf2,0x04,0xc5,0x2a,0xac,0xbf,0xb4,0x04,0x05,0x1f,0x0b,0xeb,0xe8,0x2a,0x3c,0x3f,0x4f,0xb6,0xc8,0x6b,0x97,0x5a,0x9e,0xdb,0x4b,0x3c,0x93,0xc1,0x20,0x1c,0x62,0x91,0x74,0x76,0x49,0x92,0xc2,0xd8,0x0d,0xd8,0xfe,0xb5,0x68,0x77 +.byte 0x48,0x9f,0xbe,0xe0,0x78,0x20,0xe7,0xa4,0x3d,0x3e,0xa1,0x4c,0xc7,0xeb,0xd3,0x30,0xd3,0xf0,0x65,0xcf,0x18,0x3c,0xf8,0x25,0xc2,0x99,0xf4,0xec,0xef,0xdd,0xef,0xf3,0x6b,0x28,0x00,0xaa,0xfd,0x76,0xec,0x19,0x67,0xd6,0x79,0xa6,0x01,0x6e,0x20,0x3a,0x7f,0xd4,0xd0,0x05,0xb4,0xea,0xd4,0xde,0x11,0x06,0x44,0x4a,0x6f,0x15,0x2f,0x62 +.byte 0x9a,0xaa,0xeb,0xaf,0xb5,0xb5,0x46,0xb2,0x28,0x2e,0x74,0x26,0x06,0x91,0xeb,0x15,0xef,0xd4,0xfd,0xc7,0x1b,0x65,0x25,0x01,0x24,0xd2,0x44,0x05,0x18,0x1c,0x71,0x36,0x58,0xc4,0x37,0xfe,0x22,0x29,0xc0,0x2f,0xd2,0x4e,0xeb,0x43,0xb9,0xf9,0x4e,0x87,0xd7,0x92,0x77,0xa8,0x4f,0xa5,0x6e,0x5c,0x4d,0x3a,0xe9,0x16,0x62,0x30,0x51,0xbb +.byte 0x32,0xd8,0x0d,0x86,0x20,0xbf,0x68,0x0f,0x3e,0xef,0x8b,0x0d,0xc5,0xa6,0x94,0x81,0xe9,0x6f,0x85,0xf5,0x22,0x6e,0x9e,0x0a,0x56,0xa3,0x43,0x79,0x50,0xd9,0x45,0x5f,0x5a,0x3f,0x53,0x53,0xb7,0xfe,0xb6,0x1c,0x63,0xab,0x7c,0xed,0x2f,0xc4,0x2b,0xa8,0x53,0xfb,0xad,0x46,0xf0,0x63,0xca,0x7a,0x6e,0xce,0xf4,0xb9,0x34,0xd0,0x9a,0xc8 +.byte 
0x0d,0xd2,0x32,0xce,0x26,0x3f,0xcd,0xd9,0xbc,0xa9,0x46,0x65,0x45,0xfe,0x45,0xeb,0x0d,0xab,0xe6,0x31,0xb6,0xb9,0x41,0x53,0x7d,0x55,0xc3,0xfb,0x10,0x46,0x37,0x77,0x1f,0x15,0xf0,0x5f,0xcb,0x8f,0xea,0xc5,0xc0,0xb8,0xc6,0xb1,0x3a,0x06,0x42,0xec,0x38,0xec,0x06,0xd1,0x37,0x3b,0xe1,0x8d,0xad,0xc2,0xce,0x96,0x0b,0xf0,0xab,0xde +.byte 0x9c,0x3c,0x09,0xef,0x59,0xcd,0x67,0xa7,0x6e,0x0e,0xc7,0xee,0x51,0x6d,0x90,0x40,0x0e,0xdf,0xb1,0x13,0xe3,0x0c,0xb6,0xe8,0xcb,0xf5,0x57,0x50,0xeb,0xdf,0x09,0x45,0x72,0x40,0xff,0xdc,0x5c,0x51,0x42,0x47,0xb2,0x9e,0xca,0xf3,0x1b,0x06,0xb1,0x3e,0x04,0x55,0x96,0x63,0x24,0x16,0xdb,0x3e,0xab,0x98,0x33,0x70,0x6f,0xfd,0x8f,0x7b +.byte 0x56,0xb0,0x7f,0x28,0x26,0xc4,0x2a,0x9e,0xf5,0xa7,0xba,0x61,0x75,0xa4,0xb1,0x25,0x60,0xe5,0x9c,0x7e,0xb4,0xaa,0x04,0xa1,0x33,0x5a,0x8d,0x88,0x1d,0xc4,0x38,0x58,0x28,0x23,0xc7,0xac,0x20,0xf8,0xaa,0x18,0xf8,0xc7,0x27,0x05,0x07,0xf7,0x12,0xfe,0xe1,0xa5,0x99,0xaa,0x55,0x79,0x72,0xc4,0x14,0x08,0x14,0x4a,0xfb,0xf7,0x66,0x81 +.byte 0x6e,0xed,0x81,0x12,0x5f,0xb6,0x08,0x00,0x37,0xf9,0xdc,0xdf,0x4d,0xcb,0xfa,0xc6,0xf3,0xc2,0x17,0x17,0x52,0x39,0x7b,0xa0,0x3e,0x25,0xc9,0x48,0xd8,0xa6,0x1b,0x8b,0xdb,0xf8,0x74,0xac,0x6b,0x16,0xec,0xa6,0x4a,0x1e,0x7e,0x5c,0x50,0xbf,0x81,0xef,0x3c,0x7d,0x9d,0x21,0x38,0xa9,0x26,0x3c,0x30,0x7a,0xfb,0xab,0xd8,0x6a,0x0a,0xaa +.byte 0xbb,0x6e,0x91,0x92,0x7c,0x04,0x02,0x0e,0xa2,0x71,0xc7,0xde,0x7d,0x42,0xaf,0xe5,0x92,0xc1,0xb9,0xd7,0x52,0xaa,0x32,0xea,0x39,0x84,0x17,0x40,0xb0,0x83,0x18,0xff,0x46,0xb8,0x59,0xd9,0xa3,0xce,0x82,0x7e,0x65,0x54,0xe0,0xa4,0x6d,0x8a,0xbc,0x6a,0x65,0xb2,0xd5,0x96,0x5b,0x1c,0x9a,0x32,0x72,0xf7,0x81,0x57,0xcd,0xb3,0x22,0xc5 +.byte 0x7d,0x20,0x24,0xea,0xbe,0x51,0x4c,0xb3,0x48,0x36,0x4f,0x73,0xf4,0x3f,0x07,0x92,0x01,0xe2,0x1e,0x78,0x3f,0x8e,0x1f,0x35,0x1a,0xf1,0xe1,0x14,0xd1,0xe7,0xd9,0xfd,0xd8,0xf7,0x20,0xc2,0xf3,0x7a,0x59,0xc9,0x1d,0x13,0x41,0x01,0xf6,0x77,0x69,0xfb,0x0f,0xc7,0xe4,0x58,0x04,0xce,0xe8,0x73,0x87,0x2f,0xef,0xe6,0x36,0x38,0xc7,0x91 +.byte 0x2d,0x17,0xb5,0x56,0x68,0xb1,0x9f,0xbf,0x2e,0x4b,0xe7,0x09,0x7b,0x35,0x33,0x5a,0x6c,0xc1,0x6f,0xb3,0xac,0x6c,0x1e,0xfe,0xc0,0xc9,0xd8,0x77,0xf5,0xcb,0x5e,0xcc,0xd1,0x2f,0xdd,0x23,0x8b,0x3b,0xb5,0x43,0x96,0x1f,0xa9,0xe4,0x84,0x41,0x92,0xe9,0x68,0x47,0x50,0xf7,0xd4,0x85,0x22,0xa1,0x43,0xaa,0xde,0xf7,0xea,0xe0,0x54,0xaa +.byte 0x0d,0xe6,0xa5,0xb8,0x7e,0xec,0x13,0x9a,0x1e,0x6c,0x10,0x9d,0xa8,0xfb,0x97,0xde,0x24,0xda,0x33,0xbb,0xab,0x17,0x7a,0xb4,0x72,0xaf,0xed,0xc9,0xa4,0x62,0x65,0x0c,0x99,0x3d,0x74,0x7f,0xff,0x59,0xa9,0x8e,0x37,0xb9,0x10,0x30,0x26,0x3f,0x2f,0xfc,0x1e,0xe2,0xc6,0xb8,0xff,0x41,0xb3,0x35,0x3f,0x41,0xf4,0x47,0xbc,0x76,0xc6,0x77 +.byte 0x0f,0xf8,0xff,0xb8,0xd2,0x34,0x40,0xac,0x43,0xcb,0xcf,0x1f,0x57,0xaa,0x1a,0xa7,0xe1,0x4a,0x69,0xd7,0x05,0xa7,0x9d,0xff,0x13,0x43,0x91,0xe3,0x09,0x1c,0xb2,0xb2,0x82,0x06,0xa3,0x3c,0x35,0x85,0x9e,0xd0,0xcf,0x1c,0xb9,0x13,0x09,0x7d,0x3d,0x17,0x0f,0xf8,0x2f,0x61,0x97,0x7e,0x02,0xe0,0x78,0x07,0x69,0x8c,0x91,0xbe,0x96,0x92 +.byte 0x4a,0x03,0xa7,0x31,0x5f,0x6c,0xfe,0x55,0xb2,0x17,0xe8,0x4c,0x64,0x48,0x18,0xde,0x4f,0x5a,0xce,0xd2,0xcb,0x83,0x4d,0x1b,0x2a,0x1f,0xce,0x85,0xf7,0xdc,0x74,0x8c,0x42,0xc6,0x5a,0x3a,0x51,0x22,0x79,0x70,0xa0,0xe0,0x29,0x2a,0x73,0xe4,0x53,0xb4,0x47,0x5f,0x54,0xa8,0x65,0xe4,0x89,0x78,0xf9,0xb9,0x5f,0x5f,0x9d,0xa8,0xf7,0x82 +.byte 
0x4e,0x34,0x60,0xfc,0xe3,0x88,0x65,0x73,0x99,0x1f,0x53,0xed,0xe8,0xf0,0xf4,0x5a,0x0a,0x49,0x42,0x6e,0x02,0x3f,0xa8,0x63,0x21,0x02,0x2e,0x8f,0x33,0xba,0x0e,0x10,0xd3,0x4c,0x1a,0x8b,0xf5,0x84,0x8e,0x2b,0x37,0x12,0x23,0x77,0x02,0x45,0xc7,0xc3,0x79,0x06,0xc2,0x8c,0xaa,0x32,0x53,0x7c,0x19,0xa2,0x92,0x7e,0x47,0x40,0x8f,0xae +.byte 0x8a,0x64,0x51,0x67,0xe1,0xc1,0xc3,0xd2,0x14,0x1d,0x63,0x0c,0x80,0x04,0x30,0x3d,0xee,0x58,0x44,0xe4,0x14,0x63,0xfc,0x95,0x05,0x3e,0xc1,0x8d,0xd3,0xcb,0x5d,0xc1,0x8e,0xf9,0xd7,0xe5,0x9d,0x97,0xef,0x8a,0xaa,0x50,0x31,0xa3,0x01,0x3a,0xb2,0x8d,0x63,0xb6,0xe7,0x34,0xec,0xa1,0x7a,0xff,0x57,0x95,0xbb,0x1d,0xbe,0x0c,0xa5,0x91 +.byte 0x92,0x08,0x06,0x1c,0x67,0x03,0x2e,0xee,0xf6,0x6f,0xa0,0xb7,0x9a,0x7c,0xe3,0x6a,0x8e,0xd8,0x50,0xc1,0xd6,0xa1,0x8d,0xe9,0x66,0x9a,0x1f,0x62,0x15,0x04,0x93,0x74,0xe8,0x04,0x0d,0x27,0x55,0x2b,0x07,0xb1,0xbd,0x69,0xe4,0xc1,0x34,0x8e,0xe7,0xfb,0xa0,0x3f,0x40,0x31,0x47,0xba,0xcb,0x80,0x88,0xf7,0x4f,0x46,0x05,0x31,0xaf,0x23 +.byte 0xdf,0x93,0x09,0x0a,0x15,0xc9,0x95,0x74,0x52,0x72,0xf4,0xbf,0x0d,0x07,0xb6,0xcc,0x4b,0x40,0x12,0xf3,0x87,0xea,0x29,0xd8,0x29,0x31,0x23,0xac,0x29,0x1a,0x89,0x83,0x5b,0x33,0x4b,0x6b,0x69,0xbe,0xb6,0x15,0x7e,0xfd,0xf2,0x95,0xc4,0xbe,0xeb,0xee,0x59,0x01,0x2a,0xce,0xca,0x80,0xda,0xf8,0x1a,0x01,0x23,0xf7,0xa1,0x4f,0xf5,0x83 +.byte 0x5e,0x16,0xd9,0x12,0xa9,0x4e,0xcb,0x59,0x23,0x4f,0x40,0xd7,0xbf,0xaf,0x76,0xf0,0x50,0x31,0x27,0x3a,0x8b,0x1d,0x9b,0xb1,0x1c,0x41,0xb0,0xed,0xe6,0xf3,0xa8,0x5f,0x6b,0x58,0x54,0x92,0xaf,0xcc,0x44,0x5c,0xea,0xdb,0x09,0xc5,0x26,0x5e,0xbe,0x46,0xbd,0x72,0x49,0x5a,0x4e,0x65,0x7e,0x75,0xcf,0xfc,0xf6,0xd0,0x3c,0x4a,0x7e,0xd6 +.byte 0x8e,0x8e,0xb4,0x19,0x45,0x75,0xbf,0xc3,0x5e,0x46,0xff,0xc9,0x46,0x65,0x8d,0x31,0x01,0x5e,0x1c,0x13,0x93,0x56,0x6f,0x28,0xec,0xf3,0x77,0xfa,0x6e,0xb9,0x0e,0xb6,0x8e,0x0e,0x38,0xf8,0x28,0x64,0xa2,0xa1,0x42,0x9a,0xb4,0xf3,0x14,0x8d,0x17,0x80,0x05,0x82,0x7c,0xf1,0xea,0x8b,0x4b,0x62,0xa0,0xde,0xf6,0xd7,0x36,0xb0,0x70,0x8d +.byte 0x03,0xf6,0xc8,0x2a,0x9e,0xc0,0xbb,0x2f,0xcb,0xef,0x35,0xf7,0x16,0xcd,0xd6,0xd6,0x90,0xd7,0x5d,0x61,0x00,0x33,0x9f,0xd8,0xd1,0xda,0x17,0x67,0x90,0xd1,0xf8,0x59,0xcb,0xf1,0x76,0xc2,0xbe,0x1f,0x5d,0x0d,0xb2,0x02,0xbd,0x19,0x9f,0x5a,0xa0,0x91,0xac,0x51,0xb5,0xf5,0x0a,0x64,0x67,0xf2,0x49,0x30,0x6c,0x57,0x83,0xda,0x90,0xf1 +.byte 0xc6,0xc7,0xe6,0x05,0x13,0x30,0x52,0xfd,0x2a,0x47,0xea,0xae,0xd3,0xed,0xe4,0x64,0x1f,0x6c,0xb1,0xdf,0xca,0x20,0x97,0x2a,0xc8,0xdc,0x00,0x0e,0x5b,0x59,0xc8,0x16,0x95,0x68,0x9a,0x2e,0x44,0xab,0xf6,0x93,0x7c,0x8f,0x66,0x4f,0x07,0x42,0x3f,0xa5,0x81,0xe7,0xab,0x59,0xbb,0xae,0xb1,0x3e,0x9a,0x25,0xf1,0xde,0xac,0x4c,0x1d,0x7a +.byte 0x54,0xb9,0xa9,0x59,0xaf,0xb0,0xab,0xaf,0x6b,0x76,0x66,0x1e,0xbe,0x1a,0xc1,0x61,0x1b,0x81,0x6b,0xe8,0xe4,0x73,0x6a,0x87,0xe9,0x39,0xcb,0x2c,0xab,0x64,0x36,0x9a,0x11,0x46,0xec,0x9f,0x30,0xb6,0x2c,0x14,0xe0,0xec,0xbe,0x33,0xde,0x60,0xc6,0x00,0x29,0x3c,0x55,0xda,0xfc,0x64,0xff,0xaa,0xbf,0x99,0x58,0xe2,0xe3,0xec,0xde,0xca +.byte 0xd1,0x3d,0xd2,0xad,0xaa,0xca,0x36,0x8f,0x93,0xa2,0xdd,0xde,0xaa,0x49,0x7f,0xdd,0x39,0x91,0xa0,0x7b,0x33,0xdf,0x36,0xcd,0xc3,0x3a,0xbc,0x53,0xf0,0x07,0x99,0x78,0x4e,0x63,0x47,0x79,0xbf,0x21,0xfc,0x05,0x47,0x69,0xec,0xee,0xf4,0x21,0x97,0x94,0x0c,0x7a,0x9f,0xa6,0xeb,0x5b,0x23,0xed,0x9d,0xc1,0xe1,0x5e,0x10,0xca,0xe0,0x84 +.byte 
0x5a,0xdd,0xf6,0xae,0xd8,0x23,0x98,0xea,0x6c,0x43,0x77,0x41,0xf3,0x84,0x5a,0xe8,0xda,0xb3,0x11,0x0e,0x19,0x33,0xe9,0xf9,0x7a,0x90,0x07,0x68,0xf1,0xe4,0x52,0x0c,0x03,0x67,0xb9,0x42,0x41,0x24,0xa3,0x61,0x67,0x75,0xc9,0xb5,0xdd,0x10,0xf1,0x20,0x93,0x54,0xdb,0x0d,0xc7,0x0d,0x25,0x3e,0xda,0xb3,0xe7,0xce,0x97,0x7e,0xdb,0x1a +.byte 0x8f,0x92,0xff,0xe3,0x44,0x2d,0x6b,0xdb,0xe0,0x69,0x8b,0x16,0xce,0xe8,0xc7,0x93,0xf1,0x19,0xb9,0xd3,0x41,0x45,0x8d,0x95,0xb3,0x03,0xb2,0x66,0x96,0x95,0x91,0x33,0x1c,0xee,0xde,0xd7,0x9d,0xab,0x32,0x2f,0xb8,0x3c,0x7a,0x44,0x8f,0xa6,0xca,0x02,0x03,0x2f,0xa8,0x44,0x85,0x0e,0xf5,0x27,0x90,0x84,0xd9,0x80,0x06,0xf4,0x4f,0xc7 +.byte 0x21,0xc5,0x92,0xa4,0x2d,0x08,0x42,0x4c,0xa7,0x84,0xfa,0x7e,0x2b,0x66,0xfb,0x7c,0x81,0xea,0x5c,0x7d,0xdd,0x86,0xf1,0xf5,0x04,0xef,0xf2,0x50,0x12,0x72,0x42,0x22,0x23,0x74,0x7f,0xe7,0xed,0xd9,0xce,0x78,0x10,0x83,0x37,0xd0,0x81,0x97,0x4a,0xac,0xc2,0xe5,0x13,0x91,0x83,0xe2,0x6e,0xff,0x5a,0x0b,0xc3,0x4d,0xc1,0x3e,0x97,0x16 +.byte 0x96,0x69,0x39,0x9e,0x1d,0x6b,0x16,0x82,0xa2,0x94,0x0d,0x50,0xdd,0xa3,0xda,0x9d,0xda,0x3f,0x46,0xce,0x6c,0xd0,0xdf,0x6e,0x1b,0x17,0x47,0x51,0x74,0x6f,0xe9,0xa4,0x6b,0xae,0xd2,0x6e,0x5b,0xc0,0x26,0xc6,0x0b,0x84,0xb1,0x39,0xcf,0x9e,0x7c,0x18,0x52,0xd7,0x8f,0x33,0xae,0x3d,0xaf,0x3d,0x1a,0xba,0x3f,0x09,0x76,0x22,0x1d,0xf3 +.byte 0x42,0x14,0x4f,0x06,0xc7,0x33,0xc1,0x2d,0x58,0x1b,0x4c,0xc0,0x3a,0x29,0xa6,0x5e,0x19,0x26,0xdf,0x36,0x18,0xa9,0xc5,0xe9,0xd3,0xb1,0xae,0x86,0xa8,0x7f,0xd9,0xb4,0x18,0xef,0x9c,0x46,0xb6,0xf2,0xb2,0xb6,0x6e,0xe2,0xf8,0x5f,0x27,0xea,0x76,0xd3,0x40,0x68,0x94,0x66,0x8a,0xf5,0x9f,0xee,0x0c,0xe5,0xae,0xb6,0xba,0x87,0x42,0x40 +.byte 0xc9,0x83,0xac,0xb4,0x2c,0xec,0x74,0xb7,0x55,0x17,0x0b,0x1e,0x45,0x1a,0x87,0x9d,0x52,0xce,0xb7,0x58,0x2f,0x45,0xc7,0x7d,0xf3,0xd3,0x11,0x2e,0xf4,0xd8,0xc0,0xb8,0xc3,0x31,0x45,0x68,0x40,0xe8,0x8a,0x33,0x20,0x9a,0x06,0xa8,0x18,0x53,0xb2,0x73,0xa1,0x57,0xac,0x8f,0x56,0xeb,0x8e,0xa4,0xfc,0xd6,0x76,0x7e,0x81,0x62,0x2c,0x17 +.byte 0x49,0xb4,0xcc,0x15,0x66,0xcb,0xa2,0x3c,0x29,0xf0,0x73,0x0e,0x9a,0x34,0x16,0x6d,0x43,0x62,0x20,0x89,0x14,0xae,0x8b,0x5d,0x61,0x54,0xa1,0x82,0x49,0x73,0xb9,0x2b,0x48,0xd4,0xe3,0x21,0x37,0x5e,0x4d,0xbf,0xd0,0x72,0xa4,0x23,0xdb,0x7c,0xd9,0x45,0x77,0x8a,0x24,0x23,0x56,0xcd,0x84,0x80,0x44,0x12,0xce,0x99,0x39,0xbd,0x77,0xff +.byte 0x8c,0x62,0x8d,0x56,0x77,0x24,0x40,0x11,0x22,0xab,0x28,0xd6,0x75,0x2b,0xbb,0xc1,0x51,0xd6,0x5e,0x61,0x1c,0xe9,0xac,0x36,0x99,0x52,0x44,0xa5,0x20,0xdb,0xe0,0x12,0x9a,0x45,0x8f,0x7f,0x47,0xf9,0xa3,0x91,0x18,0x2b,0x51,0x9a,0x9f,0x3f,0x7d,0x36,0xde,0x71,0xae,0xca,0x62,0x62,0x16,0xda,0x19,0x9c,0x84,0xce,0xde,0x93,0x22,0xde +.byte 0xaf,0xe7,0x91,0x09,0xe8,0xf0,0x0e,0x07,0x71,0xdf,0x48,0xcd,0x8a,0x77,0x19,0x3c,0xd6,0xef,0x8e,0xe0,0x49,0xdf,0xcb,0xd6,0x34,0x78,0x7f,0x42,0xc2,0x6e,0x7a,0x50,0x53,0xee,0xbf,0x73,0x4b,0xd4,0x4f,0x06,0x18,0x26,0x67,0x51,0x54,0xa3,0x40,0xe6,0xb3,0x61,0x4b,0xfd,0xee,0x62,0x00,0x44,0x6c,0x0d,0x8b,0x2f,0x4d,0x06,0x17,0x41 +.byte 0xee,0x8b,0xde,0x1f,0x80,0x36,0x58,0x3e,0x0a,0x53,0x0a,0x83,0xf9,0xba,0xbd,0x91,0x6a,0x20,0x32,0x42,0x6c,0x85,0xdc,0x84,0xfd,0xce,0x57,0xbe,0xf8,0xa5,0x2c,0x7e,0xf9,0x1b,0x07,0xf4,0x32,0x13,0x32,0x79,0xdc,0x91,0xfc,0xc0,0x18,0xe6,0x1e,0xb2,0x67,0x9d,0x08,0xd2,0x89,0xa2,0xb1,0xbf,0x37,0xe1,0x3f,0x9e,0xb5,0x17,0xf7,0x2f +.byte 
0x9a,0x4f,0x3c,0xea,0x5d,0x48,0x56,0x48,0x35,0x17,0xe9,0x5a,0x99,0xa7,0x2e,0x25,0x4f,0x96,0xa6,0x3d,0x3c,0xf8,0xdc,0xe7,0xe5,0x98,0x46,0xf7,0x10,0x16,0x4f,0xb0,0x7b,0x48,0x06,0xbb,0x9a,0x5a,0xad,0x32,0x49,0x92,0x39,0xb2,0xfe,0x01,0x1a,0x5e,0xcc,0xf7,0x0d,0x65,0x1c,0xf5,0x3d,0xb3,0x40,0x28,0x06,0x6e,0xbb,0x74,0x2a,0x95 +.byte 0xe9,0x62,0x2a,0xe2,0x19,0x38,0xc6,0x0d,0x46,0x30,0x6d,0x90,0xa5,0x68,0x4d,0x89,0xf0,0xf4,0xaf,0x52,0x11,0x8a,0x47,0x65,0xc0,0x6d,0xee,0xde,0xbc,0xed,0xf2,0x94,0xf3,0xfb,0xfd,0x2f,0xea,0xd5,0x36,0x89,0x8a,0x22,0xb8,0x75,0x3c,0xda,0x8d,0x3f,0x71,0xe5,0x50,0xb8,0xef,0xfc,0xa1,0x34,0x4a,0xb0,0x56,0x64,0xaf,0x28,0x0c,0x7a +.byte 0x28,0x3e,0xc8,0x83,0xc2,0xbb,0x89,0xc4,0x29,0x7f,0xc9,0xe7,0x4e,0xcb,0xdc,0x8f,0xe8,0xa4,0xdc,0x0d,0xcc,0xa0,0x16,0xda,0xa9,0x34,0x61,0xec,0x64,0xa7,0xf4,0x47,0xe9,0xee,0xbf,0xc6,0x4b,0xc5,0x01,0x65,0xe4,0xe0,0x12,0xd6,0x27,0xda,0x30,0xb5,0x60,0x72,0xe1,0xee,0x38,0x23,0x6c,0x9d,0xbb,0x83,0x01,0x4b,0x26,0x9a,0x68,0xb3 +.byte 0x89,0xb3,0xe0,0x10,0x22,0x58,0xef,0x2d,0xd4,0x86,0xab,0xab,0xc4,0xd8,0x9c,0x56,0xe8,0x54,0x40,0x86,0x11,0xd2,0x6b,0xc0,0xaf,0xfc,0x4a,0xef,0x24,0x38,0x79,0x32,0x54,0x26,0x8b,0x7e,0x02,0xad,0x86,0x9d,0x40,0x65,0x28,0x28,0xa3,0xa6,0xe4,0x07,0x29,0x3a,0xbb,0x81,0xed,0x17,0x54,0x51,0x35,0xc6,0x88,0x9c,0x63,0x7e,0x73,0x02 +.byte 0x28,0x13,0x4b,0x33,0xc0,0x68,0xbc,0xae,0x8c,0x59,0xd4,0x84,0x1d,0x41,0x86,0x5a,0xf6,0x14,0x50,0x13,0x88,0xca,0xc8,0xb8,0xfc,0x61,0xeb,0xe6,0x69,0x70,0x4a,0xa5,0xa5,0x36,0x4b,0xac,0xca,0x00,0x28,0xae,0xb0,0x03,0xef,0xe3,0x92,0xad,0x97,0x32,0x05,0x8c,0x93,0x95,0x45,0xd5,0x75,0x66,0x11,0xd3,0x6f,0x7f,0x5f,0x35,0x44,0xb7 +.byte 0xd7,0x34,0xcf,0x8c,0x4a,0x61,0x68,0x63,0x3f,0x92,0x54,0x01,0x3c,0x25,0x2d,0x6f,0x4a,0x2d,0x55,0xff,0x3f,0x86,0x85,0x9f,0xc2,0xa1,0xde,0x6b,0xbf,0x7e,0xb4,0x7c,0xc1,0x80,0x73,0xf5,0x3b,0x85,0xae,0x36,0x1a,0xdf,0x00,0x52,0xb7,0x70,0xa9,0x42,0x79,0xd2,0x26,0xf8,0x3b,0xeb,0x9f,0x2e,0x15,0x33,0xc8,0x85,0x2d,0x63,0xb2,0x89 +.byte 0x24,0x8e,0xfd,0xe6,0xdf,0x01,0x80,0x8b,0x27,0xe3,0x7e,0x17,0xc2,0x4e,0x26,0xa2,0xe1,0x95,0x81,0x3a,0xdd,0x2a,0xf4,0x75,0x21,0x64,0x11,0x04,0x5e,0x00,0x39,0xf0,0x08,0x68,0x67,0x09,0xa8,0x9b,0xbe,0xb7,0x62,0x0e,0xa8,0x69,0xcd,0x4e,0xaf,0xc8,0x4f,0x92,0x3d,0x8e,0x35,0x60,0x70,0xb3,0xda,0x2f,0x38,0x80,0x6f,0x5e,0xcc,0x3b +.byte 0x6e,0x05,0x26,0x14,0x9d,0x36,0x72,0x7d,0x09,0xb8,0xb7,0xa1,0xf7,0x5f,0xb3,0xe1,0xd6,0xc5,0x54,0x4e,0x80,0x4d,0x06,0x8f,0x84,0xbb,0xb6,0x65,0x87,0x2c,0x19,0x4a,0x74,0x3c,0x34,0x62,0x32,0xad,0x4c,0x06,0xa3,0xbb,0xfb,0x4f,0x4f,0x9d,0x91,0x84,0x63,0x75,0x34,0xcc,0x6b,0x00,0xa1,0x5a,0x63,0x03,0x8d,0x1e,0xdb,0xa4,0x0c,0xe6 +.byte 0x3d,0xd1,0x94,0x77,0xd8,0x77,0x8c,0x39,0x48,0x78,0xb1,0xb5,0xa2,0x41,0xd0,0x6d,0x27,0x20,0x4a,0x41,0x88,0xa5,0x78,0x3f,0x51,0x72,0x8c,0x80,0xe7,0x37,0x81,0x8b,0x06,0x46,0x58,0xab,0x23,0x85,0x47,0x89,0x39,0xf9,0x14,0xfe,0xbf,0x07,0x7c,0x47,0x8e,0xcc,0xd7,0x08,0xfe,0x5d,0xee,0xf9,0x94,0xa2,0x83,0x81,0x8a,0xfd,0x0f,0x9a +.byte 0xa7,0xe4,0x59,0xad,0xe6,0x1f,0xed,0x5d,0xe4,0x20,0xd6,0x2f,0xa7,0xd3,0xcf,0x5b,0x18,0x6d,0x24,0x79,0x66,0xd9,0xaa,0x44,0xfa,0x8d,0x74,0x60,0xcc,0x7e,0xbf,0x4f,0x0e,0xe3,0x9c,0xa5,0xe4,0xff,0x14,0x05,0xff,0x24,0x62,0x94,0x00,0x7a,0x58,0xe5,0x0b,0x3b,0xe8,0xee,0xe1,0x4d,0x4e,0x34,0x26,0xba,0x70,0x10,0x5e,0x14,0x4f,0xa5 +.byte 
0x7a,0x9e,0x7b,0x28,0x99,0xbe,0x94,0x4a,0xcb,0x8d,0x65,0x60,0xa0,0x6e,0xc7,0xbc,0x51,0xba,0xb5,0x07,0x97,0x25,0x42,0xb7,0x2c,0x0e,0x9b,0xfc,0xfb,0x35,0x6f,0x74,0x10,0xce,0x25,0xdb,0xa9,0x7c,0x11,0x61,0x43,0xf9,0x19,0xbf,0xe2,0x21,0xa3,0x57,0x3c,0x41,0x0a,0x15,0x4e,0x7f,0x6b,0x38,0xb6,0x73,0x41,0xa2,0x4e,0x8e,0xb9,0x44 +.byte 0xee,0x2a,0x2e,0x0a,0x9e,0x85,0xf1,0x6e,0x93,0x72,0x42,0x50,0x55,0xe1,0xc6,0x18,0x11,0x92,0xf7,0xbf,0x05,0xd8,0xb6,0xbc,0x2b,0xd5,0xe0,0xd3,0x9b,0x64,0xc4,0xdd,0xb0,0xb3,0x46,0xd8,0xfb,0x73,0xea,0xed,0x06,0x96,0x16,0x9e,0xf6,0xc6,0xe8,0xbe,0xae,0x00,0x2f,0x5a,0xf4,0x1f,0xb5,0x28,0x7c,0x75,0x76,0x68,0x74,0xa2,0x57,0x0e +.byte 0x6c,0xfa,0x2d,0xbe,0x34,0xf1,0xc9,0x2b,0x83,0x58,0xe7,0x2a,0x87,0xdb,0x47,0xae,0xc7,0xc2,0x78,0x50,0xed,0x20,0xdf,0x30,0x38,0xdd,0x84,0xa9,0x6b,0x00,0xb1,0x7b,0xbb,0x69,0xd3,0xbe,0xed,0x3d,0x99,0x6e,0x39,0x42,0x75,0x8a,0x6c,0x7c,0xa5,0xcf,0xc9,0xcf,0x11,0x14,0xb3,0xaf,0x72,0x00,0x3b,0x58,0xdd,0x2a,0xe1,0x44,0xa7,0x51 +.byte 0x15,0x05,0x1b,0x18,0x49,0x07,0x90,0x4c,0xbc,0x99,0x88,0x64,0xf6,0x14,0x0b,0x99,0xc0,0x84,0xc9,0x06,0x32,0xf0,0xec,0x19,0x8d,0x4a,0xb8,0xdb,0x32,0xb4,0x5e,0xc9,0x0c,0x24,0xf0,0xad,0xdc,0xf4,0x32,0x3b,0xf6,0x68,0x28,0x4a,0xa5,0x5b,0xb7,0xd5,0x00,0x35,0xf8,0x56,0x03,0xa3,0x86,0xa0,0x8a,0x1b,0x53,0xb5,0x58,0x73,0x8c,0xf9 +.byte 0x2b,0xd8,0xcb,0x88,0xe7,0x7e,0x79,0x68,0x13,0x5d,0x7d,0x23,0xc4,0xec,0x9c,0xf4,0x95,0x97,0xbf,0xb2,0xd9,0xdf,0x38,0xe8,0xa2,0x79,0xf7,0xe8,0x36,0x80,0x59,0x3f,0x58,0x2f,0xf7,0xf9,0x32,0x73,0xdd,0xd6,0x9e,0x20,0x1a,0x29,0xab,0xc1,0x77,0x14,0x71,0x3c,0xde,0x90,0xe9,0xea,0xdb,0x78,0x14,0xa3,0x89,0x43,0xf1,0x42,0x43,0x3f +.byte 0xe7,0x67,0x32,0x3d,0x65,0xdc,0xa4,0x79,0x8f,0x81,0xa5,0xb0,0x94,0x0f,0x96,0xf5,0x82,0xcc,0x47,0xc1,0x29,0x39,0x70,0x7a,0xf3,0x49,0xf5,0x09,0x43,0x50,0x56,0xd6,0xea,0xc4,0x35,0xa5,0xa2,0x8a,0xbe,0xc0,0xe3,0xfe,0x4c,0xa2,0x83,0x09,0xab,0x72,0x8a,0x96,0x7c,0x01,0x70,0xb2,0xd5,0x62,0xb7,0x67,0x59,0x36,0xcf,0x56,0x2d,0x14 +.byte 0xc2,0x69,0x49,0x52,0x4e,0x7c,0x45,0x4b,0xef,0xcd,0x79,0xcd,0xe6,0xa6,0xd0,0xbe,0x10,0x1e,0x18,0xca,0xe7,0x8d,0x65,0xb1,0x17,0xc7,0x2c,0xc8,0x2a,0x5b,0xe8,0x08,0x11,0x15,0xea,0xa9,0x43,0x7b,0x70,0x04,0x0c,0xc8,0xca,0x67,0x18,0x18,0x12,0x16,0xc2,0xd3,0xf2,0x0a,0xc7,0x01,0xa9,0x97,0x61,0xf6,0xa7,0x44,0x9a,0xb3,0x67,0xdc +.byte 0x07,0x63,0x02,0x02,0x2e,0x58,0x80,0xa9,0x95,0xa0,0x8e,0x86,0xb6,0xf6,0x14,0x13,0x0a,0xea,0xf1,0x6d,0xd9,0x98,0x37,0x12,0xdb,0x67,0x1b,0x13,0x8e,0xd1,0xfa,0x2f,0x98,0x53,0x3c,0xd7,0x56,0x55,0x42,0x2f,0x64,0x59,0xd5,0xb7,0x6e,0xa8,0x6c,0xc2,0x40,0x11,0xb5,0xa1,0xc0,0x5c,0x45,0x87,0x91,0xb1,0x1c,0x4e,0xa9,0xf6,0x72,0x57 +.byte 0x50,0x8e,0xc5,0xfc,0x64,0x59,0x52,0x82,0xb0,0x75,0xc3,0x98,0xff,0x32,0xce,0xa4,0x39,0xb8,0xa4,0x61,0xb4,0x53,0x3f,0xc7,0x80,0x35,0x48,0xaf,0xa8,0x67,0xfe,0xa1,0x1d,0x3c,0x95,0xb5,0x63,0x1c,0x3a,0x2c,0x68,0xfa,0x98,0x8b,0xa7,0x19,0x29,0x79,0xe4,0x9b,0xff,0x8f,0x15,0x9c,0x65,0x60,0xd2,0xa9,0x4f,0xd5,0xb2,0x57,0xff,0x32 +.byte 0x4c,0x96,0x82,0x6b,0x09,0x6c,0x74,0x55,0x00,0x5c,0x68,0x68,0xd5,0x9b,0xd4,0xdf,0x3d,0x2d,0xb9,0x0b,0xf5,0x2c,0x87,0x35,0x2a,0xc0,0xc0,0xc9,0xd7,0xa1,0x76,0x30,0x82,0x46,0xd8,0x24,0x6e,0x27,0x02,0x71,0x57,0x5c,0x43,0xf2,0x54,0xd6,0xea,0xd7,0x67,0x7d,0xac,0x76,0x91,0xf1,0x26,0x6e,0xaf,0x87,0x05,0x06,0x48,0x57,0xbd,0x67 +.byte 
0x1d,0xd7,0x07,0xcd,0x41,0x02,0x49,0x6c,0x8c,0xe1,0xe3,0x00,0x78,0xbe,0x28,0x84,0x16,0x44,0xb1,0x0d,0x6d,0x40,0xfe,0xab,0x7e,0xf6,0x6b,0xff,0xfa,0xe1,0xc7,0x9d,0x56,0x62,0xf1,0x68,0xba,0x76,0x34,0x8f,0x54,0x20,0x49,0xf5,0xa2,0x54,0x52,0xca,0x42,0xed,0x4f,0x9b,0xdf,0xcf,0xfb,0xf6,0xee,0x12,0x29,0x43,0x8f,0xf9,0xfd,0xf4 +.byte 0x8a,0xbf,0xae,0x50,0xf2,0x8f,0x46,0xa2,0x97,0x3b,0x2d,0xfb,0x84,0x98,0x61,0xae,0xba,0x36,0x25,0x30,0x8b,0xdc,0xd3,0x08,0x8e,0x7e,0xfa,0x91,0xac,0x4b,0x29,0x6d,0x0c,0x81,0x0f,0xc7,0xc8,0xc4,0x5c,0x48,0x68,0xa7,0x83,0xf3,0x6a,0xc8,0x0d,0x3a,0x9b,0x46,0xb9,0xe1,0x31,0xac,0x3c,0x12,0xa2,0xae,0x74,0xb8,0x91,0xed,0x63,0xba +.byte 0x40,0xb8,0x57,0x58,0x1f,0x1d,0x1a,0x2d,0x98,0x60,0xe8,0xe1,0x84,0x16,0xe5,0xf0,0x1e,0x35,0x58,0x31,0xc3,0x0c,0x49,0x6e,0x13,0x2c,0xac,0x14,0xc2,0xde,0x5f,0x62,0xe5,0x37,0x5b,0x1d,0x71,0x8b,0xc3,0x3d,0xd8,0xaf,0x3d,0x0a,0xef,0x80,0x3c,0x9a,0x4b,0x0a,0x3f,0x0e,0x8f,0x90,0x8f,0x73,0x2e,0xff,0x8e,0x8e,0x87,0xf8,0x46,0x52 +.byte 0xed,0x7d,0x76,0xf3,0xff,0xaf,0x5e,0x62,0x87,0x16,0x9c,0xa6,0x12,0x39,0x13,0xc3,0x62,0x4b,0xd2,0x21,0xa2,0x43,0xfa,0x4c,0x5d,0x75,0x61,0x64,0x5b,0x23,0xcd,0x76,0x86,0x81,0xd6,0xa6,0x25,0xe1,0xc1,0xc6,0x04,0x5e,0x65,0xfe,0x89,0x0e,0x67,0x02,0xeb,0xb9,0x26,0x88,0x81,0x97,0x1e,0x62,0x4e,0xf4,0x4e,0x0d,0xef,0xac,0xcf,0xd7 +.byte 0xc5,0x9b,0x9d,0x3a,0xa2,0x71,0xd7,0xd4,0x72,0xa6,0x66,0x90,0xe2,0xf7,0xb7,0xec,0xe4,0xca,0x9f,0xd1,0xd8,0x5a,0x65,0xff,0x39,0x65,0x78,0x47,0x1c,0x64,0xab,0x1a,0x35,0x2e,0xe2,0xf7,0x67,0xa4,0x7f,0xd5,0xea,0x04,0xee,0x4d,0xf6,0x29,0xe4,0xcd,0x1b,0xcf,0x0a,0xef,0xa1,0x14,0x90,0x0e,0xed,0x1a,0x10,0x63,0xa0,0x56,0x11,0x05 +.byte 0x57,0x94,0x3a,0x11,0xff,0xe0,0xc7,0x33,0x19,0x67,0xd7,0xd0,0xcc,0x76,0x52,0x5d,0x9e,0x10,0xe7,0xd6,0xaa,0x13,0xe8,0x8d,0xa5,0x60,0x66,0x98,0x26,0x11,0x66,0x0f,0x2d,0x4d,0xec,0x28,0x93,0x17,0x3a,0x6f,0x99,0x70,0x00,0x2b,0x66,0xb3,0x49,0x69,0x3c,0x3b,0x03,0xb8,0xc0,0x9b,0x1c,0x96,0xd9,0xd1,0xe1,0x6d,0x8f,0x45,0xce,0x22 +.byte 0xcf,0x48,0x61,0x85,0x10,0x1b,0x3f,0x2b,0x74,0x48,0x61,0x68,0x63,0xe3,0xa3,0x83,0xe2,0xcc,0xa0,0x6d,0x82,0x8b,0xe5,0x42,0xab,0xa7,0x62,0x6c,0x05,0xb4,0x7b,0x65,0xf5,0xd8,0x0b,0x7d,0x61,0xd6,0x5c,0xf0,0xc0,0x03,0x0c,0x51,0xec,0x06,0xad,0x79,0x8c,0x62,0x0c,0xf5,0x8e,0xcb,0x97,0x62,0xf9,0x3e,0x39,0x8d,0x3c,0x2e,0xd1,0xc0 +.byte 0x5f,0x98,0xea,0xb5,0x26,0x19,0xf5,0x93,0xbb,0xf8,0xd4,0xd5,0x35,0xee,0x1f,0xf8,0x71,0x81,0x0e,0xe6,0xe9,0xf3,0x2c,0x80,0xa8,0x15,0x35,0x1e,0xda,0x07,0x41,0x39,0x8a,0x19,0x1f,0x70,0x99,0xbe,0x3d,0x5c,0x1f,0xf6,0x72,0x85,0x73,0xea,0xb5,0x61,0xbb,0x77,0xaa,0xef,0xc7,0x2c,0xed,0x1e,0xa6,0xfd,0xc9,0xde,0xa9,0x82,0xba,0x19 +.byte 0x04,0x17,0xf7,0xa1,0x59,0x5c,0x7d,0x8d,0xe7,0x1c,0x89,0x7f,0xe1,0x02,0xd3,0xb0,0x46,0x6c,0xcf,0xde,0xf0,0x0b,0x00,0x43,0x8d,0xd6,0xe6,0xf7,0xc8,0x83,0x20,0x77,0x8b,0x9f,0x14,0xea,0x2b,0xb2,0xd2,0x41,0xfd,0x96,0x7c,0x0d,0x05,0xb9,0x5a,0xa0,0x83,0x50,0xde,0x0e,0xc6,0xa6,0x29,0x55,0x12,0x8e,0x2f,0x0a,0x5c,0xcd,0xae,0x92 +.byte 0x76,0x84,0xc9,0x8a,0x81,0xe5,0x3e,0xf0,0xe6,0x5b,0xe4,0x21,0xfb,0x4c,0xb6,0x0a,0x7b,0x7f,0x7e,0xab,0xdc,0x15,0x44,0xf8,0xeb,0x23,0x21,0x31,0xef,0x98,0xec,0x84,0x69,0x34,0x29,0x99,0x03,0x8a,0x12,0x8e,0x28,0xdd,0x00,0x6a,0xa3,0xe7,0x08,0x17,0x35,0x2a,0x42,0x8a,0xcb,0x4a,0x7b,0x1c,0xd2,0x74,0x4f,0x6a,0x8c,0x85,0x1c,0xd6 +.byte 
0x05,0x3a,0xfd,0xdf,0x1c,0xa5,0x59,0xbb,0xdb,0xe3,0xa7,0x59,0xb1,0x67,0x3d,0xa4,0x71,0x4d,0x6c,0x99,0xe0,0xa7,0x8c,0xfa,0x96,0x1f,0x8d,0x0c,0xa7,0xc8,0xce,0xa3,0xbf,0x4d,0xc7,0xa9,0xb7,0xfd,0x04,0x58,0xcd,0xd7,0x20,0xb1,0xb9,0xf5,0x06,0x70,0x1b,0xdd,0xf4,0x1c,0xdc,0x32,0xa0,0x90,0x0d,0xb2,0x91,0x14,0x05,0xa2,0xf7,0xb7 +.byte 0xb6,0xd2,0xf1,0x30,0x75,0xcc,0x78,0x0d,0x56,0x70,0x64,0x02,0xe7,0x83,0x97,0x65,0x63,0x4b,0x64,0xff,0x8b,0x62,0xc9,0xa4,0x6e,0x96,0xbf,0xd3,0xeb,0x74,0xc5,0x1f,0xdb,0x1c,0xf3,0xca,0x54,0x7d,0x8d,0xd9,0xec,0x18,0xd8,0x99,0xd1,0xa5,0x70,0x8a,0xc5,0xdc,0xa0,0xcb,0xb7,0x52,0xe3,0xe6,0x88,0x0c,0x5a,0x42,0xde,0xe6,0xd8,0xc4 +.byte 0x39,0xe5,0x6c,0x0b,0xd4,0xa5,0x9b,0x51,0xa2,0x3d,0xc5,0xc7,0x17,0x17,0xb8,0xd8,0x09,0xad,0xeb,0x67,0x47,0xe0,0x88,0xef,0x1d,0x22,0x18,0x25,0xdc,0x32,0xb2,0xf7,0x47,0xc5,0xb3,0x0b,0x57,0x01,0x67,0xac,0xc3,0x9e,0xb0,0xa8,0xd7,0xce,0xb2,0xcd,0xea,0x3b,0x61,0xbb,0x24,0xad,0x91,0x7b,0xa2,0x9a,0xb3,0x63,0x56,0xe2,0x9d,0x69 +.byte 0x9e,0xd7,0x5f,0x5f,0x47,0x9f,0xae,0xf6,0x09,0xb1,0x9e,0x22,0x35,0xaa,0x55,0x0b,0xfc,0x70,0x96,0xfd,0x53,0x8a,0x37,0xaf,0x2d,0xa2,0xc5,0x49,0x5b,0x1e,0x32,0x47,0x9d,0xc3,0xb4,0x46,0xf3,0x54,0xdb,0x3f,0xb9,0x69,0x9e,0x8b,0xad,0x11,0xb2,0x68,0xe8,0x27,0x0d,0xca,0x33,0x1c,0x86,0xb2,0x2c,0xaa,0xc2,0x15,0xf9,0x6e,0xed,0x30 +.byte 0x71,0x08,0xeb,0x93,0x1d,0x16,0xc5,0x34,0x73,0x65,0x7a,0x19,0x2b,0xa7,0x3d,0xe6,0x88,0xb5,0x0f,0xa0,0x92,0x91,0x22,0x9d,0x01,0xf3,0xf4,0x57,0x9f,0xd9,0x23,0x1b,0xbd,0xd7,0xd5,0x11,0xc9,0x24,0xf6,0x36,0x30,0x30,0x69,0x95,0x17,0x48,0xf9,0x76,0x71,0xef,0xef,0xc0,0x00,0x9c,0x7d,0x87,0xdc,0xdc,0x1a,0x32,0x82,0x7a,0x13,0xc2 +.byte 0x9f,0x53,0xc2,0x7d,0x4d,0xbf,0xbe,0xf5,0x9d,0xc8,0x81,0x5b,0x81,0xe9,0x38,0xb6,0xa5,0x40,0xa5,0xd4,0x6f,0x0c,0xea,0xf1,0x52,0x59,0x37,0x3b,0xc2,0xb2,0x5f,0x10,0xdf,0x22,0xf7,0x77,0xe8,0x66,0xb0,0x97,0x91,0x5f,0xc2,0x18,0x8d,0x17,0x40,0xd1,0x6d,0xde,0x6e,0xf0,0x6c,0x1f,0x4e,0x9b,0x15,0x83,0x9b,0x70,0x21,0x2b,0x98,0x46 +.byte 0xbf,0xa5,0x82,0xac,0x63,0xac,0xd7,0x52,0xec,0x2c,0xf2,0xe4,0xe0,0x2a,0xbf,0x7e,0xa2,0xd2,0x9d,0x0d,0xf2,0x9b,0x79,0x5f,0x22,0xb0,0x6d,0x22,0x2e,0xed,0xe2,0x4f,0x73,0xc5,0x89,0xcc,0x4a,0xaa,0x9a,0x7e,0xab,0x95,0x25,0xa7,0x9d,0xf4,0xc2,0xe8,0x42,0x6e,0xd3,0xf9,0x25,0x54,0xb9,0x1f,0xa9,0x16,0x9c,0x22,0x7a,0xf0,0xa6,0xac +.byte 0x8b,0x9d,0xe6,0xe3,0x93,0x4e,0x65,0x3a,0x39,0x3e,0xf5,0x41,0x38,0x02,0xb7,0x37,0xd4,0xdc,0xea,0xc5,0x53,0x0e,0x52,0x85,0x96,0xc0,0xa7,0x21,0xbf,0xe7,0xca,0x12,0x1c,0x59,0x33,0xe4,0xd5,0x70,0x6b,0x25,0x54,0x24,0x58,0x48,0x1b,0x65,0x6e,0x7e,0xe6,0x84,0x39,0x38,0xbc,0xdf,0x96,0xbc,0x39,0xdf,0x8f,0x36,0x9e,0x3a,0xda,0x02 +.byte 0x86,0xe2,0x9f,0xb7,0x3a,0xd0,0xdb,0xc2,0x5d,0xb0,0xde,0x31,0x73,0x43,0xe5,0x4b,0x6a,0xa1,0x6d,0xaa,0xca,0x34,0xfa,0xa9,0xaf,0xec,0x05,0x2a,0xdb,0x82,0xa1,0xdc,0xdc,0x3d,0xb5,0x92,0x42,0x28,0xdc,0x93,0xec,0xab,0x9b,0x75,0xae,0x7c,0xbf,0x9b,0x25,0x01,0xb1,0xc8,0x3b,0x47,0xb6,0xfd,0x11,0x6f,0x4b,0xaa,0x6f,0xdf,0x1f,0x15 +.byte 0xc2,0xf3,0x87,0x4a,0xaf,0xf7,0x41,0x64,0x5a,0x19,0xa0,0xc4,0x4f,0x58,0xe8,0x19,0xe0,0x84,0x44,0xc7,0x65,0x0c,0xf1,0xff,0xcb,0x73,0xb2,0xac,0x25,0x28,0xe1,0xd4,0x03,0x16,0x3c,0x1c,0x24,0x3a,0xfc,0x2b,0x7e,0xcb,0xa3,0xba,0xb7,0x78,0x87,0xbe,0x95,0x06,0x27,0xb8,0x16,0x72,0xe4,0x24,0xa6,0x5d,0xe7,0x5e,0x93,0xa9,0x96,0xfd +.byte 
0x01,0x1d,0xb8,0x7c,0x85,0x3c,0xe3,0xc9,0x56,0x68,0xcd,0xd9,0x79,0x97,0x50,0x39,0xfe,0x96,0x93,0x50,0xae,0xde,0xcd,0x8d,0xa0,0x38,0x31,0xba,0xca,0x21,0xff,0x19,0xea,0x44,0x95,0x4d,0xba,0xae,0xe2,0x62,0xd2,0x82,0x60,0x0c,0xb9,0x10,0x40,0x9a,0xaf,0x9b,0x17,0xcd,0xf3,0x26,0xec,0x38,0x13,0x18,0xd3,0xf2,0xd2,0x11,0xa6,0xc3 +.byte 0x3c,0x3b,0xe8,0xa0,0x49,0xba,0x4e,0x07,0xec,0x44,0x75,0x1c,0xc9,0x2f,0x68,0x64,0x02,0x1d,0x14,0x35,0x80,0xd8,0xa8,0x53,0xde,0x44,0x65,0x72,0x37,0x28,0x61,0x5f,0xa1,0x58,0xea,0x17,0xb3,0x89,0x25,0xf7,0xcb,0x87,0xe6,0x43,0xc5,0xc3,0xf3,0xd1,0xf5,0x1f,0x18,0xe9,0xd1,0x05,0xd9,0x85,0x38,0xf0,0x5e,0x26,0x35,0xf2,0x72,0x92 +.byte 0x34,0x2f,0xea,0xdd,0x7b,0x64,0xac,0x1d,0x78,0x41,0x56,0x83,0x7d,0x83,0x83,0x59,0xbe,0x9f,0x81,0x90,0x00,0x1f,0x04,0xd8,0xd8,0x8e,0xd9,0xeb,0x12,0x16,0x96,0x81,0x61,0x96,0xe8,0x7b,0x36,0x7b,0x26,0x9b,0x43,0x1e,0x0e,0xc2,0x59,0xdf,0x8f,0xb4,0x91,0x74,0x2e,0x1e,0x6d,0x20,0x70,0xe7,0x3c,0x39,0xe3,0xa8,0x62,0x66,0x32,0x63 +.byte 0x7d,0x89,0xb6,0xad,0x69,0x38,0x2c,0x21,0xe5,0x02,0xcc,0x93,0x8a,0x65,0x71,0x65,0x02,0x5c,0xeb,0xc9,0x70,0xf3,0x81,0xce,0x65,0x37,0x22,0xb7,0x47,0x3c,0xd6,0x3d,0x29,0x65,0x29,0xba,0xf9,0xae,0xd9,0x1f,0xd7,0x38,0x88,0x95,0xa9,0x66,0xa8,0x77,0x75,0x4a,0xf9,0x2e,0xd9,0x63,0x75,0x80,0x90,0x82,0x39,0x8b,0x21,0x58,0xf4,0x2e +.byte 0x2d,0x1f,0x7f,0xcb,0x33,0xdb,0x9b,0x9b,0x31,0x21,0x4e,0x6e,0xdb,0x0f,0x1f,0x69,0x22,0x97,0x69,0xd7,0x7f,0x2e,0xd7,0xce,0x6c,0xe4,0xc0,0xe7,0x27,0x82,0xe6,0x8a,0xf8,0xae,0x46,0x2d,0x5a,0x45,0x82,0xce,0xb6,0x49,0x84,0x15,0x4a,0x54,0xa6,0x76,0xf3,0x29,0x28,0xc0,0x05,0x82,0xae,0x7d,0x85,0x41,0xb0,0x87,0x67,0x44,0x37,0x46 +.byte 0x3e,0x47,0xbc,0x00,0x7c,0x05,0xd3,0xdc,0x9a,0x31,0x49,0xf8,0x48,0x99,0x57,0x4a,0x2b,0xe7,0xcf,0xb2,0xa7,0xf0,0xcf,0xc7,0xf5,0xfd,0x73,0x59,0xf1,0xe4,0x86,0xb5,0x5d,0xce,0x6d,0xbf,0xc6,0xe5,0xa9,0xca,0x75,0xe9,0x69,0xe6,0x09,0xab,0x66,0x17,0x09,0xe9,0xbc,0x14,0xd8,0x6f,0xe9,0xc2,0x87,0x39,0x2f,0x87,0x1e,0xb8,0x16,0x08 +.byte 0x10,0xee,0x1c,0x2f,0x47,0x7d,0xa3,0x5b,0x1f,0x1f,0x5d,0x95,0xd0,0xa4,0xbb,0x08,0xc2,0x47,0xab,0x46,0x3c,0xbb,0xbe,0x3a,0x64,0x82,0x40,0x08,0x75,0x03,0x02,0x6e,0x6a,0xab,0x6b,0xd4,0x90,0xa7,0x28,0x7a,0xb4,0x8b,0x1f,0x6b,0xcc,0x16,0x30,0x16,0xf5,0xc6,0xd8,0x4a,0xed,0xc9,0xc7,0xac,0x0f,0x75,0x1b,0x13,0xe3,0x45,0x6d,0x22 +.byte 0x7e,0x3d,0x59,0x55,0x87,0x8d,0x04,0xee,0x85,0xac,0x98,0x0c,0x52,0x5b,0xe6,0x92,0x04,0x31,0xdf,0x7c,0x44,0x4d,0x06,0xbe,0xb2,0x5a,0x95,0xef,0x29,0x75,0x9b,0xb2,0xe7,0xb8,0x83,0x18,0x82,0x23,0x4e,0x66,0xe5,0xdd,0x47,0xa1,0x6b,0x33,0x4e,0x9c,0x13,0x0e,0x0a,0x8a,0x5c,0xba,0x7b,0x2f,0x6c,0x72,0x78,0x86,0xd2,0xf8,0xbd,0x1b +.byte 0x4b,0x9e,0xe0,0x99,0x46,0x7f,0x24,0x0f,0x1b,0xda,0x85,0x87,0xe9,0xda,0x96,0x25,0xc6,0x81,0x77,0x8b,0x56,0xae,0x7a,0x9c,0x47,0x34,0xe1,0xac,0xf2,0xba,0x52,0x95,0xf8,0x56,0x26,0x66,0xf0,0x53,0xcc,0xc4,0x6f,0x46,0x94,0x10,0x22,0x69,0xb1,0x93,0x7b,0x51,0xb7,0xb8,0xdd,0x42,0x67,0x51,0x6d,0x9c,0xb2,0xbd,0xdb,0xdd,0x19,0xa2 +.byte 0x25,0x13,0xfe,0x42,0xca,0x36,0xeb,0xce,0x15,0x41,0xe7,0x35,0xce,0xa8,0x45,0x56,0x58,0x9f,0x46,0xcf,0x11,0xe7,0xcc,0x40,0x54,0xe4,0x85,0x0d,0x73,0x36,0x7e,0xae,0x38,0x8c,0x56,0xab,0xf0,0x5f,0x5c,0xff,0x14,0x9b,0x46,0x1b,0x35,0xbd,0x03,0x0e,0x2f,0x9e,0xde,0xd8,0x82,0xfe,0xa0,0x09,0xb4,0xb4,0xbd,0x58,0xc0,0xe2,0x01,0xb1 +.byte 
0xca,0x5c,0x3d,0xc3,0x18,0x5e,0xc1,0xee,0x61,0x60,0x00,0xca,0x1e,0xf3,0x71,0xd8,0x15,0x37,0xf0,0x2e,0x13,0xa0,0xf7,0xac,0x73,0x4b,0xfb,0x6a,0x27,0x6b,0xde,0x69,0x3d,0x19,0x36,0x4b,0x63,0x55,0xae,0xd1,0x2b,0x66,0x69,0x0d,0x64,0xa7,0x86,0xfd,0x3a,0xb8,0xe6,0x87,0xaa,0x32,0x5f,0xbc,0xa7,0x67,0xde,0x7a,0xe0,0xdd,0xff,0x57 +.byte 0x2c,0xc9,0x25,0x92,0x03,0x91,0xa8,0x0e,0x39,0xe4,0x9a,0xdf,0x21,0x29,0xc7,0xbc,0x93,0x01,0x2a,0x02,0xd8,0xaf,0xbc,0x20,0x57,0xc7,0x37,0x77,0xa7,0xad,0x5e,0x15,0x20,0xcf,0x4a,0x3c,0x22,0x1b,0x92,0xa9,0x05,0x91,0x70,0xb3,0x88,0x4e,0x97,0x58,0xf7,0x33,0x1a,0x05,0x33,0x57,0xdc,0xbb,0x2a,0xba,0xd0,0x22,0xac,0x40,0xbe,0x60 +.byte 0xa2,0x89,0xe6,0x6c,0xf3,0x5d,0xef,0x58,0xb4,0x7c,0x4a,0x28,0xb8,0x16,0xd2,0xe0,0x49,0xf5,0xe8,0xaf,0x84,0x39,0xae,0x1e,0xa2,0x34,0x67,0x42,0x26,0x31,0x93,0x87,0x7a,0xd5,0xde,0x79,0xdb,0x4c,0x7e,0xcf,0x1f,0xef,0x9a,0x4c,0xb9,0x70,0xe2,0x72,0x9b,0xcd,0x30,0xe5,0xf1,0x84,0x44,0x5a,0xff,0x36,0xa2,0x37,0xe7,0x49,0x78,0x63 +.byte 0xbe,0xe0,0x90,0xdf,0xef,0x9e,0xf3,0x55,0x9e,0x8a,0x51,0xe8,0xa3,0x32,0x2d,0xed,0xc8,0x99,0xf6,0x92,0xf9,0x62,0x74,0xa7,0x8d,0xcf,0xa5,0x09,0xb3,0x43,0xb9,0x18,0x70,0x59,0x4f,0xd2,0x7f,0x7e,0xce,0x1e,0x7d,0xe8,0xa9,0xb7,0x29,0x0f,0x86,0x8a,0xac,0x22,0x41,0x98,0xb2,0xc3,0x48,0x3b,0x60,0xcb,0x7b,0x1d,0xc3,0x5e,0x19,0x5b +.byte 0x31,0x57,0x12,0x09,0x41,0x54,0xf8,0x01,0x70,0x02,0x03,0x8a,0x6e,0x8e,0x5b,0x23,0xf3,0xd4,0x13,0xbf,0x51,0xba,0xf9,0x2d,0x6c,0xb9,0xb3,0x90,0xd0,0xa3,0x76,0xfb,0xef,0x85,0x17,0x8b,0x2c,0x05,0xa3,0x06,0x0a,0xaa,0xdd,0xbf,0xd4,0xcc,0xe4,0x96,0x19,0x7f,0x51,0xf6,0x7e,0xa1,0x2c,0x14,0x1c,0x21,0x99,0x28,0x3a,0x0e,0x36,0x1b +.byte 0xf1,0xd7,0x3e,0x29,0x94,0xa6,0x03,0xf7,0xe5,0x6f,0x1b,0x56,0xc8,0xfb,0x2d,0x4f,0x12,0x2b,0xc7,0x3a,0xec,0x5e,0xc8,0x88,0x1b,0xd8,0x65,0x21,0x04,0x0e,0xe2,0x95,0x6d,0x62,0xea,0xeb,0xee,0xbe,0x47,0x0a,0x90,0x26,0xe3,0x85,0xd7,0x1d,0xb5,0xd5,0x56,0x8b,0xc0,0x2f,0x7f,0x01,0xc8,0xac,0x90,0xc3,0x2d,0x10,0xf2,0x11,0x30,0x0c +.byte 0xa9,0x4d,0x13,0xde,0x65,0x6d,0x34,0x68,0x5d,0xad,0x3f,0x7a,0x56,0x3a,0x1f,0xb9,0xd6,0x7b,0x8f,0xe8,0x42,0x2a,0x16,0xb6,0x3f,0xf2,0x4f,0x14,0x8e,0x8e,0x29,0x88,0x68,0x1b,0x10,0x80,0x80,0x47,0x36,0xaa,0x82,0xf5,0xa8,0x97,0xc4,0xcb,0xc2,0xef,0xaa,0x9f,0xdc,0x96,0x4f,0x1f,0xaf,0x39,0x71,0x55,0x8f,0x3c,0xbf,0x26,0x91,0x46 +.byte 0x38,0x59,0xa7,0xd1,0xb5,0x87,0xd6,0x81,0x71,0x17,0x83,0x05,0x40,0x9c,0xf3,0x33,0x4b,0x09,0x06,0xb1,0x69,0xfb,0x43,0x1f,0xef,0x9a,0xfe,0xc3,0x4e,0x4e,0x25,0xe1,0x3a,0xfb,0xf9,0xc9,0x97,0xe2,0x1c,0xa1,0x9a,0x06,0x6e,0xbb,0x16,0x4a,0x9f,0xf4,0x87,0x31,0x38,0x78,0xae,0x77,0x4c,0x42,0x28,0xc4,0x63,0xc0,0x49,0x37,0x4f,0xf9 +.byte 0xeb,0x31,0x0d,0x3e,0x0c,0x8a,0xb7,0x17,0xa7,0x90,0x26,0xc2,0xea,0xa5,0x9d,0xe4,0x4d,0xc6,0x3a,0x33,0x2d,0x47,0x42,0x8c,0xeb,0x50,0xea,0xfe,0x74,0x43,0x06,0xcd,0xa5,0xb1,0x49,0xf0,0x98,0x91,0x25,0xf4,0x8d,0x06,0xd1,0xeb,0x56,0x2c,0xf9,0xc4,0x84,0x02,0x9e,0xf2,0x3a,0xfe,0xb4,0x39,0xce,0xee,0x85,0xb6,0x64,0x6c,0xbc,0x1f +.byte 0xe6,0x86,0x00,0xc3,0xa9,0xb4,0x53,0xdf,0x2d,0x7c,0xc6,0xde,0x2e,0x79,0x25,0x5c,0xbb,0xe5,0xbe,0x33,0xe9,0x58,0x49,0x35,0xbe,0xae,0xbc,0x06,0xdc,0x48,0x9d,0xc3,0x08,0x6f,0xe8,0xb8,0x48,0x67,0xea,0x1c,0x05,0xb4,0xf7,0xe3,0xcc,0xc1,0xb3,0xa8,0x61,0xcb,0xa8,0xf6,0x12,0x52,0x68,0x06,0x36,0x2b,0x15,0x43,0xc9,0x98,0xfe,0xe5 +.byte 
0x43,0x11,0x0d,0xc3,0x37,0x38,0x7a,0xcb,0x98,0x14,0xc1,0xaf,0x29,0x36,0x35,0x63,0x74,0x98,0xcf,0x0f,0x44,0xe4,0x6e,0xf7,0x3f,0x6e,0x15,0xe8,0xe9,0x93,0x7b,0x96,0x1b,0x84,0xe7,0x8b,0x83,0x30,0xa1,0xdc,0xc3,0xb8,0x18,0x2f,0xc5,0x34,0xd1,0xa5,0xb9,0xee,0x4a,0x04,0xbf,0x26,0x63,0x29,0xba,0x90,0xb5,0x7c,0x83,0x2b,0x1f,0xe8 +.byte 0x5c,0x9f,0x23,0x40,0x7f,0x9c,0x2f,0x76,0x96,0xd6,0xd5,0x13,0xda,0x5c,0x81,0xa4,0x60,0x60,0xbd,0x5e,0xb3,0xd2,0x2c,0xaa,0x48,0x04,0x74,0x31,0x5d,0xbd,0x46,0xd8,0x8d,0x3f,0x62,0x2d,0x1e,0x17,0x97,0x08,0x71,0x06,0x1b,0x96,0x1b,0xd5,0x80,0xa6,0x41,0x06,0x10,0x6e,0x36,0xd4,0xfb,0x36,0x6d,0x96,0xb8,0x86,0x22,0x34,0xda,0x7e +.byte 0x6c,0x5f,0x3b,0x95,0x35,0x1b,0x42,0x3c,0xf2,0x9d,0xe3,0xe9,0x3f,0x44,0xd5,0x4c,0x60,0x55,0xae,0xbe,0x4f,0xf2,0xb3,0x84,0xa1,0x79,0xdf,0x86,0xf0,0x8f,0xad,0xa5,0xa3,0x4a,0xea,0x5d,0x68,0x34,0x17,0x4c,0xb7,0xd8,0x6f,0x67,0x22,0x85,0xe2,0x16,0xcf,0xba,0xee,0x92,0xeb,0x95,0x8e,0x67,0xb1,0xf0,0xbb,0xb0,0x34,0x2f,0x58,0x49 +.byte 0x56,0x3e,0x81,0x31,0xb6,0xc3,0x2c,0xee,0x2b,0x85,0x72,0xbc,0xe9,0x20,0xaa,0x4e,0x34,0xb9,0x8b,0x32,0x2f,0x9e,0xd7,0x98,0x63,0x9d,0xfd,0x3a,0xe9,0x30,0x49,0x23,0x4a,0xb4,0xcb,0xc5,0xe5,0x78,0xcd,0x22,0x90,0xce,0x9f,0x35,0x13,0xda,0x8f,0x14,0xdb,0x36,0x0f,0x66,0x87,0x62,0x50,0xde,0x52,0x15,0x10,0x67,0x8a,0x5c,0xdb,0x76 +.byte 0x51,0x7f,0x72,0x9b,0x8e,0x91,0x39,0xc8,0x3c,0x34,0x0f,0x3d,0x92,0x07,0xb8,0xef,0x2a,0x8b,0x59,0xbd,0x82,0xc1,0x5c,0x95,0x93,0x0d,0x3d,0x9b,0x51,0x53,0x38,0x6b,0xd0,0xe3,0x5b,0xbb,0xe5,0x6c,0xc0,0xb5,0x71,0xa8,0xd8,0x7d,0x5d,0xbd,0xfc,0x69,0xcf,0xcc,0xa1,0xcd,0x83,0x9d,0x8f,0x46,0x47,0xe7,0x36,0x19,0x9f,0x4d,0xda,0x9c +.byte 0xcb,0x2a,0x47,0x58,0x93,0xbb,0x64,0xa3,0x89,0x53,0xbf,0xc7,0xc2,0xe2,0x65,0x0f,0x4f,0x17,0xc6,0x4c,0x15,0xfe,0x4b,0x95,0xb2,0x79,0x4a,0xb8,0xf6,0xae,0xcc,0xba,0xc3,0x5d,0x18,0xb2,0x8e,0xd8,0x6b,0x43,0x1b,0x2f,0xe1,0x36,0xb2,0xa5,0x22,0xa0,0xc7,0xc0,0x26,0x8e,0x48,0x77,0x0c,0x14,0xdd,0xdc,0xde,0x71,0x98,0xce,0xdd,0x61 +.byte 0x85,0xd9,0x23,0x42,0x7f,0x85,0xc8,0x06,0x81,0x3e,0xa2,0x0f,0x1e,0x3e,0xcf,0x33,0xef,0x43,0x6a,0xc7,0xee,0x3f,0x91,0x68,0x32,0x89,0xd9,0xed,0xdf,0x45,0x33,0x10,0xbb,0xd5,0xef,0x1d,0x3c,0x1e,0x26,0x21,0x4d,0x1a,0x06,0x98,0x60,0x71,0x7f,0xce,0x45,0x4e,0xe3,0x3f,0xfa,0xff,0xcd,0xe2,0x92,0x82,0x2e,0x83,0x69,0x9c,0xc6,0x5c +.byte 0x6e,0xb6,0xec,0x28,0xdc,0x7b,0xdb,0xf3,0x02,0x3a,0xf7,0xad,0x9b,0x7a,0x73,0xb2,0x07,0x70,0x76,0x9d,0xa2,0x11,0xcf,0x89,0xea,0xaf,0x6a,0xd2,0x15,0xeb,0x5a,0x99,0x1a,0x17,0x1d,0xce,0xc0,0x7f,0x50,0x26,0x84,0x07,0xd7,0x7e,0x33,0x27,0x74,0x84,0x18,0x32,0x86,0x32,0x34,0x28,0xe8,0x45,0x21,0xb7,0x26,0x3b,0x11,0xbb,0x9a,0x8b +.byte 0x46,0x8e,0x27,0xf8,0x62,0xb5,0x98,0x6e,0x03,0xee,0x9e,0xcb,0xbc,0x74,0xbe,0x63,0x7a,0x86,0xe5,0x75,0xeb,0x7f,0x14,0xa6,0x96,0x76,0x5a,0x46,0xa9,0xda,0xf1,0x4e,0x0e,0x90,0x59,0x56,0x4a,0x48,0x2d,0x91,0xbe,0x78,0x5b,0xfb,0xf7,0xea,0xab,0x1c,0xc0,0x0c,0x5d,0xba,0xb4,0x7b,0xc7,0x21,0xb1,0xc9,0xa3,0x20,0xe6,0xae,0xee,0x0e +.byte 0xf0,0x3b,0x44,0xd6,0xaa,0x57,0x88,0x1f,0x76,0xc8,0x43,0x07,0x91,0x71,0xa5,0xcc,0x04,0x38,0x01,0x13,0xa6,0xea,0x18,0x48,0x8f,0x09,0x8d,0x37,0x8b,0x6f,0x35,0x36,0x51,0xc6,0x30,0xca,0x9e,0xe2,0xaf,0x0c,0x26,0x14,0xe3,0xbf,0xea,0x0e,0x14,0x88,0x97,0xcc,0xf6,0xc1,0x8f,0xad,0xef,0x2d,0xc1,0x0f,0xad,0x45,0x12,0x7a,0xe6,0x37 +.byte 
0x97,0xcb,0x34,0x83,0xd8,0xef,0x34,0x2a,0xce,0xd0,0x21,0x8a,0x7d,0x87,0x7a,0x66,0xf7,0x1c,0xdf,0xa0,0x3f,0xa0,0xf6,0xb3,0x24,0xee,0x6e,0x21,0xe9,0xc3,0x73,0xe4,0xd9,0xc6,0xf6,0xf6,0xac,0x25,0xb7,0xb5,0x64,0x7f,0xcc,0x88,0x3e,0x98,0xe1,0xef,0xa9,0xd2,0x03,0x10,0x4b,0xa3,0xbc,0x3c,0x24,0xfc,0x41,0x36,0x30,0x2d,0xca,0x17 +.byte 0x35,0xd6,0x17,0xa2,0x2b,0x48,0xed,0xd3,0xd7,0x18,0x4f,0x45,0xe9,0x59,0x03,0x35,0xa0,0x80,0x75,0x17,0x48,0xd5,0xea,0x07,0x7a,0x6c,0x3f,0x7a,0x2c,0x02,0x0a,0x7f,0xb5,0x17,0xea,0xf4,0xf6,0xb5,0xf4,0x81,0xba,0x69,0x44,0x81,0x6b,0xff,0xb2,0x43,0xae,0x3d,0x37,0x81,0x91,0x3f,0x6a,0x70,0x35,0x2d,0x06,0x9d,0xa8,0xb5,0xb8,0xc7 +.byte 0x19,0x3a,0x5f,0x59,0x79,0x0b,0x62,0x23,0xa4,0x5b,0x46,0x7b,0x17,0x82,0x19,0x87,0xe8,0xdf,0x09,0xb7,0x50,0x7e,0x40,0xe3,0x71,0x2d,0x09,0xde,0x69,0x2e,0x6c,0x35,0x5c,0x44,0xae,0xb7,0x05,0xb8,0x7e,0xb4,0xe4,0x34,0x05,0x1f,0xd2,0x1f,0xe5,0x79,0x2a,0x15,0xf8,0x8f,0x02,0xc7,0xc8,0x1e,0xe6,0x12,0x83,0x08,0x9c,0x7a,0x2f,0xc6 +.byte 0xc9,0x15,0x0f,0x0f,0x0f,0xa9,0x53,0x16,0x19,0x5b,0x74,0x58,0x6c,0xac,0x21,0x72,0x7f,0xa1,0xae,0xbc,0x34,0x76,0xa6,0x9b,0xbe,0x0f,0x13,0x55,0x50,0x5a,0x8b,0x9e,0xb3,0xf3,0x9e,0x8b,0x61,0xbe,0xb4,0x09,0x71,0x61,0xf0,0xd6,0xaa,0x8c,0x0d,0x0c,0x66,0x31,0x88,0xe3,0x71,0x6a,0xb5,0xaa,0xc0,0x9b,0xce,0x0d,0x79,0x90,0xc1,0x0a +.byte 0xf9,0xfe,0x4d,0x49,0xd0,0x5a,0x63,0xf1,0xfc,0x47,0x71,0x9e,0xbb,0xd1,0x2c,0xef,0xfe,0x90,0x28,0x75,0x82,0xf6,0xa5,0x95,0xea,0x65,0xfa,0xe8,0x04,0xcd,0xb4,0xe1,0x0d,0xb2,0xac,0xd5,0x12,0xf5,0x17,0xbb,0x3b,0x2e,0x52,0x9e,0x7b,0xe7,0x8e,0x86,0x03,0xce,0x77,0x01,0xf0,0x4f,0xb5,0xf7,0xef,0x8b,0x37,0x5e,0x97,0x80,0xbb,0x2b +.byte 0xcf,0x9a,0x63,0x18,0xc5,0x0c,0xfb,0x3c,0x91,0x9c,0x37,0x90,0x76,0x71,0x62,0xbc,0x80,0x40,0x1a,0x74,0xb8,0x1b,0x61,0xb1,0x89,0x4d,0xf7,0x8d,0xd4,0x46,0xef,0x1f,0x3b,0xac,0xe8,0x41,0x62,0x8e,0xea,0x2b,0x56,0x22,0x25,0x37,0x70,0x53,0xcd,0x8f,0x57,0xfa,0xad,0x00,0xc5,0x0c,0x9e,0x57,0xde,0x50,0x07,0x8d,0x80,0xbf,0x22,0x5d +.byte 0x4a,0xbd,0x6a,0xcb,0xfc,0x6f,0xd1,0x56,0x8f,0xd5,0x34,0x8a,0xe6,0xe9,0xa0,0x00,0x06,0x12,0xd8,0xb1,0x49,0x0a,0xbb,0x87,0xe5,0xca,0x75,0x11,0x4c,0x85,0x60,0x77,0xc0,0x90,0x1c,0x14,0x38,0x38,0x3e,0x4f,0xff,0xbf,0xfc,0xa1,0xa1,0xe7,0xb0,0x5d,0xd8,0x1f,0x33,0x07,0x5f,0x04,0x4f,0xc7,0x93,0xc6,0xcc,0xe3,0x01,0xd0,0x43,0xe1 +.byte 0xd9,0x00,0xc5,0x9f,0x79,0xab,0xfc,0xe9,0x55,0x51,0x03,0x0c,0xe1,0x73,0xd6,0x09,0xe3,0xb9,0x76,0x72,0x77,0x4c,0x1b,0x7c,0x57,0x1e,0x7f,0x5f,0x02,0x83,0xa3,0xc6,0xde,0x23,0x85,0x76,0x1a,0xbf,0x48,0xc8,0x02,0xdb,0x31,0x30,0x95,0x85,0x68,0x8a,0xf6,0xe9,0x48,0x7f,0xc9,0x26,0xab,0x68,0x36,0x9f,0x1c,0xf0,0x90,0xbc,0x4a,0x68 +.byte 0x94,0xf8,0x7f,0xae,0xa9,0x3b,0x5b,0x63,0x9a,0xcd,0xe3,0xf0,0xac,0x9f,0x6f,0x78,0xa0,0x67,0x58,0xd8,0x2c,0x71,0x8a,0x14,0x31,0x07,0x95,0x0c,0x38,0xa4,0x53,0x33,0x60,0x23,0x21,0x87,0x6b,0x4f,0xf9,0xa8,0xb8,0xfc,0x8e,0xf1,0x3a,0x03,0x0b,0x03,0x02,0x33,0xbc,0x6a,0xb9,0x8e,0x41,0xc8,0x38,0xd8,0x83,0x30,0x6a,0x61,0x5c,0xcf +.byte 0x49,0xdd,0xd7,0xda,0x2c,0xaf,0xc4,0x68,0xad,0x07,0x9c,0xd4,0xaf,0x94,0x64,0xcf,0xe1,0x9b,0x37,0x50,0x65,0x03,0x20,0x3c,0x34,0x43,0xe9,0xb0,0x9b,0xba,0xb1,0x9a,0x3e,0x10,0x99,0x8f,0x93,0xb7,0x3d,0xac,0xbd,0xab,0xa8,0xfa,0x74,0x90,0xe1,0x38,0xe4,0xf3,0x47,0xfc,0xad,0x8b,0xb4,0x98,0xe4,0x65,0xe9,0xd9,0x8a,0x21,0x81,0x4f +.byte 
0x0c,0xd7,0xb1,0x84,0xb9,0x69,0x68,0x64,0xa3,0x1f,0x25,0x84,0x5f,0xf7,0x3f,0xca,0x52,0xff,0xda,0xc9,0x3d,0x5e,0x8b,0x57,0xd3,0x9a,0x1d,0xb7,0xae,0x90,0xa4,0xc3,0x78,0x68,0xfd,0x80,0x3f,0xfd,0x5c,0x09,0x83,0x5d,0xc2,0x48,0xd8,0x84,0xeb,0x8a,0xfe,0xbe,0x30,0x12,0x79,0x54,0x5f,0x7f,0x6e,0x4b,0x8a,0x1e,0xcb,0xcd,0xed,0xb6 +.byte 0xe9,0x6d,0x8a,0x1f,0xdc,0xb1,0x46,0xab,0xdc,0x0d,0xbf,0xda,0xd9,0x39,0x3b,0xd2,0x81,0x00,0x83,0x77,0x32,0xf7,0xdf,0x0e,0x31,0x5d,0x1d,0x6c,0xa7,0x4e,0x54,0xa8,0xac,0x81,0x8c,0xb6,0xa5,0x89,0x02,0xd7,0x2e,0xfd,0x26,0xa3,0x9e,0xcf,0xdb,0x1f,0x5a,0xf3,0x54,0xac,0xe5,0xd0,0x1f,0x9b,0xa7,0xab,0x28,0xcc,0x66,0xd3,0xbc,0x4c +.byte 0x54,0x1a,0x54,0x73,0x78,0xde,0x08,0xd5,0xa5,0x08,0xdc,0x00,0x09,0xc5,0x37,0x61,0x1a,0x98,0x12,0x84,0x2d,0xff,0xc3,0x25,0x62,0x93,0x83,0x05,0x66,0x3d,0xfb,0x1d,0x54,0x08,0x8a,0x50,0x03,0xc4,0xc4,0x6e,0xfa,0x16,0x83,0xbb,0x27,0xf1,0xb7,0x31,0x92,0x64,0x76,0xbc,0xf0,0x44,0x62,0xe9,0x5e,0x15,0x94,0xdc,0xe9,0xf3,0xf8,0x20 +.byte 0x93,0x4d,0x11,0xa2,0xc8,0xde,0x83,0xe6,0x75,0x63,0xfe,0x13,0x75,0x0f,0x79,0xd1,0x3d,0x75,0xb7,0x43,0x62,0x57,0x8d,0x96,0x9c,0xa3,0xc4,0xb2,0x84,0x6a,0x14,0x6e,0x17,0x32,0x09,0x76,0x95,0xbb,0xd6,0xc1,0x2e,0xdc,0x8c,0x73,0xd7,0xad,0x5a,0x41,0x8b,0xb3,0x7e,0x8d,0x90,0xec,0xf5,0xa0,0x46,0x90,0x4c,0x52,0xec,0x97,0xc6,0x98 +.byte 0x7d,0x19,0x77,0xa0,0x99,0x85,0x11,0x26,0x77,0x26,0xf9,0xac,0xe3,0x81,0xcf,0x7d,0x22,0xc8,0x00,0x3d,0x5b,0xee,0xa5,0xf8,0x6d,0xfe,0x47,0xe4,0xef,0x60,0xcc,0xd0,0x33,0xf7,0x5b,0xed,0xbd,0x82,0xc9,0xa8,0x41,0xb8,0x47,0x34,0x9f,0x62,0xb2,0x67,0x62,0xb0,0x3a,0x27,0x95,0xe1,0x22,0x76,0x98,0x0f,0x35,0xaf,0xfc,0x4d,0xc7,0x92 +.byte 0x92,0x7e,0xaf,0x3b,0x3a,0x36,0x5e,0x5c,0xbf,0x43,0x02,0x66,0x5a,0x30,0x78,0x82,0x52,0x20,0x98,0xd6,0xa1,0xe9,0x9a,0x61,0x54,0x0b,0x74,0x85,0xb5,0x99,0x69,0x9f,0x9b,0x3b,0x2f,0x49,0xec,0xb3,0x18,0x0c,0x4a,0x53,0x20,0xd7,0x80,0x7b,0xd4,0x20,0x21,0x32,0x89,0x08,0x81,0x50,0x2b,0x16,0x8d,0xbb,0xe6,0xbb,0xc7,0x74,0x80,0x67 +.byte 0x47,0xf1,0x06,0x68,0x02,0x37,0x31,0x00,0x50,0x8b,0xe2,0x44,0x85,0x2e,0x39,0x54,0xda,0x26,0x7b,0xe1,0xb0,0x23,0xd7,0x0c,0x3c,0x3b,0x81,0x9b,0xa6,0xbe,0x24,0xfd,0x09,0x73,0xbe,0xc3,0x2f,0xa0,0x7b,0x85,0x5b,0x1b,0x55,0x4e,0x9e,0x38,0x80,0x61,0xd7,0xe8,0x9b,0xec,0x88,0x00,0x6a,0x64,0x1b,0xd5,0x65,0x20,0x2a,0x62,0x64,0xbc +.byte 0x21,0xca,0xce,0xc3,0xeb,0x2d,0x2b,0x5c,0x4d,0xb8,0x7c,0xb5,0xbe,0x98,0x0d,0x5b,0x88,0x23,0x60,0xff,0xbe,0x0a,0xb6,0xdd,0xdf,0x28,0xd5,0x2c,0xe5,0x9d,0xb5,0x29,0xea,0x6c,0x3a,0xf4,0x78,0x91,0xa3,0xb2,0xab,0x12,0xf9,0x90,0x96,0xc9,0xa4,0xfc,0x4d,0x28,0x2b,0x0c,0x28,0x8b,0xb7,0x8b,0x36,0xd6,0x80,0xbf,0x07,0x09,0xf9,0x62 +.byte 0x32,0xc0,0x50,0x60,0xd9,0x73,0xe3,0xbe,0xfa,0xa6,0x78,0x48,0x47,0xd7,0xb5,0x39,0xd8,0x04,0x6d,0x79,0x98,0x2e,0xd6,0x3a,0xe5,0xc9,0x01,0xd0,0x00,0x2e,0xd2,0x8b,0xd7,0x1f,0xf1,0xba,0xd4,0x0e,0x9f,0x9d,0xab,0xbf,0x2c,0xe1,0x75,0xf6,0x9c,0xc0,0xae,0x73,0x2b,0x58,0xcb,0x6d,0x46,0x6d,0x11,0xb7,0xce,0xc7,0xef,0x34,0x2c,0x11 +.byte 0x93,0x3c,0x17,0xd9,0x3e,0xad,0xc9,0x4c,0xb3,0xd0,0x0a,0xd0,0xfe,0xf3,0x9d,0xc5,0x43,0x03,0xa9,0x78,0x4a,0x42,0x7f,0xfb,0x75,0xd2,0x85,0xfb,0xe7,0xe6,0xa9,0x48,0x2f,0xa6,0xc3,0x16,0xe2,0x2a,0x9d,0x0d,0xcb,0x2e,0x8b,0x75,0xa8,0x14,0x3a,0x2e,0xb1,0xff,0x58,0x1d,0xa8,0xa6,0xc0,0xf6,0x17,0xda,0xc1,0xce,0xaf,0x08,0xa9,0xc2 +.byte 
0xa3,0xc1,0xab,0xb6,0xe8,0x10,0x57,0x8a,0xce,0xc0,0x03,0x5c,0x53,0x5c,0x02,0x5d,0xcf,0x5c,0x65,0xc6,0x47,0x3c,0x62,0x0e,0xa3,0xfc,0xe2,0xae,0x10,0x55,0x4a,0xb4,0x27,0xe8,0x59,0x5e,0x45,0xa9,0xbb,0x21,0x10,0x91,0x46,0x1f,0x50,0x3b,0xc6,0x8c,0xa1,0x8a,0xee,0x5e,0x6e,0x32,0xe6,0x42,0x40,0x79,0x7f,0xbb,0xb3,0x5b,0x05,0xde +.byte 0xe0,0xf6,0x7f,0x3d,0x37,0xe6,0xc3,0x3b,0x40,0xc9,0xe0,0x42,0x36,0xd0,0x0e,0x13,0x32,0x3e,0x48,0xce,0xd8,0xa2,0xef,0xae,0x93,0x66,0x7d,0xde,0xb9,0xdd,0x60,0x15,0x53,0xf2,0xd9,0x90,0x3d,0x38,0x8c,0xa6,0x34,0x44,0xb5,0x6c,0x74,0x7d,0x9d,0xe7,0xd0,0xef,0x6c,0xd6,0xfe,0x9b,0x79,0x4e,0x79,0x5e,0x48,0xef,0x93,0xb2,0x81,0x0b +.byte 0x2b,0xee,0x83,0x69,0x3d,0x15,0x8c,0x27,0x69,0x6f,0xca,0xbf,0x75,0x29,0x37,0xc6,0xe6,0xca,0xb2,0x70,0xd0,0xaf,0xc8,0x5e,0x69,0xf1,0x6b,0x2d,0x0d,0xe7,0xe9,0xbf,0x07,0x52,0xe5,0xac,0x98,0xcf,0xcf,0xd6,0xdd,0x7c,0x2b,0xfc,0x8f,0xd2,0x5f,0x81,0x4b,0x1b,0x7b,0x2d,0x84,0xe2,0x69,0x96,0xcb,0xa2,0x59,0x10,0xba,0xda,0x51,0x11 +.byte 0xeb,0xc3,0x4f,0x10,0xbf,0x8e,0x5b,0xbb,0xa3,0x29,0xe9,0xd8,0x0e,0x71,0xa0,0x1b,0xff,0xee,0x36,0x8c,0x00,0x83,0x6b,0x32,0xfe,0x05,0xeb,0x89,0x8f,0xed,0x48,0x22,0xe1,0x76,0x0a,0xac,0xae,0x3c,0x24,0x54,0x84,0xc2,0x0f,0x79,0x33,0x2b,0x49,0x35,0x1c,0x84,0x5a,0xca,0x92,0x6c,0x1f,0x78,0x15,0x5a,0x36,0xad,0xd5,0x1d,0x9d,0x10 +.byte 0xc1,0x5f,0x7c,0x61,0x60,0xba,0x2e,0xe6,0x9b,0x34,0x02,0xe9,0x68,0x1c,0xfb,0xbf,0x02,0xdc,0x79,0x57,0x1c,0x0f,0xc8,0x8c,0x2a,0x66,0x2a,0x50,0xaa,0x81,0x4e,0x1f,0xa8,0x2d,0xe4,0x61,0xe8,0x43,0x84,0xcb,0xda,0x96,0xf9,0x4a,0xd0,0x8f,0xe1,0xd7,0xc4,0x05,0xf5,0x76,0xfa,0x47,0x7a,0x07,0x1a,0x77,0xbb,0x63,0xb3,0x3a,0x85,0x3b +.byte 0x0d,0x32,0x4f,0x14,0x15,0x02,0x5b,0x9c,0xbc,0xc2,0x12,0x90,0x0f,0x7b,0x94,0x27,0x5f,0x70,0x23,0xd8,0x5d,0x54,0xc4,0xca,0x6a,0x69,0x9e,0xd1,0xb3,0x2a,0x75,0x1a,0x07,0x9c,0x20,0xf6,0x76,0x22,0x4d,0x09,0x30,0x24,0x3f,0x3b,0xe5,0xcb,0x4b,0x5a,0x03,0x2d,0xe8,0xbe,0xed,0xf0,0xe3,0x91,0xf2,0x6c,0xb8,0x02,0x2d,0x6c,0x7a,0xa6 +.byte 0xc1,0x8e,0xa7,0xbb,0x73,0xdf,0x40,0xa5,0x60,0x91,0xbf,0xbe,0x28,0x0b,0x37,0x2e,0x5f,0x4b,0xcd,0x14,0x4d,0x2d,0xfc,0x5e,0x43,0xb5,0x78,0x8d,0xea,0xa0,0x86,0x54,0x4f,0xb6,0x25,0x40,0x39,0x3f,0x9c,0x7a,0x26,0x74,0x88,0x42,0x53,0xb0,0x3b,0x81,0x75,0x04,0x67,0x41,0x65,0x66,0x2c,0xdc,0xe9,0xf0,0xb3,0xab,0x2a,0xa5,0xf3,0xef +.byte 0xfa,0xc5,0x10,0x63,0xe2,0x70,0xb5,0x29,0x60,0x86,0x9e,0xb9,0x0b,0xe2,0xc4,0x05,0xa9,0x3c,0x1b,0x60,0x15,0x6b,0x2f,0x74,0x93,0x5e,0x70,0x9a,0x56,0x6a,0xc4,0x92,0x49,0xaa,0x95,0x51,0xc4,0xba,0xfd,0xf6,0x2d,0x36,0x3e,0x66,0xbd,0x74,0xbc,0x2e,0xb3,0xad,0xa1,0x41,0x50,0x33,0x79,0x84,0xac,0x21,0x7a,0xfc,0x3a,0x8e,0xdb,0xcc +.byte 0x27,0xf6,0x2c,0x5c,0x23,0x38,0x73,0xd5,0xaf,0xc9,0x2d,0x9c,0x18,0x58,0xdf,0x8f,0x89,0x9d,0xdd,0x00,0x3c,0x5f,0x23,0x00,0x6e,0x66,0x1d,0xf3,0x1c,0x40,0x9d,0x43,0xb0,0x74,0xf1,0x41,0xa5,0x77,0xcb,0x8d,0x5b,0x94,0x68,0x95,0xb6,0x0e,0xd4,0x4d,0x47,0x9b,0xd2,0xcd,0x9b,0x94,0xa4,0x28,0xf9,0xf0,0x3d,0xcf,0x89,0xb1,0xc3,0x73 +.byte 0x84,0x15,0xb6,0xc8,0x6b,0xf1,0xb1,0xdc,0x1b,0x1a,0x6f,0xb5,0x73,0x87,0x8b,0x63,0xbf,0x4b,0x25,0x9b,0xe4,0xdd,0x44,0xed,0xe7,0x0e,0x6f,0x03,0xae,0xa1,0x5e,0x1f,0x5f,0xa7,0xa4,0xed,0x69,0x7a,0x91,0x6d,0x55,0xac,0xce,0x18,0x32,0x17,0x78,0x49,0x9f,0x1e,0x9c,0xd2,0x7b,0x1f,0x74,0x60,0xa5,0x64,0xb1,0x99,0xe6,0xc5,0x0d,0x69 +.byte 
0xfa,0xb2,0xd9,0x05,0x61,0x71,0xa4,0x6f,0xc2,0xb6,0x91,0x0e,0x6c,0xf2,0xa6,0x6c,0xea,0x8e,0x94,0x8b,0xac,0xa7,0xfe,0x70,0x8e,0x8d,0xc2,0x85,0xa6,0xa7,0x8e,0xe8,0xfa,0xbc,0xa1,0xaf,0x0e,0xa9,0x06,0xa4,0x9a,0xb0,0x23,0x93,0xbc,0x93,0x2d,0x97,0x42,0xe2,0x0d,0x3a,0x65,0xb4,0x60,0x5b,0xeb,0xa1,0x20,0x8a,0xdc,0x17,0x6b,0xc5 +.byte 0x19,0xc3,0x67,0xbf,0xae,0xf7,0xb9,0xb1,0x88,0x7f,0xe5,0x1b,0xc2,0x61,0x97,0xa0,0xd3,0x64,0x74,0x6b,0x7a,0x46,0x39,0x3f,0xc8,0xd3,0x53,0x79,0x74,0x4e,0x1e,0x63,0x91,0xc5,0x4a,0x70,0xb0,0x05,0x35,0x19,0xc2,0x26,0x54,0x44,0x3b,0xa9,0x12,0x40,0xd0,0x21,0x19,0xf3,0x8d,0xc7,0x2b,0x88,0x9a,0xec,0x41,0x8f,0x4f,0x23,0x19,0x1a +.byte 0xf3,0x1d,0x0a,0x88,0x0f,0xa7,0x02,0xd4,0x78,0x88,0xe6,0x43,0xb6,0x9e,0x07,0xdf,0x6a,0x1f,0x41,0xbb,0x3e,0xea,0x15,0xff,0x66,0x4c,0x7a,0x8b,0xee,0x27,0x47,0x81,0x81,0x95,0xa2,0x22,0xb4,0x9f,0x1c,0x09,0x1c,0xfc,0x0a,0xef,0x88,0x7f,0x59,0x60,0x91,0x6a,0xe4,0x92,0x8c,0x02,0x54,0xc9,0xee,0xc7,0x5e,0xd1,0xbf,0xc9,0x41,0xde +.byte 0x2f,0xa3,0x22,0x07,0x1d,0x8c,0xe1,0x04,0x59,0x94,0x75,0x3e,0xee,0x56,0x62,0x07,0x80,0x18,0x60,0x78,0x0e,0x55,0x06,0xec,0xe1,0xa5,0xf6,0x21,0x7e,0xf9,0x37,0xab,0x6a,0xed,0x07,0xcb,0xbf,0xa2,0xab,0x50,0xee,0x1f,0x2f,0x54,0x2b,0x82,0x93,0x59,0x03,0x35,0xd9,0xe8,0x2b,0xa6,0x03,0xc2,0xef,0x37,0x85,0xfc,0x89,0x06,0x30,0xe0 +.byte 0xc2,0x00,0xc4,0xaf,0x59,0xb6,0x31,0x52,0x37,0xa4,0x6c,0xdb,0x1b,0x20,0x87,0xf0,0xa4,0x15,0x4b,0xa8,0xd9,0x7e,0x1b,0x96,0x00,0x07,0xf4,0x86,0x07,0x14,0x55,0x70,0x37,0xe3,0xe3,0xf0,0xeb,0xd6,0xf1,0xe0,0xe9,0x6c,0xdf,0x3d,0xaf,0x86,0xb8,0x00,0x9b,0xdf,0xc6,0x5c,0xd2,0x53,0xcb,0xcf,0x63,0xcc,0x3e,0x6d,0x62,0xeb,0xe6,0x97 +.byte 0xd8,0x54,0xed,0x36,0xe4,0xed,0x69,0xaa,0x10,0x83,0xde,0x16,0xfd,0xcc,0xd6,0x24,0xb9,0x3c,0x4f,0x99,0x81,0xc2,0x23,0x16,0x91,0x5d,0x9f,0x46,0xa5,0xdd,0xb4,0x8a,0xe1,0x07,0x89,0x84,0x2e,0x62,0x48,0xf6,0x1a,0x17,0x7b,0xc8,0xf7,0xb4,0x3d,0x9e,0x82,0xe3,0xe3,0xcf,0x0b,0xd9,0x52,0x90,0x61,0xd8,0xdf,0x9e,0xc4,0xc7,0x7c,0xfa +.byte 0xcf,0x09,0xd2,0x94,0x86,0x37,0x94,0xaf,0x7e,0x0a,0x9d,0x16,0xee,0xad,0xfb,0xa2,0x9e,0x2d,0x2f,0xad,0xd5,0xc2,0xf9,0x91,0xf8,0x7e,0x2b,0xb8,0xb2,0x60,0x3c,0x0a,0x89,0x53,0x07,0x87,0x3b,0x83,0x70,0xee,0x71,0xa3,0x94,0x0b,0x77,0x50,0xeb,0xcc,0x23,0xf0,0xbe,0x95,0x51,0x54,0xd2,0xd6,0xd2,0x09,0xa5,0x19,0x3d,0x4e,0xec,0xe3 +.byte 0x88,0x71,0xa7,0xb1,0x10,0x03,0x7e,0xc4,0x92,0x2a,0xe7,0x99,0x75,0xff,0xae,0x10,0x3d,0xbb,0x33,0xc9,0x7f,0xc2,0xe6,0x3c,0xc4,0xe7,0xba,0x37,0xba,0x68,0x69,0x92,0x4a,0xfb,0x32,0x3b,0xb5,0xde,0xdb,0x91,0xd0,0x8e,0x77,0xf2,0x1e,0x2d,0x25,0xb4,0xa0,0x42,0xef,0x78,0x6c,0x75,0xcb,0xa0,0x73,0xdf,0xde,0xd8,0x26,0xfe,0xe3,0xf9 +.byte 0x74,0xe7,0xa0,0xd2,0xbd,0x6c,0x99,0x8d,0x07,0xf2,0xf8,0xff,0x36,0x2d,0x8e,0xda,0x5e,0x5c,0x47,0x06,0xf8,0x08,0x33,0x1d,0x93,0xcf,0xc3,0x1a,0x20,0x86,0xb6,0x8e,0x44,0x10,0xbc,0xba,0x89,0xfc,0xa3,0x57,0x92,0x2c,0x28,0xa1,0xd0,0xab,0xdc,0xba,0x0a,0x7e,0x9d,0xd2,0xfd,0x09,0xd3,0x87,0x6c,0x06,0x44,0x17,0x73,0xfe,0xc9,0x8b +.byte 0x52,0xd3,0x09,0x60,0x14,0x03,0xb1,0x79,0x4c,0x9c,0xc4,0xec,0x42,0x4c,0xd3,0x21,0xe5,0x34,0x21,0x38,0xdd,0x12,0x95,0xd4,0x20,0x50,0xef,0x5f,0x46,0x4f,0x37,0x65,0xd5,0xf1,0xb2,0x2c,0x6c,0x9a,0x06,0x28,0x77,0xbf,0xe3,0xec,0xec,0x2b,0xcb,0x2c,0x8b,0x62,0x2e,0x39,0xaa,0x28,0x0b,0x51,0x01,0xa5,0x02,0x06,0x66,0x4a,0x67,0x0c +.byte 
0x96,0xa3,0x12,0x74,0x94,0x2c,0x0f,0x23,0xa3,0xea,0xda,0x1a,0x6d,0x54,0x30,0x33,0xc8,0x33,0x0a,0xfb,0x25,0x2a,0x8b,0x9a,0x87,0xd9,0x9d,0x37,0x4c,0x41,0x3b,0xe5,0x4a,0x81,0x92,0x40,0x38,0x18,0x82,0x13,0x54,0xde,0x56,0x11,0x63,0xf3,0x09,0x61,0x3b,0xdd,0x0c,0x71,0xe8,0x4f,0xc2,0x9a,0x77,0x2f,0xeb,0xf1,0x39,0x1c,0x10,0x0e +.byte 0x01,0xaf,0x92,0x34,0x9a,0xb6,0x7b,0x79,0x86,0x0c,0xf1,0x53,0xb6,0x59,0xbd,0x6d,0x79,0x6e,0x37,0x11,0x25,0x67,0x95,0x31,0x4f,0x43,0xdf,0xb7,0x4b,0x80,0x8d,0x07,0x3c,0x49,0x73,0x8a,0x72,0x61,0x02,0x0f,0x2f,0x13,0xed,0x91,0x10,0xf6,0x08,0xf3,0x50,0x4a,0xd4,0x36,0xcb,0x52,0xb3,0x3b,0xe6,0xef,0x85,0xe9,0xe0,0xad,0x0d,0x3d +.byte 0x84,0x07,0x70,0xdf,0x16,0x47,0xeb,0x26,0x19,0x27,0xaf,0x7a,0x9f,0x2f,0x2b,0x6d,0xbb,0x37,0x68,0x8e,0x19,0x46,0x5a,0x65,0x0d,0x0a,0x67,0xd8,0xe2,0xc2,0xcd,0x49,0xf6,0xc2,0x27,0xac,0x12,0xea,0x1f,0x81,0x60,0xac,0x8b,0x5d,0xcc,0x9a,0x5b,0xec,0xc3,0xcb,0x85,0x0d,0xef,0xa6,0xd5,0x33,0xb3,0x67,0x73,0x3f,0xc9,0x90,0x25,0x3e +.byte 0xe6,0x7c,0x41,0x59,0x83,0xf7,0x90,0x4a,0xbf,0x14,0x72,0x11,0xf2,0x3a,0x38,0x58,0x17,0xd8,0x3d,0x00,0xc6,0x42,0xf2,0xbc,0xfd,0x05,0x37,0x6d,0x11,0xb0,0xd7,0xb2,0xb7,0x73,0x69,0x80,0x47,0x30,0x64,0x13,0x8c,0x24,0xb2,0x42,0x12,0x8c,0xc0,0x8a,0x45,0x0b,0x71,0x23,0xeb,0xac,0x65,0xda,0x44,0x13,0x85,0x77,0xdf,0xb8,0x4b,0x69 +.byte 0xd4,0x8e,0x40,0x54,0x24,0xac,0xc8,0x62,0x36,0x51,0x20,0xaa,0xcd,0x5d,0xa5,0x73,0x2c,0x81,0x92,0x99,0x44,0x6b,0x04,0xac,0x8e,0xee,0x96,0x29,0xca,0xdc,0x2f,0xd1,0x13,0x5c,0x9e,0xc2,0x67,0x6a,0xaf,0xf6,0x3e,0xe2,0xa1,0x6d,0xda,0xbe,0x8a,0x55,0x50,0x27,0xee,0x6d,0xb8,0x35,0x5f,0xb4,0xa8,0x76,0xa1,0xe2,0x52,0x87,0xf6,0xfb +.byte 0xe2,0x16,0x1c,0x90,0x78,0xe4,0x17,0xb0,0xd9,0x56,0xf5,0xd3,0xa4,0xb0,0x3f,0xe9,0x01,0xf9,0xd0,0x67,0x2b,0xeb,0x1d,0x73,0x24,0x90,0x36,0x36,0x0d,0xcf,0xfb,0x3f,0xa1,0xa0,0x25,0x3b,0xf1,0x7f,0x9e,0x90,0xcf,0xb6,0xd0,0x83,0x90,0xcd,0x3f,0xff,0x5f,0xa3,0x33,0x95,0xd7,0xbe,0x78,0xfe,0xcc,0x9a,0xb9,0x64,0x88,0xb7,0xd9,0x5e +.byte 0x46,0x2d,0xf0,0xb1,0xa1,0x81,0x2b,0xab,0x80,0xf5,0x4d,0x3b,0xd8,0x53,0x64,0x8f,0xac,0x7a,0x03,0xb3,0x39,0x7a,0x85,0xef,0x61,0xb5,0x2c,0x8e,0xf4,0x27,0x07,0x9b,0x7b,0xc9,0x8b,0x1a,0xe4,0x4f,0xce,0x8b,0x35,0x32,0xac,0xcf,0x47,0xb8,0x2f,0x9e,0xe5,0x11,0x48,0xc1,0x07,0xea,0x0c,0xee,0x06,0xc6,0xa3,0x48,0xb6,0x1a,0xd8,0xb4 +.byte 0xa7,0xae,0x59,0x7d,0x9e,0x4e,0x66,0x7f,0xe9,0x02,0x40,0xdc,0x21,0x5e,0x74,0x2c,0x1d,0x29,0x22,0xca,0x97,0x4f,0xc8,0xc7,0xea,0x69,0x02,0x89,0xd1,0x43,0xff,0x83,0x89,0x58,0x66,0x92,0xbc,0x11,0xf6,0x02,0x8b,0xa8,0x34,0x8d,0xbe,0x3a,0x70,0xc3,0x10,0xe7,0xb5,0xc4,0xda,0xdb,0xc6,0x87,0xee,0xee,0xe0,0x48,0x62,0x80,0x8d,0xfc +.byte 0xaa,0xc7,0xce,0x1a,0xea,0xb9,0x1b,0x30,0x4a,0x48,0x9b,0xf4,0x58,0xff,0x5d,0x15,0xc8,0xf2,0x84,0x44,0xae,0x63,0xe8,0xb1,0xe0,0x2e,0x38,0x8e,0x47,0xf9,0x09,0xec,0xb9,0x94,0x18,0x37,0x68,0xef,0xbd,0xd5,0x67,0x72,0x01,0x9a,0x15,0xb9,0x7c,0x36,0xc0,0x22,0x80,0x12,0xb1,0x4e,0xab,0x3c,0xea,0x81,0xcf,0x70,0xf3,0xde,0x1f,0xd4 +.byte 0x67,0x94,0xfa,0xe1,0xf0,0xb6,0xd6,0x6b,0xc3,0xa2,0xbb,0x59,0x6b,0x9f,0x58,0x26,0x99,0x0c,0xdc,0xcd,0xb8,0xae,0x49,0xf0,0x8f,0xd3,0x0d,0xb7,0x4c,0x22,0xcf,0xb6,0x6c,0xa3,0x19,0x09,0x42,0x59,0x25,0xf8,0xdc,0xf3,0xc2,0x00,0xc3,0xc3,0xd3,0x9e,0x98,0xd3,0xa3,0xd0,0x96,0xfd,0x4f,0x15,0x57,0x5b,0xa7,0x08,0x3a,0x0e,0x3d,0xd2 +.byte 
0x7d,0xa1,0xa0,0x94,0xc0,0x76,0x83,0xf6,0xc1,0xe8,0x7e,0xd3,0x97,0xc1,0xbf,0x38,0x74,0x9b,0xfb,0x35,0xeb,0xf7,0x34,0x20,0xea,0xda,0xd3,0xb1,0x2e,0x10,0x16,0x9c,0x09,0x1c,0x67,0x46,0xa2,0x05,0xf9,0x47,0xde,0x35,0x53,0x18,0x58,0xb0,0xbb,0x7a,0x88,0x58,0xc5,0x3e,0x98,0x29,0x43,0x98,0x07,0x76,0xa3,0xe1,0x95,0x92,0x21,0xe9 +.byte 0x06,0x17,0x15,0xe0,0x6b,0xd5,0x5a,0x6d,0x10,0xa6,0x08,0x92,0xa9,0xf5,0xcf,0x57,0x1a,0x28,0x5d,0x14,0x33,0x99,0xf9,0xa0,0xb3,0xeb,0xee,0xd4,0x6e,0x0b,0x5e,0xf7,0xe9,0xe3,0xc6,0x71,0x34,0x55,0xf3,0xde,0xd5,0xc2,0x52,0xc3,0x7b,0x06,0x87,0xef,0x26,0x81,0xc9,0xbd,0xaf,0x12,0x61,0x95,0x2b,0xa4,0x8e,0xe8,0x08,0x9a,0x13,0x48 +.byte 0x2e,0x84,0x98,0xf6,0x95,0x21,0x22,0xe5,0xcf,0x30,0x8d,0xaf,0x70,0x16,0x27,0x0c,0xcd,0x26,0x7f,0xe8,0xa0,0x35,0x0c,0x01,0x0e,0xdd,0x9d,0x2c,0x89,0x41,0x34,0xc4,0xa2,0xaa,0xf6,0x3f,0xca,0x3b,0x86,0xce,0xd7,0x4c,0xe3,0xb5,0x69,0xe9,0x41,0xbe,0x3c,0x9a,0x4c,0x1a,0xb3,0x88,0xea,0x78,0x12,0x4c,0x1b,0x79,0xc7,0xcd,0x32,0x72 +.byte 0xfa,0x3f,0x0b,0x73,0x1b,0xd9,0xec,0x85,0xd4,0x52,0x6c,0x91,0x2d,0xbe,0x76,0x8b,0xfd,0xb6,0x49,0xcf,0x67,0xd1,0x18,0x7b,0xae,0x86,0x47,0x47,0xfd,0xff,0x63,0xf2,0x88,0x1b,0x58,0xd5,0x30,0x69,0xf9,0x9a,0x03,0x52,0xae,0xe5,0xe2,0x55,0xbf,0x35,0x12,0xb0,0x84,0xa9,0xed,0xb6,0x8d,0x5f,0x6c,0xed,0x1a,0x00,0x7a,0xdc,0xf2,0x03 +.byte 0x9e,0xef,0x59,0x27,0x4c,0xf4,0x83,0xa2,0x36,0x3d,0x3d,0x8c,0x75,0x8c,0x37,0x68,0x93,0x0b,0x30,0x48,0xea,0x91,0x14,0x37,0x88,0x87,0x7f,0xe6,0xd8,0xbd,0x04,0x34,0x1e,0xe8,0x2a,0x41,0x48,0x5c,0x66,0xf9,0xc2,0xd1,0x56,0x25,0x29,0x45,0xfa,0x71,0xe1,0x59,0xa8,0x52,0x99,0x0b,0x92,0xe0,0x33,0x52,0x91,0xd6,0x5f,0x0a,0x70,0x83 +.byte 0x4f,0xa3,0x47,0x6e,0xfa,0x85,0x5e,0xb1,0x0a,0x1d,0xe7,0x35,0xc9,0x88,0x27,0xc9,0x8c,0x3e,0x7f,0x6d,0x34,0x1e,0x11,0x7b,0xcd,0xe7,0x09,0x82,0x3a,0xa1,0x46,0xc6,0x15,0xde,0x0b,0xde,0x35,0x71,0x92,0x5c,0x72,0x50,0x08,0x6b,0x62,0xa7,0xec,0xa2,0xca,0x53,0x6e,0x47,0x7d,0x50,0x32,0xa7,0x32,0x7b,0x49,0x0c,0x97,0xcc,0x98,0x8d +.byte 0xc3,0x29,0x72,0x1e,0x85,0x47,0x1b,0xa7,0x89,0x19,0x85,0xaa,0x3f,0x11,0x6a,0xea,0x61,0x84,0x07,0x9a,0xc8,0xb3,0x25,0xfe,0x72,0xca,0x83,0xa9,0xf0,0x9e,0x01,0xe4,0x9a,0xd6,0x1b,0x87,0xfc,0xd4,0x3a,0x04,0x34,0x8c,0x0b,0x46,0xbc,0xe9,0x3c,0x3f,0xd9,0x93,0xf1,0xca,0x41,0x0b,0xdb,0x28,0xe8,0x28,0x1b,0x84,0x36,0x16,0x84,0x22 +.byte 0x1e,0x1e,0x2b,0xb0,0xfb,0xa6,0xcc,0x95,0x31,0x46,0xd7,0xca,0xc2,0x8b,0xa3,0x3a,0xa5,0xb0,0xaf,0x52,0x66,0x53,0x39,0x5f,0x58,0xb5,0xdf,0x01,0x52,0x07,0xb4,0x82,0xdc,0xb7,0xf9,0x88,0xd8,0x77,0xf8,0x12,0x9d,0xe8,0x21,0xd7,0x0b,0x0f,0x57,0x90,0x40,0xb2,0x64,0x3f,0xce,0xa0,0xa3,0xfa,0x12,0x16,0xec,0x6d,0xcc,0xc7,0x2a,0x43 +.byte 0xc9,0xe7,0xb7,0x90,0x52,0x35,0x22,0x6d,0x46,0x99,0x1e,0x44,0x12,0xd6,0x0f,0xaf,0x5c,0x16,0xd3,0x7a,0xd6,0xb4,0xfe,0x20,0x26,0x11,0xe1,0xc6,0xa5,0x10,0xfd,0x9f,0x0c,0x47,0xae,0x32,0x08,0x15,0x8f,0xef,0xef,0x4c,0x83,0xbc,0xbf,0x6a,0xe5,0xf5,0x69,0x11,0x4d,0x7d,0x47,0x1f,0x10,0x58,0x61,0xb0,0x0d,0x98,0x67,0xc0,0x99,0x3a +.byte 0x2d,0x9a,0x5b,0xd5,0x37,0xe7,0xe5,0xd4,0x56,0x96,0x69,0xf8,0x53,0x7e,0x24,0x70,0x51,0x01,0x83,0x8d,0x49,0x01,0x32,0x7d,0x4f,0x41,0x92,0x54,0x9c,0x15,0xf1,0x3c,0x05,0x32,0x28,0x0d,0x0f,0x67,0xbe,0x65,0xfa,0x1b,0xa3,0xd0,0x28,0x18,0xb8,0x84,0xfe,0x6a,0x30,0xea,0xb9,0x00,0xb1,0x10,0x7c,0xa2,0x94,0x4f,0x86,0x18,0xdd,0xb4 +.byte 
0x80,0x18,0x48,0x18,0xe1,0x56,0x70,0x7d,0x5c,0x3b,0xe5,0xd7,0x88,0x66,0x57,0xe3,0xe1,0x04,0x4c,0x68,0x5b,0x64,0x4d,0x0d,0x30,0x76,0x26,0xaa,0x84,0x0e,0xe0,0xed,0x53,0x62,0x20,0x33,0xaf,0x45,0x42,0x40,0x47,0x01,0x15,0xc9,0x0b,0x27,0x7c,0x68,0x4d,0x55,0xc4,0x6a,0x5f,0x96,0x9f,0x96,0x67,0xae,0x13,0x1c,0x84,0x52,0x33,0x41 +.byte 0x80,0xfc,0xae,0xb6,0xb1,0x8c,0xc3,0x19,0x80,0xa8,0x5f,0xe5,0x8c,0xd0,0xa8,0xb4,0x58,0xc9,0x48,0x29,0xab,0x11,0xd1,0x09,0xc6,0x20,0x98,0x4c,0xdb,0xa4,0x83,0x5c,0x26,0x51,0xce,0x80,0xe5,0xc4,0x9b,0xae,0xba,0x8e,0x99,0x4e,0xa4,0xff,0xdc,0x99,0x4c,0x02,0xa0,0x42,0x80,0xca,0xd7,0xea,0x6a,0x58,0x31,0xdb,0x16,0xd8,0x4d,0xab +.byte 0x03,0x2e,0x3a,0xdc,0xe9,0x07,0xfb,0xfb,0x5b,0x57,0x67,0x2a,0x7b,0xdc,0xc1,0x66,0xd1,0x31,0x3a,0x03,0x87,0xd8,0x66,0xda,0xa1,0x24,0x00,0x26,0xc0,0x26,0x78,0xf8,0x59,0x13,0x3f,0x34,0x08,0x35,0x45,0xbd,0x45,0x4f,0x89,0x65,0x97,0xdb,0xe6,0x1e,0x09,0x6e,0x23,0x2a,0xc4,0xf5,0x6a,0x74,0x28,0xb0,0xae,0x8c,0xfb,0x49,0x35,0x99 +.byte 0x06,0x30,0xc6,0xb2,0x8c,0xcd,0x8b,0x41,0xea,0xf2,0x04,0x18,0x29,0x25,0x1b,0x32,0x42,0x45,0xb5,0x92,0x42,0xb4,0x33,0xd2,0x90,0x31,0x08,0xcd,0x35,0x5d,0x50,0x64,0xa8,0x93,0xfd,0xa5,0xfd,0x32,0xbd,0xe8,0x13,0x1c,0x48,0x5c,0x14,0x70,0x03,0x92,0x0f,0x12,0x86,0xf6,0x6c,0xcd,0xc6,0xec,0xbf,0x8e,0x85,0x28,0x1d,0x1c,0x63,0x3f +.byte 0x81,0x93,0xd4,0x80,0x3c,0x29,0x0b,0x63,0xfe,0x87,0xa6,0x24,0xd6,0x3e,0x62,0xb6,0xd9,0xb0,0x58,0xf1,0x41,0x36,0xc7,0x47,0x8b,0xfd,0x4b,0x91,0x4e,0x5d,0x41,0x44,0xb0,0x65,0x3d,0x9e,0x3b,0x70,0x01,0xcc,0x7d,0x77,0xf0,0x23,0xd9,0xca,0x5f,0xda,0xa1,0x8c,0x71,0x11,0x91,0x7d,0x36,0xf5,0xc9,0xcd,0xf4,0x34,0x5f,0x69,0x57,0xd6 +.byte 0x33,0x4c,0xb2,0xe1,0x38,0x5f,0x86,0x3c,0x57,0x7b,0x2e,0x99,0x05,0x80,0x63,0xc4,0x77,0x69,0x06,0xc2,0x47,0x44,0xca,0x17,0x27,0x1d,0x55,0x34,0x02,0xd0,0x89,0x3a,0x3b,0x79,0xf0,0x86,0xd7,0x6b,0x01,0x9c,0xc7,0xa8,0xde,0xdb,0xdf,0x49,0xd1,0xb9,0x11,0xaf,0x7e,0x22,0x8b,0x5d,0xb5,0x0b,0xdc,0xd0,0x36,0xe6,0x9d,0x85,0x41,0x4a +.byte 0x35,0xf0,0xe1,0xcd,0xce,0x7b,0xd1,0xd6,0x00,0xdd,0xb6,0xe4,0x06,0x3e,0x66,0xe9,0x2b,0xa8,0x44,0x0d,0x18,0xd4,0xbc,0xfb,0x3c,0x58,0x6c,0x11,0xe9,0xdc,0x19,0x14,0x08,0x27,0x23,0x0c,0xd0,0xf9,0x97,0xaf,0x97,0x07,0x02,0x1a,0x5e,0xcd,0xae,0xd2,0x80,0x96,0x16,0x49,0xc3,0xfc,0xda,0x25,0x12,0x20,0xe1,0xc0,0x68,0x90,0x4b,0x30 +.byte 0x2d,0x06,0x53,0x2c,0x57,0x63,0x4a,0x7a,0xf6,0xc8,0x5a,0xb7,0x58,0x8c,0x13,0xfe,0x43,0xb3,0xf8,0x25,0x3e,0x7a,0x25,0x3e,0x1d,0x7f,0x8f,0x5e,0xdb,0xad,0x99,0x83,0xfc,0xd9,0x0a,0xdf,0xb5,0x19,0x1c,0x2c,0xf6,0xe8,0x06,0xbe,0xc0,0x9f,0x7e,0x0f,0x95,0xaa,0xac,0x09,0xdc,0x8c,0x37,0xcf,0x35,0x35,0x95,0x62,0xf1,0xff,0x96,0x1c +.byte 0x77,0xe9,0x53,0x7e,0x12,0x56,0x2d,0x4e,0x3e,0x1f,0xdb,0x1d,0x71,0x0e,0xdc,0xf7,0x65,0xb1,0x78,0x7f,0xe4,0xba,0xbf,0x7f,0x6c,0xcb,0x73,0xd3,0xe8,0xd9,0xce,0xfb,0xdb,0x48,0x87,0xe0,0x10,0x00,0x74,0xcb,0xdf,0x32,0xa8,0xdd,0x83,0x24,0x49,0xda,0x86,0x38,0x1c,0x2c,0x93,0x09,0x8a,0x26,0xbb,0x34,0x21,0x1d,0xac,0xb5,0x16,0xae +.byte 0xd8,0xcb,0x94,0x04,0xd6,0xbc,0xde,0x9c,0x70,0x28,0xa5,0x1a,0x15,0x5e,0x35,0xe4,0xe6,0x53,0xea,0x9c,0x3b,0x0c,0x36,0x3b,0x80,0x13,0x28,0x1d,0xc7,0x1a,0xa8,0x8e,0x9e,0x09,0xce,0x5d,0x50,0xd3,0xc7,0x6f,0x3a,0x75,0xa5,0x84,0x1c,0x08,0x66,0xe6,0x05,0xda,0x8b,0xf1,0x4b,0x5c,0xe2,0xc7,0x0f,0xa1,0xf1,0x47,0x02,0xf4,0xa7,0x24 +.byte 
0xf3,0x0e,0x2c,0xa9,0xae,0x67,0xdf,0xce,0x30,0x88,0x4a,0x9a,0x39,0x4a,0x97,0x64,0xa8,0x30,0x53,0xf9,0x47,0x66,0x5c,0x19,0x1c,0xfb,0x2f,0x05,0x89,0x4f,0xfe,0x25,0xe7,0xed,0xed,0x17,0x5a,0x86,0xeb,0x25,0xee,0xe4,0x09,0x88,0x05,0x49,0x20,0x54,0x4b,0x7f,0x3e,0xb5,0x23,0x85,0xa9,0x66,0x61,0x73,0xe0,0x61,0x94,0xc6,0xe5,0x29 +.byte 0xb4,0xe1,0x6f,0xa4,0x4d,0x50,0x56,0x2e,0x30,0x75,0x51,0x5d,0xdd,0xa2,0x68,0x56,0x67,0xd8,0xec,0x2d,0x2a,0xfd,0x49,0xc5,0xbc,0xae,0x2f,0x6b,0xc7,0x8d,0x2e,0xca,0x91,0x35,0xe8,0xea,0x65,0xe9,0x9c,0x65,0xaf,0x8e,0xd5,0x16,0xdf,0xac,0x44,0x1e,0xb6,0x16,0xf0,0xb6,0x33,0x6a,0xe6,0x96,0x0f,0x85,0x2e,0xa1,0xaa,0x6a,0xe0,0x12 +.byte 0x0c,0xaa,0x7d,0xae,0xf7,0xe3,0xb2,0x4c,0x3c,0x10,0xc6,0x87,0x8e,0x87,0xfb,0xac,0xf7,0xd7,0x7a,0x2e,0x9a,0x7a,0xa7,0x4f,0xf0,0x75,0xce,0xbd,0xc3,0xe6,0x79,0x1d,0x56,0xab,0xff,0x56,0xfe,0x69,0xbd,0xcf,0x15,0x27,0x64,0x3c,0x83,0x1c,0x08,0xb0,0x91,0x60,0x67,0xe7,0x27,0x44,0x49,0x22,0x78,0xd5,0x1a,0xc8,0x3b,0x35,0x9b,0xa5 +.byte 0x53,0xce,0xde,0x04,0xd2,0x3e,0x67,0x48,0xaf,0x54,0xdf,0x9c,0xf7,0xb9,0xd4,0xe3,0xb6,0x85,0x02,0x68,0x21,0x10,0xdb,0xb5,0xca,0x11,0xa2,0x7c,0xcf,0x13,0x41,0x7a,0xfd,0xe9,0x0a,0x3c,0x53,0xd6,0x07,0xf2,0xdd,0xe2,0x7c,0x16,0xf0,0x44,0x3f,0x5d,0x34,0x09,0x7c,0x7b,0x21,0x8c,0x8e,0xdb,0x0d,0xc5,0x73,0xce,0x61,0xce,0x17,0x46 +.byte 0x6c,0x14,0x07,0xb5,0x70,0x80,0xf0,0x29,0x7c,0x13,0x41,0x2d,0x8e,0xdc,0x53,0xc2,0xbf,0xf0,0xc2,0xfb,0x59,0xa0,0x66,0x5f,0x25,0xda,0x17,0x5f,0xac,0xab,0x75,0x1b,0xc7,0x61,0x87,0x53,0x80,0x2e,0x11,0x4e,0x04,0x48,0xf9,0xee,0x54,0xe6,0x69,0x69,0x57,0xc2,0x46,0xd8,0xb3,0x2e,0x7b,0xc8,0xa5,0xd0,0xb2,0x5e,0xd4,0x6b,0x9b,0x1a +.byte 0xd6,0x79,0x9d,0x99,0xa6,0xbb,0x4d,0xca,0x74,0x2c,0x3d,0xd4,0x86,0xd0,0x64,0xd4,0x81,0x49,0x76,0x42,0xb8,0xf9,0x2c,0x52,0xe7,0x77,0x37,0x31,0xbb,0x2e,0x5b,0x38,0x81,0x01,0x2c,0x27,0x28,0xcb,0x0c,0xba,0xfa,0x8a,0x9a,0x45,0x51,0xa2,0xde,0xf2,0x7b,0xe6,0x65,0xec,0x5b,0x2d,0xe8,0x55,0x8e,0xb4,0x7f,0xf8,0x1a,0x66,0x3a,0x5f +.byte 0x06,0x10,0x15,0xb2,0x3d,0xb2,0x36,0x6e,0x9f,0x8e,0xe2,0x4c,0x78,0xe5,0x3a,0xac,0x21,0x16,0x20,0x30,0x0f,0x51,0x56,0xcb,0x53,0xca,0x70,0x3c,0xa2,0x3f,0x37,0x06,0x6c,0x70,0xec,0xf4,0x3d,0x7c,0x77,0xa0,0x61,0xc7,0x0e,0x26,0x9f,0x25,0xc0,0xf2,0x28,0xdb,0x57,0xbe,0xe6,0x4e,0x9c,0x4d,0x2e,0x48,0x50,0xc2,0xd4,0xfd,0x5e,0x52 +.byte 0x3f,0xd0,0x82,0xd1,0xd4,0x53,0xad,0x42,0x38,0xb1,0x02,0xd6,0xa0,0x34,0x7a,0xb4,0xb3,0xdd,0x91,0x12,0xf4,0x91,0xc9,0xa2,0x35,0x2d,0xdc,0x97,0xa1,0xdb,0x82,0xe7,0x92,0x99,0x66,0x13,0x99,0x20,0x95,0x1f,0x47,0x64,0x80,0x5e,0x5f,0x74,0x6b,0xa6,0xca,0x47,0x0b,0x24,0x72,0xa6,0x27,0xe7,0x56,0x61,0xa7,0x8e,0x62,0xa4,0xff,0x8e +.byte 0x29,0xf8,0x09,0xa4,0xbb,0x70,0x97,0x8a,0x39,0xe8,0x65,0xc8,0x52,0x23,0x9d,0xbf,0x10,0xe8,0x7d,0xbc,0x3c,0xc4,0x8b,0x1e,0x5c,0x75,0x94,0x24,0x62,0x3f,0x5b,0x2b,0x9a,0x08,0x00,0x78,0xfd,0x28,0x44,0x12,0x62,0x2a,0x6f,0x47,0x9d,0x57,0xb0,0x4e,0x3b,0xcd,0x01,0x7d,0x6e,0x62,0xe3,0x99,0x9c,0xae,0x6e,0xe2,0x70,0x7a,0x32,0xb4 +.byte 0xc1,0x19,0xb1,0x03,0x6b,0x92,0x89,0x4f,0x37,0xaf,0x36,0xee,0x5e,0x03,0x31,0x8c,0x41,0x27,0x17,0x21,0xdf,0xe4,0x34,0x97,0x8d,0xe7,0x41,0x47,0xf2,0x80,0x51,0x41,0x01,0xe4,0x0c,0x1a,0x09,0xfc,0x07,0xc3,0x94,0x07,0x6f,0xa7,0x6c,0xff,0x32,0x21,0xa5,0x01,0x8c,0xa2,0x88,0x3c,0xc8,0x57,0xe8,0x68,0x19,0x4a,0x46,0x7a,0x36,0xd2 +.byte 
0x75,0x8e,0xc5,0xa4,0x84,0x91,0x13,0x7f,0xdd,0x2b,0x3c,0x2e,0xc4,0x92,0x29,0xb3,0x60,0x74,0xc8,0x81,0x58,0x0e,0xad,0x6a,0x9d,0xaa,0x81,0x49,0x26,0x0f,0xd4,0x2a,0x39,0xdd,0x4d,0x2b,0x13,0xdb,0x2e,0x72,0xe6,0x45,0x99,0xeb,0xe6,0xe5,0xd5,0x76,0xd4,0x19,0xd8,0xd7,0xa9,0x1f,0xce,0x7f,0xc4,0x1c,0x9e,0x6f,0x68,0x32,0xb1,0x26 +.byte 0xc4,0xb6,0x4e,0x9f,0xbf,0xdc,0xe0,0xde,0x54,0x9b,0xe0,0x04,0x03,0xae,0xc9,0xce,0x3a,0xcb,0x93,0xad,0xcc,0x1f,0x46,0xf6,0xbb,0xff,0x40,0x52,0x9c,0x64,0x97,0x5a,0x6f,0x8d,0x28,0x45,0x1c,0xf6,0x8b,0xcb,0xb9,0x38,0xb8,0x00,0xee,0xec,0xac,0x68,0x3f,0x50,0xcb,0x36,0x6e,0x97,0xfd,0xa5,0x1d,0x29,0x6e,0xfa,0x9f,0x4b,0x83,0xcd +.byte 0x0d,0x34,0xf3,0x1e,0x3f,0x0f,0x2e,0x89,0xeb,0xf7,0x8e,0x5f,0xe0,0x3b,0x39,0xd2,0xe8,0x87,0xe3,0xe7,0xe9,0xd0,0x1b,0x32,0x03,0x6b,0x3c,0x75,0x7d,0xe2,0x5c,0x3c,0x42,0xb4,0x46,0x69,0x0b,0xaf,0x0a,0x5d,0x1a,0x83,0x0b,0x0e,0x3c,0x5a,0x36,0xbd,0x5d,0xb6,0xad,0x4c,0xdd,0xf1,0x8d,0xbf,0x2b,0x70,0x8e,0xbc,0x92,0x95,0x1b,0x0f +.byte 0xed,0x3f,0xae,0x9e,0xa2,0x5a,0x50,0xe4,0xda,0xde,0x04,0x51,0x31,0xac,0xa4,0x0b,0x94,0xcc,0x14,0x87,0x59,0xa8,0x30,0x09,0xe6,0x46,0xb9,0x07,0x3e,0x1a,0xbf,0x5a,0x23,0x32,0xfb,0x60,0x63,0x24,0x25,0x12,0xf6,0x3e,0x2d,0xd0,0x8b,0x88,0x9b,0xe9,0x2d,0xab,0xf5,0xaf,0xba,0xbc,0xfe,0xab,0xb2,0x61,0x7a,0x7c,0xbb,0x28,0x6b,0x86 +.byte 0xe5,0xa2,0x9c,0x2c,0x5a,0x23,0x12,0x11,0xe5,0x72,0xe8,0x7b,0x6b,0x40,0xf1,0x91,0x37,0x3b,0x47,0x75,0x65,0xac,0x4d,0x22,0x59,0x75,0x13,0xb0,0x73,0xff,0x59,0xd1,0x1b,0xcc,0x05,0x1f,0xf2,0xc8,0x50,0x83,0xf1,0x28,0x38,0x0b,0xc3,0xa0,0x3b,0xe3,0x86,0xbb,0x9c,0x7e,0xc1,0xe9,0xcc,0xd9,0xb8,0x2b,0x05,0xf3,0x6f,0xc7,0x9d,0xaf +.byte 0x7b,0xb7,0x38,0x41,0xa3,0x50,0x8f,0x92,0xe0,0x63,0x35,0xb3,0x95,0x9f,0x80,0xf8,0x75,0xbb,0xf3,0x2b,0x0e,0xaf,0x32,0x6e,0xff,0xeb,0x79,0xca,0xbf,0x1c,0x4f,0x6c,0x9c,0x06,0xb2,0xeb,0x99,0x57,0x1f,0xf6,0x64,0x0b,0x81,0x57,0xba,0xf4,0x32,0x1e,0x77,0x37,0x55,0xb7,0xbc,0xba,0x70,0x0b,0x0d,0xdd,0x95,0x41,0xb5,0x17,0x5b,0x14 +.byte 0x10,0x9d,0x14,0x52,0x83,0x65,0x0a,0xf4,0x55,0xca,0xf8,0xbe,0xa6,0x3a,0xa0,0x6e,0xcc,0x83,0x84,0x65,0xb4,0x1c,0x7e,0x40,0xdd,0x32,0x36,0x5a,0x23,0x17,0x7d,0xb5,0xb9,0x38,0x48,0x5c,0x6f,0x23,0x54,0x0e,0x93,0x74,0x27,0x0f,0xfd,0x58,0xc1,0x97,0x26,0x78,0x9a,0xd3,0x85,0xc5,0xb2,0xb3,0x44,0xb7,0x36,0x85,0x69,0xde,0x3b,0xa1 +.byte 0x2b,0x11,0xef,0x75,0xfc,0xaa,0x92,0xf1,0xf1,0x72,0xa0,0x5f,0x33,0xf6,0x0b,0x72,0xdb,0xce,0x6c,0x2a,0x15,0x76,0x40,0xd4,0x85,0xff,0x96,0xe1,0x48,0xe1,0x27,0x8f,0x74,0xf3,0xfa,0xa1,0xb7,0x2a,0xb6,0x41,0x90,0x92,0x7e,0xfa,0xfc,0xad,0xa3,0x94,0x91,0x77,0xf1,0x8f,0xee,0xa2,0x64,0x47,0x01,0xb3,0x01,0x99,0x05,0xe7,0x31,0x4a +.byte 0xe8,0xd2,0x65,0x40,0x21,0xc4,0x83,0x8e,0xc9,0x89,0xda,0x16,0x7b,0xe0,0xcb,0xc0,0xc0,0x3d,0x37,0x18,0x66,0xe9,0x70,0x86,0x0b,0x6c,0xe8,0x65,0x44,0xce,0x3a,0xcd,0x84,0x1e,0xce,0x0e,0xe3,0xf9,0x77,0x12,0xfb,0xe6,0x92,0x8b,0x0d,0x7e,0x15,0x7a,0x34,0x94,0x2a,0xa7,0xc5,0x35,0xa4,0xfc,0xbe,0xa3,0x13,0x70,0xe4,0x6b,0x2f,0x71 +.byte 0x31,0xef,0xdb,0x79,0x44,0xf2,0x77,0xc7,0xc9,0x0d,0x1a,0x7b,0xff,0x34,0xf8,0xc9,0xe8,0xc9,0xc2,0xe0,0x0c,0x9e,0xd6,0xb4,0x7a,0xdb,0x1f,0x65,0xb8,0xd4,0x92,0xbf,0x7f,0x06,0x44,0xe3,0xb4,0xd8,0x14,0xe3,0x9b,0x49,0x81,0x12,0xec,0x7d,0x01,0xe2,0x50,0x2c,0x0e,0xfd,0x4b,0x84,0x3b,0x4d,0x89,0x1d,0x2e,0x4b,0xe9,0xda,0xa5,0x3f +.byte 
0x19,0xc2,0x53,0x36,0x5d,0xd8,0xdc,0x6e,0xc3,0x48,0x8f,0x09,0xd5,0x95,0x4b,0x0c,0x7c,0x00,0x15,0x33,0x8e,0x1d,0x0c,0xdf,0x32,0x3b,0x93,0x1f,0xf5,0x49,0x4f,0xfd,0x8b,0x64,0xe7,0x96,0xaf,0x2f,0xc8,0xea,0xab,0x91,0x53,0x29,0xe3,0x31,0x0a,0x1c,0x6e,0xe0,0xbb,0x81,0x11,0x83,0xe0,0x07,0xfb,0x29,0x11,0x0f,0x0d,0x85,0xd4,0x61 +.byte 0x3c,0x75,0xbb,0x8a,0x23,0xb6,0xa0,0x7f,0xa4,0xbb,0x11,0xd4,0x75,0xde,0x27,0xe5,0xeb,0x11,0x5d,0x02,0xfe,0x5c,0x62,0x60,0x0f,0x6f,0x45,0x9b,0xfb,0xb7,0x32,0xa8,0x1c,0xd6,0xff,0x43,0x7b,0x53,0xee,0xa4,0x1f,0xf2,0xba,0xb6,0xb7,0xb7,0x39,0x18,0x85,0x79,0x77,0x27,0x30,0x26,0xe4,0xef,0xd1,0x39,0xc9,0xa2,0x0d,0x50,0xd7,0xef +.byte 0x9e,0xd8,0x8e,0xd2,0x74,0x1a,0x3f,0x99,0x24,0xf4,0x8b,0x4d,0x02,0x63,0x18,0x3a,0xaf,0x26,0xef,0xfc,0x1d,0xfe,0x46,0xc1,0x55,0xd7,0x92,0x65,0x2f,0xe7,0x4f,0x47,0xa8,0x2f,0x5d,0x47,0x67,0xeb,0x62,0x1d,0x69,0xa6,0x0e,0x51,0x1d,0x2c,0xed,0x6e,0x94,0xe9,0x48,0x4c,0x22,0xc2,0x93,0x79,0x6f,0x1b,0xc2,0x93,0x61,0x3d,0x8b,0xba +.byte 0xcb,0xe9,0x4a,0x88,0x5e,0x19,0x50,0x14,0xfe,0xda,0x3f,0x4d,0x47,0x54,0xfc,0x1c,0x09,0x77,0x37,0x30,0xfe,0x75,0x9f,0xdd,0xa4,0x74,0x04,0x04,0x88,0xe0,0xac,0x93,0x64,0x6f,0xbf,0x50,0xd8,0xf0,0xf7,0xa0,0xfa,0x98,0x49,0xfa,0xf7,0x6e,0xcf,0xa2,0xbf,0xb6,0x07,0x15,0x0e,0x4e,0x21,0x74,0x0a,0xa6,0xa3,0x67,0xce,0xf9,0x3b,0xd6 +.byte 0x4c,0xc8,0x43,0xe3,0x3b,0x3b,0x6a,0x86,0x62,0x3f,0x5a,0xf3,0x3f,0xf9,0xeb,0xbf,0xa3,0x2a,0x83,0x8a,0x70,0x8f,0x01,0x65,0x17,0x9a,0xa6,0x26,0x3b,0x09,0x06,0x22,0x19,0xed,0xd7,0x25,0x4b,0xd2,0x9a,0x30,0xfe,0x1c,0x82,0x68,0x16,0x04,0x0e,0x04,0x8f,0xc6,0x92,0xbe,0xe4,0x43,0x98,0x1d,0x3b,0x10,0x15,0x5b,0xef,0x4e,0x60,0x5e +.byte 0x6b,0xc9,0xde,0xb8,0x47,0x02,0x86,0x45,0x39,0x7a,0x1a,0xef,0x67,0x28,0xc5,0x40,0x73,0x2a,0xa7,0x12,0x9d,0x58,0x3a,0x34,0xc2,0xda,0x34,0xb0,0x48,0xd9,0x34,0xcd,0x18,0xe9,0x76,0x41,0x78,0x8f,0xe5,0xe8,0x3d,0xb2,0x01,0x3b,0x84,0xd1,0xca,0x5e,0x26,0x1d,0x8c,0xea,0xe1,0x46,0xa3,0xf9,0x11,0xac,0x0d,0x98,0x9f,0xd3,0x46,0x79 +.byte 0xff,0xad,0x99,0x32,0x63,0x96,0xbc,0x57,0x39,0x16,0xce,0x06,0x7e,0x63,0x78,0x7b,0x86,0x92,0x1a,0xe1,0x45,0xc0,0x73,0xe1,0xec,0xfc,0x88,0x8f,0xf8,0x36,0x0f,0x54,0x76,0x02,0x98,0x49,0x40,0xb9,0xef,0xd8,0x13,0x68,0xf5,0x1d,0x0a,0x98,0x65,0x21,0xc5,0x1a,0x22,0x4e,0x8e,0xad,0xa9,0x52,0x57,0xc4,0xc6,0xa8,0x48,0x01,0x7a,0x78 +.byte 0xc9,0xfc,0xdd,0xf3,0xc3,0x83,0xc0,0x06,0xb5,0x56,0x84,0xe2,0x0c,0x6b,0x80,0xd9,0x59,0xa1,0x3d,0xe3,0x56,0xf0,0xe3,0x3f,0x93,0x61,0xf7,0x8c,0x6b,0x40,0x65,0x6e,0x01,0xc2,0xa1,0xc1,0xb8,0x9b,0x15,0x6c,0xa1,0x18,0x4a,0x6c,0x8b,0x18,0x2d,0x8e,0x71,0x7a,0xa1,0x26,0xc1,0x4b,0xac,0x0c,0xca,0x08,0x33,0xef,0x35,0x33,0x63,0xeb +.byte 0x57,0x6e,0x7e,0x36,0xe0,0x31,0xad,0x10,0x76,0xb7,0x45,0xd9,0x3a,0x92,0x66,0x69,0x13,0x61,0x59,0x87,0xfd,0x6b,0xf1,0x46,0x0a,0x7a,0x3f,0x29,0x88,0x5b,0x7d,0xef,0x07,0x02,0xa8,0xa1,0xdc,0xd4,0x0e,0x77,0x8f,0x68,0x32,0xbd,0x8e,0xd6,0x0b,0xe4,0xd1,0x75,0xc1,0xb0,0x74,0x6c,0x0e,0xc3,0x46,0x79,0x36,0x3b,0x5f,0x0e,0xa0,0xad +.byte 0x28,0x8c,0xcb,0x01,0x8e,0x58,0x14,0x09,0xf1,0xd4,0x3b,0x2e,0xdc,0xbf,0x37,0x95,0x26,0xda,0xb6,0xcf,0xc8,0xa1,0xd4,0xec,0x72,0xf3,0x44,0xf5,0x4e,0x27,0x9b,0x2e,0x7c,0xfa,0x37,0x16,0x1d,0x7f,0x90,0x86,0xae,0x96,0x3b,0xe1,0xda,0xf7,0xc4,0x54,0x0b,0x51,0x7e,0x83,0xbe,0xed,0xd6,0x5f,0xd2,0x6d,0xbb,0xd3,0xc6,0x53,0x95,0x65 +.byte 
0x3d,0x19,0xc2,0xc5,0xdf,0x47,0x00,0x2c,0x4b,0x2d,0xec,0x32,0xd5,0x28,0xb5,0x30,0xe0,0x79,0x15,0x2e,0xab,0x97,0xa8,0xcf,0xc5,0x40,0x98,0x30,0x22,0x9f,0xbc,0xdb,0x65,0x06,0xfc,0x58,0xe5,0x55,0x5b,0xe2,0xf8,0x6e,0xc6,0xfc,0xec,0x6c,0x14,0xd2,0xe3,0x9a,0x71,0x8a,0x61,0xea,0x39,0xc6,0x77,0x94,0xdf,0x7b,0x99,0x71,0xdd,0x18 +.byte 0xc6,0x03,0x2d,0x49,0xf6,0xc3,0xe8,0x2b,0x7e,0x3f,0x28,0xfc,0xc8,0xa1,0xb0,0x15,0x31,0x7e,0x83,0xb8,0x14,0x34,0x0e,0x7f,0xde,0x74,0x7b,0xbf,0xb7,0x8e,0xd9,0x31,0x90,0x16,0xb6,0x57,0x14,0x4a,0xc6,0x67,0x3d,0xb9,0x46,0x92,0xf2,0xf9,0x94,0x36,0x2b,0xd6,0x1f,0x84,0xa5,0x8c,0x0f,0xd9,0x8c,0x5f,0x97,0x7a,0x7b,0xff,0xc9,0xf5 +.byte 0x5e,0x13,0x5f,0x19,0x58,0xba,0xa6,0xe8,0x29,0xf4,0xb8,0x7e,0x98,0xb7,0xef,0x1b,0x00,0xe8,0x90,0x8f,0x86,0x4c,0xe0,0x51,0x13,0x8b,0xa1,0x37,0x40,0x38,0x51,0x2f,0x5a,0x9b,0x63,0x8f,0xce,0x9a,0x97,0x07,0x0d,0x8e,0xce,0xb1,0x66,0x89,0x78,0xca,0xa6,0x0c,0x20,0xc4,0xf1,0xe3,0xab,0xe2,0x1c,0x83,0x2b,0x46,0x97,0xe8,0x8f,0x94 +.byte 0xb4,0x71,0x40,0xde,0xa1,0x05,0x4b,0xed,0xbf,0x0c,0x46,0xe1,0x25,0xf1,0xd0,0x5a,0xdb,0x9c,0x2a,0x09,0x03,0x80,0x24,0xc1,0x22,0x02,0xa5,0xde,0xf6,0x4c,0xbc,0x93,0x37,0xa9,0x28,0xb3,0x92,0x19,0xa8,0x3f,0x71,0x90,0x62,0x78,0xaa,0x9a,0x0c,0xab,0x50,0xaf,0x89,0x2b,0xf1,0xf4,0x12,0xbd,0xc9,0xd5,0xee,0x64,0x8b,0x48,0x21,0xd6 +.byte 0xa1,0xa1,0xf2,0x68,0x4a,0xf8,0x06,0x3e,0x20,0x31,0x66,0xb7,0x2f,0x64,0x01,0x5a,0x46,0x14,0x85,0xfb,0xde,0x04,0xc3,0xe4,0xd6,0x25,0x14,0xa0,0xbe,0x4d,0x39,0xd8,0xe0,0x9b,0xb7,0x6b,0x00,0xe6,0x46,0xfb,0xcc,0xa8,0xad,0x67,0x12,0x2c,0x53,0x2c,0xb6,0x9f,0x6e,0xfe,0xbc,0xcc,0x2c,0xa8,0x09,0x17,0x00,0x8e,0xf1,0xf4,0x3e,0xa9 +.byte 0x92,0x4d,0x83,0xe6,0x3c,0xf0,0xd3,0x1c,0xaf,0x84,0x2c,0x59,0x7e,0xda,0x1e,0xfd,0x7d,0xf3,0xef,0x93,0x05,0x03,0xb0,0x76,0x69,0xb5,0x51,0xa8,0x65,0x8f,0x8a,0xf8,0x55,0x92,0x08,0xfe,0xbf,0xc1,0x95,0x98,0x58,0xb1,0xd3,0xb6,0x78,0x4f,0x2f,0x25,0xcb,0x9d,0x32,0x4f,0xa6,0xcc,0xf8,0x36,0xff,0x72,0xb3,0x93,0x3d,0xd8,0x0b,0xe6 +.byte 0xc6,0xf6,0xed,0xcc,0x2a,0xa5,0x44,0x6e,0xe2,0x2d,0x6e,0x02,0xb4,0x7c,0x24,0x7f,0x57,0x02,0x84,0x61,0x8e,0xbd,0x32,0x4e,0x41,0x92,0x01,0x1b,0x8b,0x1d,0xd1,0x1e,0x31,0xc1,0x4c,0x5b,0x0c,0xa7,0x48,0x52,0x67,0xc2,0xd9,0xdc,0x86,0x9d,0xbd,0x6c,0x19,0x95,0x00,0xf0,0xd4,0x47,0xaf,0xfe,0x5d,0xa5,0x81,0xbd,0x1b,0x42,0x62,0xce +.byte 0x18,0x1b,0xa3,0x6f,0xf5,0x0b,0xb7,0x6a,0x3d,0xe3,0xcc,0x41,0x27,0xcd,0x49,0x4b,0xe5,0x2b,0xc4,0x28,0xfa,0xbe,0xd5,0x7e,0xb7,0xac,0xab,0x64,0x3b,0xe3,0x87,0xb1,0x33,0x8b,0xa8,0xe5,0x75,0xce,0x61,0x57,0x89,0xad,0x5f,0x61,0xdd,0x7c,0x06,0x2a,0x3f,0x50,0xb8,0x7e,0xd2,0xfb,0x32,0x83,0x07,0xd4,0xc5,0x3f,0xad,0x64,0x59,0x1f +.byte 0x21,0x59,0x6f,0x1b,0xd7,0x40,0x89,0x28,0x18,0xac,0xca,0xee,0x92,0x1c,0x0d,0x88,0x98,0x7a,0x75,0x68,0xe0,0xe2,0x96,0xda,0x88,0xb3,0xc6,0x21,0x02,0x34,0xfa,0xae,0x0b,0x38,0xcf,0x1c,0x6c,0x7a,0xc9,0xd9,0x5f,0xf0,0x4c,0x73,0xfd,0xe6,0x14,0xf3,0x39,0xed,0xbc,0x28,0x2f,0xf8,0x79,0x02,0x39,0x05,0xf3,0x6a,0x88,0xd9,0x03,0xe2 +.byte 0xb9,0x65,0x81,0x3a,0x34,0x80,0x3f,0x17,0x37,0x1e,0xe8,0x7d,0x41,0x49,0xfb,0x70,0x5d,0x58,0x3a,0x71,0x7b,0x3e,0xd3,0x83,0x0b,0x1b,0x11,0xfc,0x53,0xce,0xc6,0xc4,0x39,0x55,0xbe,0xbe,0x32,0xa5,0x88,0xab,0xcd,0x38,0x78,0x3e,0x52,0xaf,0x64,0x42,0x10,0xc3,0x70,0x81,0x76,0xe9,0x7d,0x8e,0x46,0x41,0xca,0x2c,0x0c,0x4c,0x30,0xd3 +.byte 
0xca,0x38,0xa3,0x97,0x2e,0x0f,0xa5,0x18,0x3b,0xaa,0x0f,0x00,0x75,0x35,0x9c,0xcd,0x28,0x83,0xd4,0xa7,0x7c,0xb9,0xcd,0xb5,0x55,0x29,0x4c,0x14,0xcd,0xfc,0x8f,0xaf,0x7d,0x69,0x4f,0xf7,0x0f,0xed,0x7c,0xa5,0x79,0x9d,0x36,0xbb,0x72,0xbc,0xf2,0x14,0xfd,0xf0,0x04,0x2a,0x89,0x1e,0xf7,0x80,0x4c,0x5e,0xb8,0xc1,0xdb,0xfa,0x3c,0x27 +.byte 0xbb,0x30,0x08,0x2b,0xd2,0xf8,0xdb,0xe0,0x8c,0x00,0xe4,0xca,0xa9,0xde,0xb0,0x14,0x5b,0xec,0x6b,0xe6,0x5c,0x90,0x17,0x02,0x59,0x5f,0x5f,0x51,0xf8,0x30,0x10,0x11,0xc4,0xdf,0x37,0x30,0x32,0xb1,0x4d,0x49,0xfe,0x82,0x87,0xd2,0x42,0xf5,0x38,0x76,0xf9,0xa5,0x28,0xfc,0x14,0xb2,0xe0,0x72,0x82,0xde,0xc8,0x47,0x9e,0x8f,0x8a,0xb5 +.byte 0x85,0x44,0x42,0x12,0xc6,0xc0,0xa5,0x60,0x5a,0x27,0xd0,0x36,0x14,0x7b,0x2a,0x83,0x98,0x92,0x08,0xe9,0x03,0xc9,0xc3,0xd3,0x36,0x97,0xba,0x5e,0xd5,0x51,0xcc,0x44,0xeb,0x81,0x76,0xae,0x28,0x94,0x0b,0xf6,0xc7,0xeb,0xae,0x61,0x6f,0x7b,0x34,0xb5,0x8c,0x5f,0x31,0xb6,0x23,0xe3,0xe7,0x4b,0x60,0xe6,0xba,0x8d,0x0e,0xd1,0xb2,0x37 +.byte 0x72,0x3d,0xc1,0x75,0x9b,0x5e,0xcb,0x0f,0xf9,0xe4,0xdb,0x82,0x4c,0xc4,0x37,0xef,0x9d,0xde,0x16,0x85,0xe9,0xc2,0x03,0xd8,0x5b,0xa1,0xff,0xfa,0xd4,0xd7,0x5c,0x34,0xb6,0x1e,0x25,0x96,0xf5,0x8b,0xc3,0xee,0x16,0x1f,0xf8,0x55,0x4e,0x1c,0x83,0x80,0x77,0x1d,0x4f,0xb6,0x95,0x1c,0x91,0x7d,0x50,0x25,0xf4,0x2a,0x5d,0x2e,0xc7,0x8a +.byte 0x14,0xf8,0xb9,0xbc,0xab,0x5b,0xcd,0x47,0xb5,0xaf,0x85,0xc0,0x34,0x27,0x7d,0x6a,0x8c,0x84,0x8a,0xae,0x68,0x60,0x0e,0xa1,0x45,0xf7,0x83,0x66,0x91,0x69,0x30,0xed,0x26,0x5e,0xf5,0x48,0x6b,0x20,0xb3,0x11,0x50,0xf7,0x70,0x9d,0x10,0x50,0x44,0x87,0xfe,0x96,0x5c,0xc6,0xa4,0xa4,0xed,0x5e,0x7f,0x3d,0x90,0x19,0xbe,0x31,0xa3,0xdd +.byte 0x44,0xbb,0x9b,0x51,0x5a,0x06,0x1d,0x2e,0xd7,0xef,0xd1,0x81,0xb6,0xec,0xc6,0x89,0xfb,0x13,0xc5,0x21,0xef,0x9a,0x1a,0x48,0xf2,0xf8,0xb3,0xa3,0xec,0x7f,0x85,0xc1,0xc6,0x8c,0x5f,0xa9,0x30,0x38,0x25,0x1e,0x8d,0xcf,0x18,0x24,0xef,0x5a,0x9a,0x14,0x31,0xc0,0x2c,0x88,0xa5,0x3f,0x50,0x8b,0xb1,0xda,0x5d,0x26,0xd9,0xd3,0x81,0xb1 +.byte 0xec,0xf0,0x42,0x88,0xd0,0x81,0x51,0xf9,0x1b,0xbc,0x43,0xa4,0x37,0xf1,0xd7,0x90,0x21,0x7e,0xa0,0x3e,0x63,0xfb,0x21,0xfa,0x12,0xfb,0xde,0xc7,0xbf,0xb3,0x58,0xe7,0x76,0x42,0x20,0x01,0x3d,0x66,0x80,0xf1,0xb8,0xaf,0xfa,0x7d,0x96,0x89,0x36,0x48,0x95,0xd9,0x6e,0x6d,0xe6,0x4f,0xff,0x2a,0x47,0x61,0xf2,0x04,0xb7,0x83,0x14,0xce +.byte 0x0a,0x3c,0x73,0x17,0x50,0x88,0x03,0x25,0x4a,0xe3,0x13,0x55,0x8b,0x7e,0x50,0x38,0xfc,0x14,0x0b,0x04,0x8e,0xa8,0x5b,0xd6,0x72,0x20,0x60,0xe9,0xaa,0x22,0x82,0x11,0xc6,0xc4,0xd7,0xb9,0xc8,0x0c,0x7e,0x05,0xfb,0x90,0xe4,0x9c,0x28,0x89,0x29,0x99,0x63,0x4d,0xec,0x7b,0x50,0xbd,0xd8,0xa3,0x5b,0x50,0x77,0x19,0x81,0x92,0xce,0x82 +.size ecp_nistz256_precomputed,.-ecp_nistz256_precomputed +.align 5 +.Lpoly: +.quad 0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001 +.LRR: // 2^512 mod P precomputed for NIST P256 polynomial +.quad 0x0000000000000003,0xfffffffbffffffff,0xfffffffffffffffe,0x00000004fffffffd +.Lone_mont: +.quad 0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe +.Lone: +.quad 1,0,0,0 +.Lord: +.quad 0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000 +.LordK: +.quad 0xccd1c8aaee00bc4f +.byte 69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 + +.text + +// void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_to_mont +.type ecp_nistz256_to_mont,%function +.align 6 +ecp_nistz256_to_mont: + 
AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + adrp x3,.LRR + ldr x3,[x3,#:lo12:.LRR] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + adrp x2,.LRR + add x2,x2,#:lo12:.LRR + + bl __ecp_nistz256_mul_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont + +// void ecp_nistz256_from_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_from_mont +.type ecp_nistz256_from_mont,%function +.align 4 +ecp_nistz256_from_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + mov x3,#1 // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + adrp x2,.Lone + add x2,x2,#:lo12:.Lone + + bl __ecp_nistz256_mul_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont + +// void ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,%function +.align 4 +ecp_nistz256_mul_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_mul_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +// void ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,%function +.align 4 +ecp_nistz256_sqr_mont: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-32]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sqr_mont + + ldp x19,x20,[sp,#16] + ldp x29,x30,[sp],#32 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +// void ecp_nistz256_add(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_add +.type ecp_nistz256_add,%function +.align 4 +ecp_nistz256_add: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x8,x9,[x2] + ldp x16,x17,[x1,#16] + ldp x10,x11,[x2,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_add + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_add,.-ecp_nistz256_add + +// void ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_div_by_2 +.type ecp_nistz256_div_by_2,%function +.align 4 +ecp_nistz256_div_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_div_by_2 + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 + +// void ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_mul_by_2 +.type ecp_nistz256_mul_by_2,%function +.align 4 +ecp_nistz256_mul_by_2: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + + bl __ecp_nistz256_add // ret = a+a // 2*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 + +// void ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_mul_by_3 +.type ecp_nistz256_mul_by_3,%function +.align 4 +ecp_nistz256_mul_by_3: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + + bl __ecp_nistz256_add // ret = a+a // 2*a + + mov x8,x4 + mov x9,x5 + mov x10,x6 + mov x11,x7 + + bl __ecp_nistz256_add // ret += a // 2*a+a=3*a + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 + +// void ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4], +// const BN_ULONG x2[4]); +.globl ecp_nistz256_sub +.type ecp_nistz256_sub,%function +.align 4 +ecp_nistz256_sub: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ldp x14,x15,[x1] + ldp x16,x17,[x1,#16] + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_sub,.-ecp_nistz256_sub + +// void ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]); +.globl ecp_nistz256_neg +.type ecp_nistz256_neg,%function +.align 4 +ecp_nistz256_neg: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + mov x2,x1 + mov x14,xzr // a = 0 + mov x15,xzr + mov x16,xzr + mov x17,xzr + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + bl __ecp_nistz256_sub_from + + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_neg,.-ecp_nistz256_neg + +// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded +// to x4-x7 and b[0] - to x3 +.type __ecp_nistz256_mul_mont,%function +.align 4 +__ecp_nistz256_mul_mont: + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x11,x7,x3 + ldr x3,[x2,#8] // b[1] + + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adc x19,xzr,x11 + mov x20,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(1+1)] // b[1+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + ldr x3,[x2,#8*(2+1)] // b[2+1] + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + mul x8,x4,x3 // lo(a[0]*b[i]) + adcs x15,x16,x9 + mul x9,x5,x3 // lo(a[1]*b[i]) + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + mul x10,x6,x3 // lo(a[2]*b[i]) + adcs x17,x19,x11 + mul x11,x7,x3 // lo(a[3]*b[i]) + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts of multiplication + umulh x8,x4,x3 // hi(a[0]*b[i]) + adcs x15,x15,x9 + umulh x9,x5,x3 // hi(a[1]*b[i]) + adcs x16,x16,x10 + umulh x10,x6,x3 // hi(a[2]*b[i]) + adcs x17,x17,x11 + umulh x11,x7,x3 // hi(a[3]*b[i]) + adc x19,x19,xzr + adds x15,x15,x8 // accumulate high parts of multiplication + lsl x8,x14,#32 + adcs x16,x16,x9 + lsr x9,x14,#32 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + // last reduction + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adcs x17,x19,x11 + adc x19,x20,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs 
x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_mul_mont,.-__ecp_nistz256_mul_mont + +// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded +// to x4-x7 +.type __ecp_nistz256_sqr_mont,%function +.align 4 +__ecp_nistz256_sqr_mont: + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. + + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x2,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + lsl x8,x14,#32 + adcs x1,x1,x11 + lsr x9,x14,#32 + adc x2,x2,x7 + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + lsl x8,x14,#32 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + lsr x9,x14,#32 + adc x17,x11,xzr // can't overflow + subs x10,x14,x8 // "*0xffff0001" + sbc x11,x14,x9 + adds x14,x15,x8 // +=acc[0]<<96 and omit acc[0] + adcs x15,x16,x9 + adcs x16,x17,x10 // +=acc[0]*0xffff0001 + adc x17,x11,xzr // can't overflow + + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x2 + adc x19,xzr,xzr + + adds x8,x14,#1 // subs x8,x14,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x19,xzr // did it borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_sqr_mont,.-__ecp_nistz256_sqr_mont + +// Note that __ecp_nistz256_add expects both input vectors pre-loaded to +// x4-x7 and x8-x11. 
This is done because it's used in multiple +// contexts, e.g. in multiplication by 2 and 3... +.type __ecp_nistz256_add,%function +.align 4 +__ecp_nistz256_add: + adds x14,x14,x8 // ret = a+b + adcs x15,x15,x9 + adcs x16,x16,x10 + adcs x17,x17,x11 + adc x1,xzr,xzr // zap x1 + + adds x8,x14,#1 // subs x8,x4,#-1 // tmp = ret-modulus + sbcs x9,x15,x12 + sbcs x10,x16,xzr + sbcs x11,x17,x13 + sbcs xzr,x1,xzr // did subtraction borrow? + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_add,.-__ecp_nistz256_add + +.type __ecp_nistz256_sub_from,%function +.align 4 +__ecp_nistz256_sub_from: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x14,x8 // ret = a-b + sbcs x15,x15,x9 + sbcs x16,x16,x10 + sbcs x17,x17,x11 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_sub_from,.-__ecp_nistz256_sub_from + +.type __ecp_nistz256_sub_morf,%function +.align 4 +__ecp_nistz256_sub_morf: + ldp x8,x9,[x2] + ldp x10,x11,[x2,#16] + subs x14,x8,x14 // ret = b-a + sbcs x15,x9,x15 + sbcs x16,x10,x16 + sbcs x17,x11,x17 + sbc x1,xzr,xzr // zap x1 + + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = ret+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adc x11,x17,x13 + cmp x1,xzr // did subtraction borrow? + + csel x14,x14,x8,eq // ret = borrow ? ret+modulus : ret + csel x15,x15,x9,eq + csel x16,x16,x10,eq + stp x14,x15,[x0] + csel x17,x17,x11,eq + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_sub_morf,.-__ecp_nistz256_sub_morf + +.type __ecp_nistz256_div_by_2,%function +.align 4 +__ecp_nistz256_div_by_2: + subs x8,x14,#1 // adds x8,x4,#-1 // tmp = a+modulus + adcs x9,x15,x12 + adcs x10,x16,xzr + adcs x11,x17,x13 + adc x1,xzr,xzr // zap x1 + tst x14,#1 // is a even? + + csel x14,x14,x8,eq // ret = even ? a : a+modulus + csel x15,x15,x9,eq + csel x16,x16,x10,eq + csel x17,x17,x11,eq + csel x1,xzr,x1,eq + + lsr x14,x14,#1 // ret >>= 1 + orr x14,x14,x15,lsl#63 + lsr x15,x15,#1 + orr x15,x15,x16,lsl#63 + lsr x16,x16,#1 + orr x16,x16,x17,lsl#63 + lsr x17,x17,#1 + stp x14,x15,[x0] + orr x17,x17,x1,lsl#63 + stp x16,x17,[x0,#16] + + ret +.size __ecp_nistz256_div_by_2,.-__ecp_nistz256_div_by_2 +.globl ecp_nistz256_point_double +.type ecp_nistz256_point_double,%function +.align 5 +ecp_nistz256_point_double: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + sub sp,sp,#32*4 + +.Ldouble_shortcut: + ldp x14,x15,[x1,#32] + mov x21,x0 + ldp x16,x17,[x1,#48] + mov x22,x1 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + mov x8,x14 + ldr x13,[x13,#24] + mov x9,x15 + ldp x4,x5,[x22,#64] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[x22,#64+16] + add x0,sp,#0 + bl __ecp_nistz256_add // p256_mul_by_2(S, in_y); + + add x0,sp,#64 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z); + + ldp x8,x9,[x22] + ldp x10,x11,[x22,#16] + mov x4,x14 // put Zsqr aside for p256_sub + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add // p256_add(M, Zsqr, in_x); + + add x2,x22,#0 + mov x14,x4 // restore Zsqr + mov x15,x5 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x16,x6 + mov x17,x7 + ldp x6,x7,[sp,#0+16] + add x0,sp,#64 + bl __ecp_nistz256_sub_morf // p256_sub(Zsqr, in_x, Zsqr); + + add x0,sp,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(S, S); + + ldr x3,[x22,#32] + ldp x4,x5,[x22,#64] + ldp x6,x7,[x22,#64+16] + add x2,x22,#32 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(tmp0, in_z, in_y); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#0] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#0+16] + add x0,x21,#64 + bl __ecp_nistz256_add // p256_mul_by_2(res_z, tmp0); + + add x0,sp,#96 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S); + + ldr x3,[sp,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x0,x21,#32 + bl __ecp_nistz256_div_by_2 // p256_div_by_2(res_y, tmp0); + + add x2,sp,#64 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(M, M, Zsqr); + + mov x8,x14 // duplicate M + mov x9,x15 + mov x10,x16 + mov x11,x17 + mov x4,x14 // put M aside + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x0,sp,#32 + bl __ecp_nistz256_add + mov x8,x4 // restore M + mov x9,x5 + ldr x3,[x22] // forward load for p256_mul_mont + mov x10,x6 + ldp x4,x5,[sp,#0] + mov x11,x7 + ldp x6,x7,[sp,#0+16] + bl __ecp_nistz256_add // p256_mul_by_3(M, M); + + add x2,x22,#0 + add x0,sp,#0 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, in_x); + + mov x8,x14 + mov x9,x15 + ldp x4,x5,[sp,#32] // forward load for p256_sqr_mont + mov x10,x16 + mov x11,x17 + ldp x6,x7,[sp,#32+16] + add x0,sp,#96 + bl __ecp_nistz256_add // p256_mul_by_2(tmp0, S); + + add x0,x21,#0 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M); + + add x2,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, tmp0); + + add x2,sp,#0 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(S, S, res_x); + + ldr x3,[sp,#32] + mov x4,x14 // copy S + mov x5,x15 + mov x6,x16 + mov x7,x17 + add x2,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S, S, M); + + add x2,x21,#32 + add x0,x21,#32 + bl __ecp_nistz256_sub_from // p256_sub(res_y, S, res_y); + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_point_double,.-ecp_nistz256_point_double +.globl ecp_nistz256_point_add +.type ecp_nistz256_point_add,%function +.align 5 +ecp_nistz256_point_add: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-96]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#32*12 + + ldp x4,x5,[x2,#64] // in2_z + ldp x6,x7,[x2,#64+16] + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + orr x8,x4,x5 + orr x10,x6,x7 + orr x25,x8,x10 + cmp x25,#0 + csetm x25,ne // ~in2infty + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z); + + ldp x4,x5,[x22,#64] // in1_z + ldp x6,x7,[x22,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x2,x23,#64 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, Z2sqr, in2_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x22,#64 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#32] + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x2,x22,#32 + add x0,sp,#320 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S1, S1, in1_y); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#352] + ldp x6,x7,[sp,#352+16] + add x2,x23,#32 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,sp,#320 + ldr x3,[sp,#192] // forward load for p256_mul_mont + ldp x4,x5,[x22] + ldp x6,x7,[x22,#16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, S1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x26,x14,x16 // ~is_equal(S1,S2) + + add x2,sp,#192 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U1, in1_x, Z2sqr); + + ldr x3,[sp,#128] + ldp x4,x5,[x23] + ldp x6,x7,[x23,#16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in2_x, Z1sqr); + + add x2,sp,#256 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#96 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, U1); + + orr x14,x14,x15 // see if result is zero + orr x16,x16,x17 + orr x14,x14,x16 // ~is_equal(U1,U2) + + mvn x27,x24 // -1/0 -> 0/-1 + mvn x28,x25 // -1/0 -> 0/-1 + orr x14,x14,x27 + orr x14,x14,x28 + orr x14,x14,x26 + cbnz x14,.Ladd_proceed // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2)) + +.Ladd_double: + mov x1,x22 + mov x0,x21 + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + add sp,sp,#32*(12-4) // difference in stack frames + b .Ldouble_shortcut + +.align 4 +.Ladd_proceed: + add x0,sp,#192 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldp x4,x5,[sp,#96] + ldp x6,x7,[sp,#96+16] + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldr x3,[x23,#64] + ldp x4,x5,[sp,#64] + ldp x6,x7,[sp,#64+16] + add x2,x23,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, res_z, in2_z); + + ldr x3,[sp,#96] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,sp,#96 + add x0,sp,#224 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[sp,#128] + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x2,sp,#128 + add x0,sp,#288 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, U1, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#128 + bl 
__ecp_nistz256_add // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#192 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#224 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#288 + ldr x3,[sp,#224] // forward load for p256_mul_mont + ldp x4,x5,[sp,#320] + ldp x6,x7,[sp,#320+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,sp,#224 + add x0,sp,#352 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S1, Hcub); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#160 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#352 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + +.Ladd_done: + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#96 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_point_add,.-ecp_nistz256_point_add +.globl ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,%function +.align 5 +ecp_nistz256_point_add_affine: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + sub sp,sp,#32*10 + + mov x21,x0 + mov x22,x1 + mov x23,x2 + adrp x13,.Lpoly + add x13,x13,#:lo12:.Lpoly + ldr x12,[x13,#8] + ldr x13,[x13,#24] + + ldp x4,x5,[x1,#64] // in1_z + ldp x6,x7,[x1,#64+16] + orr x8,x4,x5 + orr x10,x6,x7 + orr x24,x8,x10 + cmp x24,#0 + csetm x24,ne // ~in1infty + + ldp x14,x15,[x2] // in2_x + ldp x16,x17,[x2,#16] + ldp x8,x9,[x2,#32] // in2_y + ldp x10,x11,[x2,#48] + orr x14,x14,x15 + orr x16,x16,x17 + orr x8,x8,x9 + orr x10,x10,x11 + orr x14,x14,x16 + orr x8,x8,x10 + orr x25,x14,x8 + cmp x25,#0 + csetm x25,ne // ~in2infty + + add x0,sp,#128 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z); + + mov x4,x14 + mov x5,x15 + mov x6,x16 + mov x7,x17 + ldr x3,[x23] + add x2,x23,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, Z1sqr, in2_x); + + add x2,x22,#0 + ldr x3,[x22,#64] // forward load for p256_mul_mont + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x0,sp,#160 + bl __ecp_nistz256_sub_from // p256_sub(H, U2, in1_x); + + add x2,x22,#64 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z); + + ldr x3,[x22,#64] + ldp x4,x5,[sp,#160] + ldp x6,x7,[sp,#160+16] + add x2,x22,#64 + add x0,sp,#64 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z); + + ldr x3,[x23,#32] + ldp x4,x5,[sp,#128] + ldp x6,x7,[sp,#128+16] + add x2,x23,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y); + + add x2,x22,#32 + ldp x4,x5,[sp,#160] // forward load for p256_sqr_mont + ldp x6,x7,[sp,#160+16] + add x0,sp,#192 + bl __ecp_nistz256_sub_from // p256_sub(R, S2, in1_y); + + add x0,sp,#224 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H); + + ldp x4,x5,[sp,#192] + ldp x6,x7,[sp,#192+16] + add x0,sp,#288 + bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R); + + ldr x3,[sp,#160] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,sp,#160 + add x0,sp,#256 + bl __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H); + + ldr x3,[x22] + ldp x4,x5,[sp,#224] + ldp x6,x7,[sp,#224+16] + add x2,x22,#0 + add x0,sp,#96 + bl __ecp_nistz256_mul_mont // p256_mul_mont(U2, in1_x, Hsqr); + + mov x8,x14 + mov x9,x15 + mov x10,x16 + mov x11,x17 + add x0,sp,#224 + bl __ecp_nistz256_add // p256_mul_by_2(Hsqr, U2); + + add x2,sp,#288 + add x0,sp,#0 + bl __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr); + + add x2,sp,#256 + bl __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub); + + add x2,sp,#96 + ldr x3,[x22,#32] // forward load for p256_mul_mont + ldp x4,x5,[sp,#256] + ldp x6,x7,[sp,#256+16] + add x0,sp,#32 + bl __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x); + + add x2,x22,#32 + add x0,sp,#128 + bl __ecp_nistz256_mul_mont // p256_mul_mont(S2, in1_y, Hcub); + + ldr x3,[sp,#192] + ldp x4,x5,[sp,#32] + ldp x6,x7,[sp,#32+16] + add x2,sp,#192 + add x0,sp,#32 + bl __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R); + + add x2,sp,#128 + bl __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2); + + ldp x4,x5,[sp,#0] // res + ldp x6,x7,[sp,#0+16] + ldp x8,x9,[x23] // in2 + ldp x10,x11,[x23,#16] + ldp x14,x15,[x22,#0] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#0+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+0+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? 
+ ldp x6,x7,[sp,#0+0+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#0+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#0+48] + stp x14,x15,[x21,#0] + stp x16,x17,[x21,#0+16] + adrp x23,.Lone_mont-64 + add x23,x23,#:lo12:.Lone_mont-64 + ldp x14,x15,[x22,#32] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#32+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + ldp x4,x5,[sp,#0+32+32] // res + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + ldp x6,x7,[sp,#0+32+48] + csel x14,x8,x14,ne + csel x15,x9,x15,ne + ldp x8,x9,[x23,#32+32] // in2 + csel x16,x10,x16,ne + csel x17,x11,x17,ne + ldp x10,x11,[x23,#32+48] + stp x14,x15,[x21,#32] + stp x16,x17,[x21,#32+16] + ldp x14,x15,[x22,#64] // in1 + cmp x24,#0 // ~, remember? + ldp x16,x17,[x22,#64+16] + csel x8,x4,x8,ne + csel x9,x5,x9,ne + csel x10,x6,x10,ne + csel x11,x7,x11,ne + cmp x25,#0 // ~, remember? + csel x14,x8,x14,ne + csel x15,x9,x15,ne + csel x16,x10,x16,ne + csel x17,x11,x17,ne + stp x14,x15,[x21,#64] + stp x16,x17,[x21,#64+16] + + add sp,x29,#0 // destroy frame + ldp x19,x20,[x29,#16] + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x29,x30,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4], +// uint64_t b[4]); +.globl ecp_nistz256_ord_mul_mont +.type ecp_nistz256_ord_mul_mont,%function +.align 4 +ecp_nistz256_ord_mul_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,.Lord + add x23,x23,#:lo12:.Lord + ldr x3,[x2] // bp[0] + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + + mul x14,x4,x3 // a[0]*b[0] + umulh x8,x4,x3 + + mul x15,x5,x3 // a[1]*b[0] + umulh x9,x5,x3 + + mul x16,x6,x3 // a[2]*b[0] + umulh x10,x6,x3 + + mul x17,x7,x3 // a[3]*b[0] + umulh x19,x7,x3 + + mul x24,x14,x23 + + adds x15,x15,x8 // accumulate high parts of multiplication + adcs x16,x16,x9 + adcs x17,x17,x10 + adc x19,x19,xzr + mov x20,xzr + ldr x3,[x2,#8*1] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*2] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs 
x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + ldr x3,[x2,#8*3] // b[i] + + lsl x8,x24,#32 + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + mul x8,x4,x3 + adc x11,x11,xzr + mul x9,x5,x3 + + adds x14,x15,x10 + mul x10,x6,x3 + adcs x15,x16,x11 + mul x11,x7,x3 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + adds x14,x14,x8 // accumulate low parts + umulh x8,x4,x3 + adcs x15,x15,x9 + umulh x9,x5,x3 + adcs x16,x16,x10 + umulh x10,x6,x3 + adcs x17,x17,x11 + umulh x11,x7,x3 + adc x19,x19,xzr + mul x24,x14,x23 + adds x15,x15,x8 // accumulate high parts + adcs x16,x16,x9 + adcs x17,x17,x10 + adcs x19,x19,x11 + adc x20,xzr,xzr + lsl x8,x24,#32 // last reduction + subs x16,x16,x24 + lsr x9,x24,#32 + sbcs x17,x17,x8 + sbcs x19,x19,x9 + sbc x20,x20,xzr + + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adcs x17,x19,x24 + adc x19,x20,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x14,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x15,x15,x9,lo + csel x16,x16,x10,lo + stp x14,x15,[x0] + csel x17,x17,x11,lo + stp x16,x17,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret +.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont + +//////////////////////////////////////////////////////////////////////// +// void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4], +// uint64_t rep); +.globl ecp_nistz256_ord_sqr_mont +.type ecp_nistz256_ord_sqr_mont,%function +.align 4 +ecp_nistz256_ord_sqr_mont: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-64]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + + adrp x23,.Lord + add x23,x23,#:lo12:.Lord + ldp x4,x5,[x1] + ldp x6,x7,[x1,#16] + + ldp x12,x13,[x23,#0] + ldp x21,x22,[x23,#16] + ldr x23,[x23,#32] + b .Loop_ord_sqr + +.align 4 +.Loop_ord_sqr: + sub x2,x2,#1 + //////////////////////////////////////////////////////////////// + // | | | | | |a1*a0| | + // | | | | |a2*a0| | | + // | |a3*a2|a3*a0| | | | + // | | | |a2*a1| | | | + // | | |a3*a1| | | | | + // *| | | | | | | | 2| + // +|a3*a3|a2*a2|a1*a1|a0*a0| + // |--+--+--+--+--+--+--+--| + // |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is , i.e. follow + // + // "can't overflow" below mark carrying into high part of + // multiplication result, which can't overflow, because it + // can never be all ones. 
+ + mul x15,x5,x4 // a[1]*a[0] + umulh x9,x5,x4 + mul x16,x6,x4 // a[2]*a[0] + umulh x10,x6,x4 + mul x17,x7,x4 // a[3]*a[0] + umulh x19,x7,x4 + + adds x16,x16,x9 // accumulate high parts of multiplication + mul x8,x6,x5 // a[2]*a[1] + umulh x9,x6,x5 + adcs x17,x17,x10 + mul x10,x7,x5 // a[3]*a[1] + umulh x11,x7,x5 + adc x19,x19,xzr // can't overflow + + mul x20,x7,x6 // a[3]*a[2] + umulh x1,x7,x6 + + adds x9,x9,x10 // accumulate high parts of multiplication + mul x14,x4,x4 // a[0]*a[0] + adc x10,x11,xzr // can't overflow + + adds x17,x17,x8 // accumulate low parts of multiplication + umulh x4,x4,x4 + adcs x19,x19,x9 + mul x9,x5,x5 // a[1]*a[1] + adcs x20,x20,x10 + umulh x5,x5,x5 + adc x1,x1,xzr // can't overflow + + adds x15,x15,x15 // acc[1-6]*=2 + mul x10,x6,x6 // a[2]*a[2] + adcs x16,x16,x16 + umulh x6,x6,x6 + adcs x17,x17,x17 + mul x11,x7,x7 // a[3]*a[3] + adcs x19,x19,x19 + umulh x7,x7,x7 + adcs x20,x20,x20 + adcs x1,x1,x1 + adc x3,xzr,xzr + + adds x15,x15,x4 // +a[i]*a[i] + mul x24,x14,x23 + adcs x16,x16,x9 + adcs x17,x17,x5 + adcs x19,x19,x10 + adcs x20,x20,x6 + adcs x1,x1,x11 + adc x3,x3,x7 + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + mul x24,x14,x23 + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x24 + mul x10,x13,x24 + umulh x11,x13,x24 + + adcs x10,x10,x9 + adc x11,x11,xzr + + adds x14,x15,x10 + adcs x15,x16,x11 + adcs x16,x17,x24 + adc x17,xzr,x24 // can't overflow + mul x11,x14,x23 + lsl x8,x24,#32 + subs x15,x15,x24 + lsr x9,x24,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + subs xzr,x14,#1 + umulh x9,x12,x11 + mul x10,x13,x11 + umulh x24,x13,x11 + + adcs x10,x10,x9 + adc x24,x24,xzr + + adds x14,x15,x10 + adcs x15,x16,x24 + adcs x16,x17,x11 + adc x17,xzr,x11 // can't overflow + lsl x8,x11,#32 + subs x15,x15,x11 + lsr x9,x11,#32 + sbcs x16,x16,x8 + sbc x17,x17,x9 // can't borrow + adds x14,x14,x19 // accumulate upper half + adcs x15,x15,x20 + adcs x16,x16,x1 + adcs x17,x17,x3 + adc x19,xzr,xzr + + subs x8,x14,x12 // ret -= modulus + sbcs x9,x15,x13 + sbcs x10,x16,x21 + sbcs x11,x17,x22 + sbcs xzr,x19,xzr + + csel x4,x14,x8,lo // ret = borrow ? ret : ret-modulus + csel x5,x15,x9,lo + csel x6,x16,x10,lo + csel x7,x17,x11,lo + + cbnz x2,.Loop_ord_sqr + + stp x4,x5,[x0] + stp x6,x7,[x0,#16] + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldr x29,[sp],#64 + ret +.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont +// void ecp_nistz256_scatter_w5(void *x0,const P256_POINT *x1, +// int x2); +.globl ecp_nistz256_scatter_w5 +.type ecp_nistz256_scatter_w5,%function +.align 4 +ecp_nistz256_scatter_w5: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + add x0,x0,x2,lsl#2 + + ldp x4,x5,[x1] // X + ldp x6,x7,[x1,#16] + stur w4,[x0,#64*0-4] + lsr x4,x4,#32 + str w5,[x0,#64*1-4] + lsr x5,x5,#32 + str w6,[x0,#64*2-4] + lsr x6,x6,#32 + str w7,[x0,#64*3-4] + lsr x7,x7,#32 + str w4,[x0,#64*4-4] + str w5,[x0,#64*5-4] + str w6,[x0,#64*6-4] + str w7,[x0,#64*7-4] + add x0,x0,#64*8 + + ldp x4,x5,[x1,#32] // Y + ldp x6,x7,[x1,#48] + stur w4,[x0,#64*0-4] + lsr x4,x4,#32 + str w5,[x0,#64*1-4] + lsr x5,x5,#32 + str w6,[x0,#64*2-4] + lsr x6,x6,#32 + str w7,[x0,#64*3-4] + lsr x7,x7,#32 + str w4,[x0,#64*4-4] + str w5,[x0,#64*5-4] + str w6,[x0,#64*6-4] + str w7,[x0,#64*7-4] + add x0,x0,#64*8 + + ldp x4,x5,[x1,#64] // Z + ldp x6,x7,[x1,#80] + stur w4,[x0,#64*0-4] + lsr x4,x4,#32 + str w5,[x0,#64*1-4] + lsr x5,x5,#32 + str w6,[x0,#64*2-4] + lsr x6,x6,#32 + str w7,[x0,#64*3-4] + lsr x7,x7,#32 + str w4,[x0,#64*4-4] + str w5,[x0,#64*5-4] + str w6,[x0,#64*6-4] + str w7,[x0,#64*7-4] + + ldr x29,[sp],#16 + ret +.size ecp_nistz256_scatter_w5,.-ecp_nistz256_scatter_w5 + +// void ecp_nistz256_gather_w5(P256_POINT *x0,const void *x1, +// int x2); +.globl ecp_nistz256_gather_w5 +.type ecp_nistz256_gather_w5,%function +.align 4 +ecp_nistz256_gather_w5: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + cmp x2,xzr + csetm x3,ne + add x2,x2,x3 + add x1,x1,x2,lsl#2 + + ldr w4,[x1,#64*0] + ldr w5,[x1,#64*1] + ldr w6,[x1,#64*2] + ldr w7,[x1,#64*3] + ldr w8,[x1,#64*4] + ldr w9,[x1,#64*5] + ldr w10,[x1,#64*6] + ldr w11,[x1,#64*7] + add x1,x1,#64*8 + orr x4,x4,x8,lsl#32 + orr x5,x5,x9,lsl#32 + orr x6,x6,x10,lsl#32 + orr x7,x7,x11,lsl#32 + csel x4,x4,xzr,ne + csel x5,x5,xzr,ne + csel x6,x6,xzr,ne + csel x7,x7,xzr,ne + stp x4,x5,[x0] // X + stp x6,x7,[x0,#16] + + ldr w4,[x1,#64*0] + ldr w5,[x1,#64*1] + ldr w6,[x1,#64*2] + ldr w7,[x1,#64*3] + ldr w8,[x1,#64*4] + ldr w9,[x1,#64*5] + ldr w10,[x1,#64*6] + ldr w11,[x1,#64*7] + add x1,x1,#64*8 + orr x4,x4,x8,lsl#32 + orr x5,x5,x9,lsl#32 + orr x6,x6,x10,lsl#32 + orr x7,x7,x11,lsl#32 + csel x4,x4,xzr,ne + csel x5,x5,xzr,ne + csel x6,x6,xzr,ne + csel x7,x7,xzr,ne + stp x4,x5,[x0,#32] // Y + stp x6,x7,[x0,#48] + + ldr w4,[x1,#64*0] + ldr w5,[x1,#64*1] + ldr w6,[x1,#64*2] + ldr w7,[x1,#64*3] + ldr w8,[x1,#64*4] + ldr w9,[x1,#64*5] + ldr w10,[x1,#64*6] + ldr w11,[x1,#64*7] + orr x4,x4,x8,lsl#32 + orr x5,x5,x9,lsl#32 + orr x6,x6,x10,lsl#32 + orr x7,x7,x11,lsl#32 + csel x4,x4,xzr,ne + csel x5,x5,xzr,ne + csel x6,x6,xzr,ne + csel x7,x7,xzr,ne + stp x4,x5,[x0,#64] // Z + stp x6,x7,[x0,#80] + + ldr x29,[sp],#16 + ret +.size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5 + +// void ecp_nistz256_scatter_w7(void *x0,const P256_POINT_AFFINE *x1, +// int x2); +.globl ecp_nistz256_scatter_w7 +.type ecp_nistz256_scatter_w7,%function +.align 4 +ecp_nistz256_scatter_w7: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + add x0,x0,x2 + mov x2,#64/8 +.Loop_scatter_w7: + ldr x3,[x1],#8 + subs x2,x2,#1 + prfm pstl1strm,[x0,#4096+64*0] + prfm pstl1strm,[x0,#4096+64*1] + prfm pstl1strm,[x0,#4096+64*2] + prfm pstl1strm,[x0,#4096+64*3] + prfm pstl1strm,[x0,#4096+64*4] + prfm pstl1strm,[x0,#4096+64*5] + prfm pstl1strm,[x0,#4096+64*6] + prfm pstl1strm,[x0,#4096+64*7] + strb w3,[x0,#64*0] + lsr x3,x3,#8 + strb w3,[x0,#64*1] + lsr x3,x3,#8 + strb w3,[x0,#64*2] + lsr x3,x3,#8 + strb w3,[x0,#64*3] + lsr x3,x3,#8 + strb w3,[x0,#64*4] + lsr x3,x3,#8 + strb w3,[x0,#64*5] + lsr x3,x3,#8 + strb w3,[x0,#64*6] + lsr x3,x3,#8 + strb w3,[x0,#64*7] + add x0,x0,#64*8 + b.ne .Loop_scatter_w7 + + ldr x29,[sp],#16 + ret +.size ecp_nistz256_scatter_w7,.-ecp_nistz256_scatter_w7 + +// void ecp_nistz256_gather_w7(P256_POINT_AFFINE *x0,const void *x1, +// int x2); +.globl ecp_nistz256_gather_w7 +.type ecp_nistz256_gather_w7,%function +.align 4 +ecp_nistz256_gather_w7: + AARCH64_VALID_CALL_TARGET + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + cmp x2,xzr + csetm x3,ne + add x2,x2,x3 + add x1,x1,x2 + mov x2,#64/8 + nop +.Loop_gather_w7: + ldrb w4,[x1,#64*0] + prfm pldl1strm,[x1,#4096+64*0] + subs x2,x2,#1 + ldrb w5,[x1,#64*1] + prfm pldl1strm,[x1,#4096+64*1] + ldrb w6,[x1,#64*2] + prfm pldl1strm,[x1,#4096+64*2] + ldrb w7,[x1,#64*3] + prfm pldl1strm,[x1,#4096+64*3] + ldrb w8,[x1,#64*4] + prfm pldl1strm,[x1,#4096+64*4] + ldrb w9,[x1,#64*5] + prfm pldl1strm,[x1,#4096+64*5] + ldrb w10,[x1,#64*6] + prfm pldl1strm,[x1,#4096+64*6] + ldrb w11,[x1,#64*7] + prfm pldl1strm,[x1,#4096+64*7] + add x1,x1,#64*8 + orr x4,x4,x5,lsl#8 + orr x6,x6,x7,lsl#8 + orr x8,x8,x9,lsl#8 + orr x4,x4,x6,lsl#16 + orr x10,x10,x11,lsl#8 + orr x4,x4,x8,lsl#32 + orr x4,x4,x10,lsl#48 + and x4,x4,x3 + str x4,[x0],#8 + b.ne .Loop_gather_w7 + + ldr x29,[sp],#16 + ret +.size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7 diff --git a/contrib/openssl-cmake/asm/crypto/ec/ecp_nistz256-x86_64.s b/contrib/openssl-cmake/asm/crypto/ec/ecp_nistz256-x86_64.s new file mode 100644 index 000000000000..66cdedf67a44 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/ec/ecp_nistz256-x86_64.s @@ -0,0 +1,7345 @@ +.section .rodata +.globl ecp_nistz256_precomputed +.type ecp_nistz256_precomputed,@object +.align 4096 +ecp_nistz256_precomputed: +.long 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,0xa53755c6,0x18905f76,0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,0x25885d85,0x8571ff18 +.long 0x10ddd64d,0x850046d4,0xa433827d,0xaa6ae3c1,0x8d1490d9,0x73220503,0x3dcf3a3b,0xf6bb32e4,0x61bee1a5,0x2f3648d3,0xeb236ff8,0x152cd7cb,0x92042dbe,0x19a8fb0e,0x0a5b8a3b,0x78c57751 +.long 0x4eebc127,0xffac3f90,0x087d81fb,0xb027f84a,0x87cbbc98,0x66ad77dd,0xb6ff747e,0x26936a3f,0xc983a7eb,0xb04c5c1f,0x0861fe1a,0x583e47ad,0x1a2ee98e,0x78820831,0xe587cc07,0xd5f06a29 +.long 0x46918dcc,0x74b0b50d,0xc623c173,0x4650a6ed,0xe8100af2,0x0cdaacac,0x41b0176b,0x577362f5,0xe4cbaba6,0x2d96f24c,0xfad6f447,0x17628471,0xe5ddd22e,0x6b6c36de,0x4c5ab863,0x84b14c39 +.long 0xc45c61f5,0xbe1b8aae,0x94b9537d,0x90ec649a,0xd076c20c,0x941cb5aa,0x890523c8,0xc9079605,0xe7ba4f10,0xeb309b4a,0xe5eb882b,0x73c568ef,0x7e7a1f68,0x3540a987,0x2dd1e916,0x73a076bb +.long 0x3e77664a,0x40394737,0x346cee3e,0x55ae744f,0x5b17a3ad,0xd50a961a,0x54213673,0x13074b59,0xd377e44b,0x93d36220,0xadff14b5,0x299c2b53,0xef639f11,0xf424d44c,0x4a07f75f,0xa4c9916d +.long 
0xa0173b4f,0x0746354e,0xd23c00f7,0x2bd20213,0x0c23bb08,0xf43eaab5,0xc3123e03,0x13ba5119,0x3f5b9d4d,0x2847d030,0x5da67bdd,0x6742f2f2,0x77c94195,0xef933bdc,0x6e240867,0xeaedd915 +.long 0x9499a78f,0x27f14cd1,0x6f9b3455,0x462ab5c5,0xf02cfc6b,0x8f90f02a,0xb265230d,0xb763891e,0x532d4977,0xf59da3a9,0xcf9eba15,0x21e3327d,0xbe60bbf0,0x123c7b84,0x7706df76,0x56ec12f2 +.long 0x264e20e8,0x75c96e8f,0x59a7a841,0xabe6bfed,0x44c8eb00,0x2cc09c04,0xf0c4e16b,0xe05b3080,0xa45f3314,0x1eb7777a,0xce5d45e3,0x56af7bed,0x88b12f1a,0x2b6e019a,0xfd835f9b,0x086659cd +.long 0x9dc21ec8,0x2c18dbd1,0x0fcf8139,0x98f9868a,0x48250b49,0x737d2cd6,0x24b3428f,0xcc61c947,0x80dd9e76,0x0c2b4078,0x383fbe08,0xc43a8991,0x779be5d2,0x5f7d2d65,0xeb3b4ab5,0x78719a54 +.long 0x6245e404,0xea7d260a,0x6e7fdfe0,0x9de40795,0x8dac1ab5,0x1ff3a415,0x649c9073,0x3e7090f1,0x2b944e88,0x1a768561,0xe57f61c8,0x250f939e,0x1ead643d,0x0c0daa89,0xe125b88e,0x68930023 +.long 0xd2697768,0x04b71aa7,0xca345a33,0xabdedef5,0xee37385e,0x2409d29d,0xcb83e156,0x4ee1df77,0x1cbb5b43,0x0cac12d9,0xca895637,0x170ed2f6,0x8ade6d66,0x28228cfa,0x53238aca,0x7ff57c95 +.long 0x4b2ed709,0xccc42563,0x856fd30d,0x0e356769,0x559e9811,0xbcbcd43f,0x5395b759,0x738477ac,0xc00ee17f,0x35752b90,0x742ed2e3,0x68748390,0xbd1f5bc1,0x7cd06422,0xc9e7b797,0xfbc08769 +.long 0xb0cf664a,0xa242a35b,0x7f9707e3,0x126e48f7,0xc6832660,0x1717bf54,0xfd12c72e,0xfaae7332,0x995d586b,0x27b52db7,0x832237c2,0xbe29569e,0x2a65e7db,0xe8e4193e,0x2eaa1bbb,0x152706dc +.long 0xbc60055b,0x72bcd8b7,0x56e27e4b,0x03cc23ee,0xe4819370,0xee337424,0x0ad3da09,0xe2aa0e43,0x6383c45d,0x40b8524f,0x42a41b25,0xd7663554,0x778a4797,0x64efa6de,0x7079adf4,0x2042170a +.long 0x0bc6fb80,0x808b0b65,0x3ffe2e6b,0x5882e075,0x2c83f549,0xd5ef2f7c,0x9103b723,0x54d63c80,0x52a23f9b,0xf2f11bd6,0x4b0b6587,0x3670c319,0xb1580e9e,0x55c4623b,0x01efe220,0x64edf7b2 +.long 0xd53c5c9d,0x97091dcb,0xac0a177b,0xf17624b6,0x2cfe2dff,0xb0f13975,0x6c7a574e,0xc1a35c0a,0x93e79987,0x227d3146,0xe89cb80e,0x0575bf30,0x0d1883bb,0x2f4e247f,0x3274c3d0,0xebd51226 +.long 0x56ada97a,0x5f3e51c8,0x8f8b403e,0x4afc964d,0x412e2979,0xa6f247ab,0x6f80ebda,0x675abd1b,0x5e485a1d,0x66a2bd72,0x8f4f0b3c,0x4b2a5caf,0x1b847bba,0x2626927f,0x0502394d,0x6c6fc7d9 +.long 0xa5659ae8,0xfea912ba,0x25e1a16e,0x68363aba,0x752c41ac,0xb8842277,0x2897c3fc,0xfe545c28,0xdc4c696b,0x2d36e9e7,0xfba977c5,0x5806244a,0xe39508c1,0x85665e9b,0x6d12597b,0xf720ee25 +.long 0xd2337a31,0x8a979129,0x0f862bdc,0x5916868f,0x5dd283ba,0x048099d9,0xfe5bfb4e,0xe2d1eeb6,0x7884005d,0x82ef1c41,0xffffcbae,0xa2d4ec17,0x8aa95e66,0x9161c53f,0xc5fee0d0,0x5ee104e1 +.long 0xc135b208,0x562e4cec,0x4783f47d,0x74e1b265,0x5a3f3b30,0x6d2a506c,0xc16762fc,0xecead9f4,0xe286e5b9,0xf29dd4b2,0x83bb3c61,0x1b0fadc0,0x7fac29a4,0x7a75023e,0xc9477fa3,0xc086d5f1 +.long 0x2f6f3076,0x0fc61135,0xe3912a9a,0xc99ffa23,0xd2f8ba3d,0x6a0b0685,0xe93358a4,0xfdc777e8,0x35415f04,0x94a787bb,0x4d23fea4,0x640c2d6a,0x153a35b5,0x9de917da,0x5d5cd074,0x793e8d07 +.long 0x2de45068,0xf4f87653,0x9e2e1f6e,0x37c7a7e8,0xa3584069,0xd0825fa2,0x1727bf42,0xaf2cea7c,0x9e4785a9,0x0360a4fb,0x27299f4a,0xe5fda49c,0x71ac2f71,0x48068e13,0x9077666f,0x83d0687b +.long 0x15d02819,0x6d3883b2,0x40dd9a35,0x6d0d7550,0x1d2b469f,0x61d7cbf9,0x2efc3115,0xf97b232f,0xb24bcbc7,0xa551d750,0x88a1e356,0x11ea4949,0x93cb7501,0x7669f031,0xca737b8a,0x595dc55e +.long 0xd837879f,0xa4a319ac,0xed6b67b0,0x6fc1b49e,0x32f1f3af,0xe3959933,0x65432a2e,0x966742eb,0xb4966228,0x4b8dc9fe,0x43f43950,0x96cc6312,0xc9b731ee,0x12068859,0x56f79968,0x7b948dc3 +.long 
0xed1f8008,0x61e4ad32,0xd8b17538,0xe6c9267a,0x857ff6fb,0x1ac7c5eb,0x55f2fb10,0x994baaa8,0x1d248018,0x84cf14e1,0x628ac508,0x5a39898b,0x5fa944f5,0x14fde97b,0xd12e5ac7,0xed178030 +.long 0x97e2feb4,0x042c2af4,0xaebf7313,0xd36a42d7,0x084ffdd7,0x49d2c9eb,0x2ef7c76a,0x9f8aa54b,0x09895e70,0x9200b7ba,0xddb7fb58,0x3bd0c66f,0x78eb4cbb,0x2d97d108,0xd84bde31,0x2d431068 +.long 0x172ccd1f,0x4b523eb7,0x30a6a892,0x7323cb28,0xcfe153eb,0x97082ec0,0xf2aadb97,0xe97f6b6a,0xd1a83da1,0x1d3d393e,0x804b2a68,0xa6a7f9c7,0x2d0cb71e,0x4a688b48,0x40585278,0xa9b4cc5f +.long 0xcb66e132,0x5e5db46a,0x0d925880,0xf1be963a,0x0317b9e2,0x944a7027,0x48603d48,0xe266f959,0x5c208899,0x98db6673,0xa2fb18a3,0x90472447,0x777c619f,0x8a966939,0x2a3be21b,0x3798142a +.long 0x3298b343,0xb4241cb1,0xb44f65a1,0xa3a14e49,0x3ac77acd,0xc5f4d6cd,0x52b6fc3c,0xd0288cb5,0x1c040abc,0xd5cc8c2f,0x06bf9b4a,0xb675511e,0x9b3aa441,0xd667da37,0x51601f72,0x460d45ce +.long 0x6755ff89,0xe2f73c69,0x473017e6,0xdd3cf7e7,0x3cf7600d,0x8ef5689d,0xb1fc87b4,0x948dc4f8,0x4ea53299,0xd9e9fe81,0x98eb6028,0x2d921ca2,0x0c9803fc,0xfaecedfd,0x4d7b4745,0xf38ae891 +.long 0xc5e3a3d8,0xd8c5fccf,0x4079dfbf,0xbefd904c,0xfead0197,0xbc6d6a58,0x695532a4,0x39227077,0xdbef42f5,0x09e23e6d,0x480a9908,0x7e449b64,0xad9a2e40,0x7b969c1a,0x9591c2a4,0x6231d792 +.long 0x0f664534,0x87151456,0x4b68f103,0x85ceae7c,0x65578ab9,0xac09c4ae,0xf044b10c,0x33ec6868,0x3a8ec1f1,0x6ac4832b,0x5847d5ef,0x5509d128,0x763f1574,0xf909604f,0xc32f63c4,0xb16c4303 +.long 0x7ca23cd3,0xb6ab2014,0xa391849d,0xcaa7a5c6,0x75678d94,0x5b0673a3,0xdd303e64,0xc982ddd4,0x5db6f971,0xfd7b000b,0x6f876f92,0xbba2cb1f,0x3c569426,0xc77332a3,0x570d74f8,0xa159100c +.long 0xdec67ef5,0xfd16847f,0x233e76b7,0x742ee464,0xefc2b4c8,0x0b8e4134,0x42a3e521,0xca640b86,0x8ceb6aa9,0x653a0190,0x547852d5,0x313c300c,0x6b237af7,0x24e4ab12,0x8bb47af8,0x2ba90162 +.long 0xa8219bb7,0x3d5e58d6,0x1b06c57f,0xc691d0bd,0xd257576e,0x0ae4cb10,0xd54a3dc3,0x3569656c,0x94cda03a,0xe5ebaebd,0x162bfe13,0x934e82d3,0xe251a0c6,0x450ac0ba,0xdd6da526,0x480b9e11 +.long 0x8cce08b5,0x00467bc5,0x7f178d55,0xb636458c,0xa677d806,0xc5748bae,0xdfa394eb,0x2763a387,0x7d3cebb6,0xa12b448a,0x6f20d850,0xe7adda3e,0x1558462c,0xf63ebce5,0x620088a8,0x58b36143 +.long 0x4d63c0ee,0x8a2cc3ca,0x0fe948ce,0x51233117,0x222ef33b,0x7463fd85,0x7c603d6c,0xadf0c7dc,0xfe7765e5,0x0ec32d3b,0xbf380409,0xccaab359,0x8e59319c,0xbdaa84d6,0x9c80c34d,0xd9a4c280 +.long 0xa059c142,0xa9d89488,0xff0b9346,0x6f5ae714,0x16fb3664,0x068f237d,0x363186ac,0x5853e4c4,0x63c52f98,0xe2d87d23,0x81828876,0x2ec4a766,0xe14e7b1c,0x47b864fa,0x69192408,0x0c0bc0e5 +.long 0xb82e9f3e,0xe4d7681d,0xdf25e13c,0x83200f0b,0x66f27280,0x8909984c,0x75f73227,0x462d7b00,0xf2651798,0xd90ba188,0x36ab1c34,0x74c6e18c,0x5ef54359,0xab256ea3,0xd1aa702f,0x03466612 +.long 0x2ed22e91,0x624d6049,0x6f072822,0x6fdfe0b5,0x39ce2271,0xeeca1115,0xdb01614f,0x98100a4f,0xa35c628f,0xb6b0daa2,0xc87e9a47,0xb6f94d2e,0x1d57d9ce,0xc6773259,0x03884a7b,0xf70bfeec +.long 0xed2bad01,0x5fb35ccf,0x1da6a5c7,0xa155cbe3,0x30a92f8f,0xc2e2594c,0x5bfafe43,0x649c89ce,0xe9ff257a,0xd158667d,0xf32c50ae,0x9b359611,0x906014cf,0x4b00b20b,0x89bc7d3d,0xf3a8cfe3 +.long 0x248a7d06,0x4ff23ffd,0x878873fa,0x80c5bfb4,0x05745981,0xb7d9ad90,0x3db01994,0x179c85db,0x61a6966c,0xba41b062,0xeadce5a8,0x4d82d052,0xa5e6a318,0x9e91cd3b,0x95b2dda0,0x47795f4f +.long 0xd55a897c,0xecfd7c1f,0xb29110fb,0x009194ab,0xe381d3b0,0x5f0e2046,0xa98dd291,0x5f3425f6,0x730d50da,0xbfa06687,0x4b083b7f,0x0423446c,0xd69d3417,0x397a247d,0x387ba42a,0xeb629f90 +.long 
0xd5cd79bf,0x1ee426cc,0x946c6e18,0x0032940b,0x57477f58,0x1b1e8ae0,0x6d823278,0xe94f7d34,0x782ba21a,0xc747cb96,0xf72b33a5,0xc5254469,0xc7f80c81,0x772ef6de,0x2cd9e6b5,0xd73acbfe +.long 0x49ee90d9,0x4075b5b1,0xa06e9eba,0x785c339a,0xabf825e0,0xa1030d5b,0xa42931dc,0xcec684c3,0xc1586e63,0x42ab62c9,0x5ab43f2b,0x45431d66,0x55f7835d,0x57c8b2c0,0xc1b7f865,0x033da338 +.long 0xcaa76097,0x283c7513,0x36c83906,0x0a624fa9,0x715af2c7,0x6b20afec,0xeba78bfd,0x4b969974,0xd921d60e,0x220755cc,0x7baeca13,0x9b944e10,0x5ded93d4,0x04819d51,0x6dddfd27,0x9bbff86e +.long 0x77adc612,0x6b344130,0xbbd803a0,0xa7496529,0x6d8805bd,0x1a1baaa7,0x470343ad,0xc8403902,0x175adff1,0x39f59f66,0xb7d8c5b7,0x0b26d7fb,0x529d75e3,0xa875f5ce,0x41325cc2,0x85efc7e9 +.long 0x1ff6acd3,0x21950b42,0x53dc6909,0xffe70484,0x28766127,0xff4cd0b2,0x4fb7db2b,0xabdbe608,0x5e1109e8,0x837c9228,0xf4645b5a,0x26147d27,0xf7818ed8,0x4d78f592,0xf247fa36,0xd394077e +.long 0x488c171a,0x0fb9c2d0,0x13685278,0xa78bfbaa,0xd5b1fa6a,0xedfbe268,0x2b7eaba7,0x0dceb8db,0x9ae2b710,0xbf9e8089,0xa4449c96,0xefde7ae6,0xcc143a46,0x43b7716b,0xc3628c13,0xd7d34194 +.long 0x3b3f64c9,0x508cec1c,0x1e5edf3f,0xe20bc0ba,0x2f4318d4,0xda1deb85,0x5c3fa443,0xd20ebe0d,0x73241ea3,0x370b4ea7,0x5e1a5f65,0x61f1511c,0x82681c62,0x99a5e23d,0xa2f54c2d,0xd731e383 +.long 0x83445904,0x2692f36e,0xaf45f9c0,0x2e0ec469,0xc67528b7,0x905a3201,0xd0e5e542,0x88f77f34,0x5864687c,0xf67a8d29,0x22df3562,0x23b92eae,0x9bbec39e,0x5c27014b,0x9c0f0f8d,0x7ef2f226 +.long 0x546c4d8d,0x97359638,0x92f24679,0x5f9c3fc4,0xa8c8acd9,0x912e8bed,0x306634b0,0xec3a318d,0xc31cb264,0x80167f41,0x522113f2,0x3db82f6f,0xdcafe197,0xb155bcd2,0x43465283,0xfba1da59 +.long 0xb212cf53,0xa0425b8e,0xf8557c5f,0x4f2e512e,0x25c4d56c,0xc1286ff9,0xee26c851,0xbb8a0fea,0xe7d6107e,0xc28f70d2,0xe76265aa,0x7ee0c444,0x1d1936b1,0x3df277a4,0xea9595eb,0x1a556e3f +.long 0xe7305683,0x258bbbf9,0x07ef5be6,0x31eea5bf,0x46c814c1,0x0deb0e4a,0xa7b730dd,0x5cee8449,0xa0182bde,0xeab495c5,0x9e27a6b4,0xee759f87,0x80e518ca,0xc2cf6a68,0xf14cf3f4,0x25e8013f +.long 0x7e8d7a14,0x8fc44140,0x9556f36a,0xbb1ff3ca,0x14600044,0x6a844385,0x7451ae63,0xba3f0c4a,0x1f9af32a,0xdfcac25b,0xb1f2214b,0x01e0db86,0xa4b596ac,0x4e9a5bc2,0x026c2c08,0x83927681 +.long 0x7acaca28,0x3ec832e7,0xc7385b29,0x1bfeea57,0xfd1eaf38,0x068212e3,0x6acf8ccc,0xc1329830,0x2aac9e59,0xb909f2db,0xb661782a,0x5748060d,0xc79b7a01,0xc5ab2632,0x00017626,0xda44c6c6 +.long 0xa7ea82f0,0xf26c00e8,0xe4299aaf,0x99cac80d,0x7ed78be1,0xd66fe3b6,0x648d02cd,0x305f725f,0x623fb21b,0x33ed1bc4,0x7a6319ad,0xfa70533e,0xbe5ffb3e,0x17ab562d,0x56674741,0x06374994 +.long 0x5c46aa8e,0x69d44ed6,0xa8d063d1,0x2100d5d3,0xa2d17c36,0xcb9727ea,0x8add53b7,0x4c2bab1b,0x15426704,0xa084e90c,0xa837ebea,0x778afcd3,0x7ce477f8,0x6651f701,0x46fb7a8b,0xa0624998 +.long 0xed8a6e19,0xdc1e6828,0x4189d9c7,0x33fc2336,0x671c39bc,0x026f8fe2,0xbc6f9915,0xd40c4ccd,0xf80e75ca,0xafa135bb,0x22adff2c,0x12c651a0,0x4f51ad96,0xc40a04bd,0xbbe4e832,0x04820109 +.long 0x7f4c04cc,0x3667eb1a,0xa9404f84,0x59556621,0x7eceb50a,0x71cdf653,0x9b8335fa,0x994a44a6,0xdbeb9b69,0xd7faf819,0xeed4350d,0x473c5680,0xda44bba2,0xb6658466,0x872bdbf3,0x0d1bc780 +.long 0xa1962f91,0xe535f175,0xed58f5a7,0x6ed7e061,0x2089a233,0x177aa4c0,0xe539b413,0x0dbcb03a,0xbb32e38e,0xe3dc424e,0x6806701e,0x6472e5ef,0x814be9ee,0xdd47ff98,0x35ace009,0x6b60cfff +.long 0x9ff91fe5,0xb8d3d931,0xf0518eed,0x039c4800,0x9182cb26,0x95c37632,0x82fc568d,0x0763a434,0x383e76ba,0x707c04d5,0x824e8197,0xac98b930,0x91230de0,0x92bf7c8f,0x40959b70,0x90876a01 +.long 
0x05968b80,0xdb6d96f3,0x089f73b9,0x380a0913,0xc2c61e01,0x7da70b83,0x569b38c7,0x95fb8394,0x80edfe2f,0x9a3c6512,0x8faeaf82,0x8f726bb9,0x78424bf8,0x8010a4a0,0x0e844970,0x29672044 +.long 0x7a2ad62a,0x63c5cb81,0xac62ff54,0x7ef2b6b9,0xb3ad9db5,0x3749bba4,0x46d5a617,0xad311f2c,0xc2ff3b6d,0xb77a8087,0x367834ff,0xb46feaf3,0x75d6b138,0xf8aa266d,0xec008188,0xfa38d320 +.long 0x696946fc,0x486d8ffa,0xb9cba56d,0x50fbc6d8,0x90f35a15,0x7e3d423e,0xc0dd962c,0x7c3da195,0x3cfd5d8b,0xe673fdb0,0x889dfca5,0x0704b7c2,0xf52305aa,0xf6ce581f,0x914d5e53,0x399d49eb +.long 0x6ec293cd,0x380a496d,0x8e7051f5,0x733dbda7,0xb849140a,0x037e388d,0x5946dbf6,0xee4b32b0,0xcae368d1,0xb1c4fda9,0xfdb0b2f3,0x5001a7b0,0x2e3ac46e,0x6df59374,0x39b3e656,0x4af675f2 +.long 0x39949296,0x44e38110,0x361db1b5,0x5b63827b,0x206eaff5,0x3e5323ed,0xc21f4290,0x942370d2,0xe0d985a1,0xf2caaf2e,0x7239846d,0x192cc64b,0xae6312f8,0x7c0b8f47,0x96620108,0x7dc61f91 +.long 0xc2da7de9,0xb830fb5b,0x0ff8d3be,0xd0e643df,0x188a9641,0x31ee77ba,0xbcf6d502,0x4e8aa3aa,0x9a49110f,0xf9fb6532,0x2dd6b220,0xd18317f6,0x52c3ea5a,0x7e3ced41,0x7d579c4a,0x0d296a14 +.long 0xed4c3717,0x35d6a53e,0x3d0ed2a3,0x9f8240cf,0xe5543aa5,0x8c0d4d05,0xdd33b4b4,0x45d5bbfb,0x137fd28e,0xfa04cc73,0xc73b3ffd,0x862ac6ef,0x31f51ef2,0x403ff9f5,0xbc73f5a2,0x34d5e0fc +.long 0x08913f4f,0xf2526820,0xeac93d95,0xea20ed61,0x6ca6b26c,0x51ed38b4,0xea4327b0,0x8662dcbc,0x725d2aaa,0x6daf295c,0x8e52dcda,0xbad2752f,0x0b17dacc,0x2210e721,0xd51e8232,0xa37f7912 +.long 0x44cc3add,0x4f7081e1,0x87be82cf,0xd5ffa1d6,0x0edd6472,0x89890b6c,0x3ed17863,0xada26e1a,0x63483caa,0x276f2715,0x2f6077fd,0xe6924cd9,0x0a466e3c,0x05a7fe98,0xb1902d1f,0xf1c794b0 +.long 0x82a8042c,0xe5213688,0xcd278298,0xd931cfaf,0xf597a740,0x069a0ae0,0xeb59107c,0x0adbb3f3,0x5eaa8eb8,0x983e951e,0x11b48e78,0xe663a8b5,0x8a03f2c5,0x1631cc0d,0x11e271e2,0x7577c11e +.long 0x08369a90,0x33b2385c,0x190eb4f8,0x2990c59b,0xc68eac80,0x819a6145,0x2ec4a014,0x7a786d62,0x20ac3a8d,0x33faadbe,0x5aba2d30,0x31a21781,0xdba4f565,0x209d2742,0x55aa0fbb,0xdb2ce9e3 +.long 0x168984df,0x8cef334b,0x33879638,0xe81dce17,0x263720f0,0xf6e6949c,0xf593cbec,0x5c56feaf,0xfde58c84,0x8bff5601,0x2eccb314,0x74e24117,0x4c9a8a78,0xbcf01b61,0x544c9868,0xa233e35e +.long 0x8bd7aff1,0xb3156bf3,0x1d81b146,0x1b5ee4cb,0xd628a915,0x7ba1ac41,0xfd89699e,0x8f3a8f9c,0xa0748be7,0x7329b9c9,0xa92e621f,0x1d391c95,0x4d10a837,0xe51e6b21,0x4947b435,0xd255f53a +.long 0xf1788ee3,0x07669e04,0xa86938a2,0xc14f27af,0xe93a01c0,0x8b47a334,0xd9366808,0xff627438,0xca2a5965,0x7a0985d8,0xd6e9b9b3,0x3d9a5542,0x4cf972e8,0xc23eb80b,0x4fdf72fd,0x5c1c33bb +.long 0x74a86108,0x0c4a58d4,0xee4c5d90,0xf8048a8f,0xe86d4c80,0xe3c7c924,0x056a1e60,0x28c889de,0xb214a040,0x57e2662e,0x37e10347,0xe8c48e98,0x80ac748a,0x87742862,0x186b06f2,0xf1c24022 +.long 0x5f74040a,0xac2dd4c3,0xfceac957,0x409aeb71,0x55c4ec23,0x4fbad782,0x8a7b76ec,0xb359ed61,0xed6f4a60,0x12744926,0x4b912de3,0xe21e8d7f,0xfc705a59,0xe2575a59,0xed2dbc0e,0x72f1d4de +.long 0xeb7926b8,0x3d2b24b9,0xcdbe5509,0xbff88cb3,0xe4dd640b,0xd0f399af,0x2f76ed45,0x3c5fe130,0x3764fb3d,0x6f3562f4,0x3151b62d,0x7b5af318,0xd79ce5f3,0xd5bd0bc7,0xec66890f,0xfdaf6b20 +.long 0x6063540c,0x735c67ec,0xe5f9cb8f,0x50b259c2,0x3f99c6ab,0xb8734f9a,0xa3a7bc85,0xf8cc13d5,0xc5217659,0x80c1b305,0x4ec12a54,0xfe5364d4,0x681345fe,0xbd87045e,0x582f897f,0x7f8efeb1 +.long 0xd5923359,0xe8cbf1e5,0x539b9fb0,0xdb0cea9d,0x49859b98,0x0c5b34cf,0xa4403cc6,0x5e583c56,0xd48185b7,0x11fc1a2d,0x6e521787,0xc93fbc7e,0x05105b8b,0x47e7a058,0xdb8260c8,0x7b4d4d58 +.long 
0x46eb842a,0xe33930b0,0x7bdae56d,0x8e844a9a,0x13f7fdfc,0x34ef3a9e,0x636ca176,0xb3768f82,0x4e09e61c,0x2821f4e0,0xa0c7cddc,0x414dc3a1,0x54945fcd,0xd5379437,0xb3555ff1,0x151b6eef +.long 0x6339c083,0xb31bd613,0xdfb64701,0x39ff8155,0xe29604ab,0x7c3388d2,0xa6b10442,0x1e19084b,0xeccd47ef,0x17cf54c0,0x4a5dfb30,0x89693385,0x47daf9f6,0x69d023fb,0x7d91d959,0x9222840b +.long 0x803bac62,0x439108f5,0x379bd45f,0x0b7dd91d,0xca63c581,0xd651e827,0x509c104f,0x5c5d75f6,0x1f2dc308,0x7d5fc738,0xd98454be,0x20faa7bf,0xa517b031,0x95374bee,0x642692ac,0xf036b9b1 +.long 0x39842194,0xc5106109,0x49d05295,0xb7e2353e,0xefb42ee0,0xfc8c1d5c,0x08ce811c,0xe04884eb,0x7419f40e,0xf1f75d81,0xa995c241,0x5b0ac162,0xc4c55646,0x120921bb,0x8d33cf97,0x713520c2 +.long 0xe98c5100,0xb4a65a5c,0x2ddd0f5a,0x6cec871d,0x9ba2e78b,0x251f0b7f,0xce3a2a5f,0x224a8434,0x25f5c46f,0x26827f61,0x48545ec0,0x6a22bedc,0xb1bb5cdc,0x25ae5fa0,0xfcb9b98f,0xd693682f +.long 0x91e5d7d3,0x32027fe8,0x73a07678,0xf14b7d17,0xc0dfdd61,0xf88497b3,0x2a8c4f48,0xf7c2eec0,0x3756e621,0xaa5573f4,0x1825b948,0xc013a240,0x63878572,0x1c03b345,0x653a4184,0xa0472bea +.long 0x0ac69a80,0xf4222e27,0xf51e54f6,0x34096d25,0x8fffa591,0x00a648cb,0x69b6527f,0x4e87acdc,0xe285ccb4,0x0575e037,0x50ddcf52,0x188089e4,0x870ff719,0xaa96c9a8,0x1fc7e369,0x74a56cd8 +.long 0x1726931a,0x41d04ee2,0x3660ecfd,0x0bbbb2c8,0x24818e18,0xa6ef6de5,0xe7d57887,0xe421cc51,0xbea87be6,0xf127d208,0xb1cdd682,0x16a475d3,0x439b63f7,0x9db1b684,0xf0f113b6,0x5359b3db +.long 0x8bf06e31,0xdfccf1de,0xdd383901,0x1fdf8f44,0x5017e7d2,0x10775cad,0x58d11eef,0xdfc3a597,0xb1ecff10,0x6ec9c8a0,0x28400549,0xee6ed6cc,0x1b4f8d73,0xb5ad7bae,0xe00aaab9,0x61b4f11d +.long 0xd4eff2d7,0x7b32d69b,0x4288b60f,0x88ae6771,0x37a1e723,0x159461b4,0x570aae8c,0x1f3d4789,0x7f9871da,0x869118c0,0xf635e278,0x35fbda78,0xe1541dac,0x738f3641,0xc0dae45f,0x6794b13a +.long 0x09cc0917,0x065064ac,0xc68540fd,0x27c53729,0xef227671,0x0d2d4c8e,0xa1785a04,0xd23a9f80,0x52650359,0x98c59528,0x74a1acad,0xfa09ad01,0x0b55bf5c,0x082d5a29,0x419b8084,0xa40f1c67 +.long 0xdcc18770,0x3a5c752e,0x8825c3a5,0x4baf1f2f,0x21b153ed,0xebd63f74,0xb2f64723,0xa2383e47,0x2646d19a,0xe7bf620a,0x03c83ffd,0x56cb44ec,0x4f6be9f1,0xaf7267c9,0xc06bb5e9,0x8b2dfd7b +.long 0xa672c5c7,0xb87072f2,0x0d53c5e2,0xeacb11c8,0xff435932,0x22dac29d,0x4408693c,0x37bdb99d,0x2899c20f,0xf6e62fb6,0x447ece24,0x3535d512,0xff577ce3,0xfbdc6b88,0x190575f2,0x726693bd +.long 0xab4b35a2,0x6772b0e5,0xf5eeaacf,0x1d8b6001,0x795b9580,0x728f7ce4,0x41fb81da,0x4a20ed2a,0x4fec01e6,0x9f685cd4,0xa7ff50ad,0x3ed7ddcc,0x0c2d97fd,0x460fd264,0xeb82f4f9,0x3a241426 +.long 0x6a8ea820,0x17d1df2c,0xf22cc254,0xb2b50d3b,0xb7291426,0x03856cba,0x04f5ee39,0x87fd26ae,0x02bee4ba,0x9cb696cc,0x06820fd6,0x53121804,0x0212e985,0xa5dfc269,0x160f9a09,0x666f7ffa +.long 0xbccd9617,0xc503cd33,0xba7730a3,0x365dede4,0x5ddb0786,0x798c6355,0xfc9cd3bc,0xa6c3200e,0xe5e35efd,0x060ffb2c,0x5555a1c1,0x99a4e25b,0xf70b3751,0x11d95375,0x160e1bf6,0x0a57354a +.long 0xf8e4b065,0xecb3ae4b,0x2e53022b,0x07a834c4,0x8692ed96,0x1cd300b3,0x61ee14ec,0x16a6f792,0x6a8649ed,0x8f1063c6,0x869f3e14,0xfbcdfcfe,0x00a7b3ec,0x2cfb97c1,0x7130c2f1,0xcea49b3c +.long 0xe9d96488,0x462d044f,0x8182a0c1,0x4b53d52e,0x0391e9e9,0x84b6ddd3,0xb1741a09,0x80ab7b48,0x27d3317f,0xec0e15d4,0x1a64671e,0x8dfc1ddb,0xd49c5b92,0x93cc5d5f,0x3674a331,0xc995d53d +.long 0x090090ae,0x302e41ec,0xedb06830,0x2278a0cc,0xfbc99690,0x1d025932,0xb80d68da,0x0c32fbd2,0xf341a6c1,0xd79146da,0x1bef68a0,0xae0ba139,0x8d774b3a,0xc6b8a563,0x880ba4d7,0x1cf307bd +.long 
0x19803511,0xc033bdc7,0x8888c3be,0xa9f97b3b,0x85c6d05e,0x3d68aebc,0x193919eb,0xc3b88a9d,0xc48b0ee3,0x2d300748,0x07a746c1,0x7506bc7c,0x6e6d57f3,0xfc48437c,0xcfeaa91a,0x5bd71587 +.long 0xc1bc5225,0xa4ed0408,0x2719226d,0xd0b946db,0x758d2d43,0x109ecd62,0x2751759b,0x75c8485a,0x9ce4177a,0xb0b75f49,0x79c10c3d,0x4fa61a1e,0xa167fcd7,0xc062d300,0x750f0fa8,0x4df3874c +.long 0x83dfedc9,0x29ae2cf9,0x8d87631a,0xf8437134,0x7429c8d2,0xaf571711,0x146d9272,0x18d15867,0x69769bb7,0x83053ecf,0xc479ab82,0xc55eb856,0x21b0f4b2,0x5ef7791c,0x3d491525,0xaa5956ba +.long 0x9fe20eba,0x407a96c2,0xe52a5ad3,0xf27168bb,0xbf1d9d89,0x43b60ab3,0x710e727a,0xe45c51ef,0x099b4221,0xdfca5276,0x2557a159,0x8dc6407c,0x91035895,0x0ead8335,0x9c55dc32,0x0a9db957 +.long 0xdf61bc76,0xe40736d3,0x3f778cdb,0x13a619c0,0xc56ea28f,0x6dd921a4,0x2fa647b4,0x76a52433,0xac5bdc5d,0x23591891,0xbac7dc01,0xff4a1a72,0x62df8453,0x9905e261,0xe63b265f,0x3ac045df +.long 0xad53dba7,0x8a3f341b,0x837b625a,0x8ec269cc,0x3ae31189,0xd71a2782,0x55e96120,0x8fb4f9a3,0xff9875cf,0x804af823,0x5d442a9b,0x23224f57,0xecc62679,0x1c4d3b9e,0xa0e7ddb1,0x91da22fb +.long 0x6c04a661,0xa370324d,0x5e376d17,0x9710d3b6,0x3044e357,0xed8c98f0,0x6422701c,0xc364ebbe,0x7733d61c,0x347f5d51,0xcea826c3,0xd55644b9,0x55a25548,0x80c6e0ad,0x844220a7,0x0aa7641d +.long 0x31810660,0x1438ec81,0xde4b4043,0x9dfa6507,0xcc3e0273,0x10b515d8,0x28d8cfb2,0x1b6066dd,0x9c9efebd,0xd3b04591,0xa21c1ff4,0x425d4bdf,0xd57607d3,0x5fe5af19,0x54481084,0xbbf773f7 +.long 0x94b03ed1,0x8435bd69,0x634cc546,0xd9ad1de3,0x00e420ca,0x2cf423fc,0xa03096dd,0xeed26d80,0xa4db09d2,0xd7f60be7,0x960622f7,0xf47f569d,0x7296c729,0xe5925fd7,0x26ca2715,0xeff2db26 +.long 0xb913e759,0xa6fcd014,0x8ff4de93,0x53da4786,0xc32068e1,0x14616d79,0xccdf352e,0xb187d664,0x1dc90b59,0xf7afb650,0x7daa1b26,0x8170e943,0x700c0a84,0xc8e3bdd8,0x6482bdfa,0x6e8d345f +.long 0xc5c5ea50,0x84cfbfa1,0x67960681,0xd3baf14c,0x0dd50942,0x26398403,0x4716a663,0xe4b7839c,0xe7de6dc0,0xd5f1f794,0x622aa7ce,0x5cd0f4d4,0x59acfeec,0x5295f3f1,0x953e0607,0x8d933552 +.long 0x776c5722,0xc7db8ec5,0x2b5f290c,0xdc467e62,0x4ff425a9,0xd4297e70,0x0cf7bb72,0x4be924c1,0xa1892131,0x0d5dc5ae,0xa705c992,0x8bf8a8e3,0x7a305ac5,0x73a0b064,0x9a8c77a8,0x00c9ca4e +.long 0x83774bdd,0x5dfee80f,0x85734485,0x63131602,0x914a69a9,0xa1b524ae,0xd4e300d7,0xebc2ffaf,0x7cfa46a5,0x52c93db7,0x21653b50,0x71e6161f,0xa4bc580a,0x3574fc57,0xe1bc1253,0xc09015dd +.long 0xd174d7aa,0x4b7b47b2,0xf3a15d04,0x4072d8e8,0xd6fa07ed,0xeeb7d47f,0xedbdafb1,0x6f2b9ff9,0x3760fe8a,0x18c51615,0xf06c6c13,0x7a96e6bf,0x0ea2d071,0x4d7a0410,0x0be2a5ce,0xa1914e9b +.long 0xd8a3c5cf,0x5726e357,0x2abb2b13,0x1197ecc3,0x31ae88dd,0x6c0d7f7f,0xfdbb3efe,0x15b20d1a,0x70584039,0xcd06aa26,0xa7dc9747,0x2277c969,0x7855d815,0xbca69587,0x5188b32a,0x899ea238 +.long 0x760c1c9d,0x37d9228b,0x9b5c18da,0xc7efbb11,0x19f6dbc5,0x7f0d1bc8,0x07e6905b,0x4875384b,0x3ba8cd86,0xc7c50baa,0xc2905de0,0xb0ce40fb,0x7a231952,0x70840673,0xcf43de26,0xa912a262 +.long 0xeb5b76c1,0x9c38ddcc,0x26fc0ab4,0x746f5285,0xd62c269f,0x52a63a50,0x99458621,0x60049c55,0x3c2f7c9e,0xe7f48f82,0x917d5cf3,0x6bd99043,0x8701f469,0xeb1317a8,0x9a449fe0,0xbd3fe2ed +.long 0x12ef3d36,0x421e79ca,0x3e7ea5de,0x9ee3c36c,0xcdff36f7,0xe48198b5,0xc6b82228,0xaff4f967,0xc47adb7e,0x15e19dd0,0x032e7dfa,0x45699b23,0x1fae026a,0x40680c8b,0x550dbf4d,0x5a347a48 +.long 0x3cef0d7d,0xe652533b,0x2bbb4381,0xd94f7b18,0x0e80f500,0x838752be,0x9e9c9bfb,0x8e6e2488,0x16caca6a,0xc9751697,0x38531ad9,0x866c49d8,0x7151ade1,0xc917e239,0x6037c407,0x2d016ec1 +.long 
0x00eac3f9,0xa407ccc9,0xe2ed4748,0x835f6280,0x1cc98e0d,0xcc54c347,0xdcb572eb,0x0e969937,0x8f30c9cb,0x1b16c8e8,0x373c4661,0xa606ae75,0x35502cab,0x47aa689b,0x4d9bb64f,0xf89014ae +.long 0x31c71f7b,0x202f6a9c,0x296ffe5c,0x01f95aa3,0x53cec3a3,0x5fc06014,0x5f498a45,0xeb991237,0x5d91ba87,0xae9a935e,0x0b564a19,0xc6ac6281,0x3bd44e69,0x8a8fe81c,0x9dd11d45,0x7c8b467f +.long 0xea5b8e69,0xf772251f,0xc5b75fbc,0xaeecb3bd,0x887ff0e5,0x1aca3331,0x19f0a131,0xbe5d49ff,0xe5c8646f,0x582c13aa,0x20e19980,0xdbaa12e8,0xf7abbd94,0x8f40f31a,0x1dfc7663,0x1f13f5a8 +.long 0xaceb4fc0,0x5d81f1ee,0x5e6f0f42,0x36256002,0x751370c8,0x4b67d6d7,0x03e80589,0x2608b698,0x05268301,0xcfc0d2fc,0x40309212,0xa6943d39,0x1fd0e1c2,0x192a90c2,0x37f1dc76,0xb209f113 +.long 0x97bf1298,0xefcc5e06,0x219d639e,0xcbdb6730,0xb81e8c6f,0xd009c116,0x1a7ce2e5,0xa3ffdde3,0xa914d3ba,0xc53fbaaa,0x88df85ee,0x836d500f,0x66ee0751,0xd98dc71b,0x714516fd,0x5a3d7005 +.long 0x39eedbba,0x21d3634d,0x0455a46d,0x35cd2e68,0xf9d7eb0c,0xc8cafe65,0x00cefb3e,0xbda3ce9e,0x2c9cf7a4,0xddc17a60,0x7bcb8773,0x01572ee4,0x8c7548df,0xa92b2b01,0xa84600e3,0x732fd309 +.long 0x16543a40,0xe22109c7,0xfede3c6c,0x9acafd36,0x6824e614,0xfb206852,0xda25dca0,0x2a4544a9,0x91d60b06,0x25985262,0x28753545,0x281b7be9,0x90f13b27,0xec667b1a,0x940e2eb4,0x33a83aff +.long 0xd5d721d5,0x80009862,0x5bd3a182,0x0c3357a3,0x7aa2cda4,0x27f3a83b,0xf6f83085,0xb58ae74e,0x2e6dad6b,0x2a911a81,0xf43d6c5b,0xde286051,0xf996c4d8,0x4bdccc41,0x0ae1e24e,0xe7312ec0 +.long 0x6e6485b3,0xf8d112e7,0x771c52f8,0x4d3e24db,0x684a2f6d,0x48e3ee41,0x21d95551,0x7161957d,0xcdb12a6c,0x19631283,0x2e50e164,0xbf3fa882,0x3166cc73,0xf6254b63,0xaee8cc38,0x3aefa7ae +.long 0x3b36f9fd,0x79b0fe62,0xfde19fc0,0x26543b23,0x958482ef,0x136e64a0,0x9b095825,0x23f63771,0xb6a1142e,0x14cfd596,0x335aac0b,0x5ea6aac6,0xf3081dd5,0x86a0e8bd,0x003dc12a,0x5fb89d79 +.long 0xf72e34d4,0xf615c33a,0x110eec35,0x0bd9ea40,0xc1dea34e,0x1c12bc5b,0x49ae4699,0x686584c9,0x8c97b942,0x13ad95d3,0x4e5c7562,0x4609561a,0xf2737f89,0x9e94a4ae,0x371c78b6,0xf57594c6 +.long 0xe3779ee3,0x0f0165fc,0xbd495d9e,0xe00e7f9d,0x20284e7a,0x1fa4efa2,0x47ac6219,0x4564bade,0xc4708e8e,0x90e6312a,0xa71e9adf,0x4f5725fb,0x3d684b9f,0xe95f55ae,0x1e94b415,0x47f7ccb1 +.long 0x8d946581,0x7322851b,0xbdf4a012,0xf0d13133,0x6584dae0,0xa3510f69,0x3c9f6c6d,0x03a7c171,0xe475381a,0x5be97f38,0x85823334,0xca1ba422,0x0be17dda,0xf83cc5c7,0x0b918c0f,0x158b1494 +.long 0x522e6b69,0xda3a77e5,0xbbcd6c18,0x69c908c3,0xd924fd56,0x1f1b9e48,0xaa4bb3f7,0x37c64e36,0xee478d7d,0x5a4fdbdf,0x0193f7a0,0xba75c8bc,0x56cd16df,0x84bc1e84,0x46fad151,0x1fb08f08 +.long 0x842e9f30,0x8a7cabf9,0x5eab83af,0xa331d4bf,0x017f2a6a,0xd272cfba,0x83aba0e3,0x27560abc,0x0e3a6b75,0x94b83387,0x6b9f50f5,0x25c6aea2,0xb5fdf6d0,0x803d691d,0xe6333514,0x03b77509 +.long 0x61a341c1,0x36178903,0x0cfd6142,0x3604dc60,0x8533316c,0x022295eb,0x44af2922,0x3dbde4ac,0x1c7eef69,0x898afc5d,0xd14f4fa1,0x58896805,0x203c21ca,0x05002160,0x40ef730b,0x6f0d1f30 +.long 0x196224f8,0x8e8c44d4,0x374d079d,0x75a4ab95,0x7d48f123,0x79085ecc,0x1bf65ad8,0x56f04d31,0xbda602b2,0xe220bf1c,0xf9612c69,0x73ee1742,0x084fd06b,0x76008fc8,0xf11380d1,0x4000ef9f +.long 0x12cfe297,0x48201b4b,0x292f74e5,0x3eee129c,0xc9e874e8,0xe1fe114e,0x92c5fc41,0x899b055c,0x3a39c8cf,0x4e477a64,0x78963cc9,0x82f09efe,0xd333f863,0x6fd3fd8f,0xdc949c63,0x85132b2a +.long 0x516eb17b,0x7e06a3ab,0xd2c7372b,0x73bec06f,0xba896da6,0xe4f74f55,0x8e9eb40f,0xbb4afef8,0xe61d66b0,0x2d75bec8,0xef29300b,0x02bda4b4,0x026baa5a,0x8bbaa8de,0xa07f4440,0xff54befd +.long 
0xbe7a2af3,0xbd9b8b1d,0x4fb74a72,0xec51caa9,0x63879697,0xb9937a4b,0xec2687d5,0x7c9a9d20,0x6ef5f014,0x1773e44f,0xe90c6900,0x8abcf412,0x8142161e,0x387bd022,0xfcb6ff2a,0x50393755 +.long 0xed6def63,0x9813fd56,0x7d53106c,0x53cf6482,0x431f7ac1,0x991a35bd,0x63e65faf,0xf1e274dd,0x44cc7880,0xf63ffa3c,0x7c256981,0x411a426b,0x93a420e0,0xb698b9fd,0xae53f8fe,0x89fdddc0 +.long 0x32398baa,0x766e0722,0x5cfca031,0x205fee42,0x7a029cf2,0xa49f5341,0x4023890d,0xa88c68b8,0x7337aaa8,0xbc275041,0x0eb384f4,0x9ed364ad,0x29aba92f,0xe0816f85,0x04e38a88,0x2e9e1941 +.long 0x3dafd2d5,0x57eef44a,0x97ed98d8,0x35d1fae5,0x2307f9b1,0x50628c09,0xd6cba5c6,0x09d84aae,0x88aaa691,0x67071bc7,0xafe6cb03,0x2dea57a9,0x3d78ac01,0xdfe11bb4,0x7fd7aa51,0x7286418c +.long 0x77f7195a,0xfabf7709,0xadeb838f,0x8ec86167,0xbb4f012d,0xea1285a8,0x9a3eab3f,0xd6883503,0x309004c2,0xee5d24f8,0x13ffe95e,0xa96e4b76,0xbd223ea4,0x0cdffe12,0xb6739a53,0x8f5c2ee5 +.long 0xdd968198,0x5cb4aaa5,0x72413a6c,0xfa131c52,0x9536d903,0x53d46a90,0x48606d8e,0xb270f0d3,0xa053a3bc,0x518c7564,0x1a86caef,0x088254b7,0x0ab5efd0,0xb3ba8cb4,0x4605945d,0x5c59900e +.long 0xa1887395,0xecace1dd,0x932a65de,0x40960f36,0x3aa95529,0x9611ff5c,0x7c1e5a36,0xc58215b0,0xf0e1a524,0xd48c9b58,0xf590dfb8,0xb406856b,0x9cd95662,0xc7605e04,0xa33ecf82,0x0dd036ee +.long 0xc33156b3,0xa50171ac,0x4a80172e,0xf09d24ea,0x76dc8eef,0x4e1f72c6,0x5e3d44ee,0xe60caadc,0x979b1d8f,0x006ef8a6,0x97788d26,0x60908a1c,0x266feec0,0x6e08f95b,0x22e8c94e,0x618427c2 +.long 0x59145a65,0x3d613339,0xfa406337,0xcd9bc368,0x2d8a52a0,0x82d11be3,0x97a1c590,0xf6877b27,0xf5cbdb25,0x837a819b,0xde090249,0x2a4fd1d8,0x74990e5f,0x622a7de7,0x7945511b,0x840fa5a0 +.long 0x6558842d,0x30b974be,0x17f3d0a6,0x70df8c64,0x7542e46d,0x7c803520,0xe4ecc823,0x7251fe7f,0x5e9aac9a,0xe59134cb,0xf0045d71,0x11bb0934,0xdbcb1d4e,0x53e5d9b5,0x92defc91,0x8d97a905 +.long 0x7946d3f9,0xfe289327,0x07472273,0xe132bd24,0x1eb6ae86,0xeeeb510c,0xf0595067,0x777708c5,0x1297029e,0x18e2c8cd,0xbbf9305e,0x2c61095c,0x6b85d6d9,0xe466c258,0xda1ea530,0x8ac06c36 +.long 0xa1304668,0xa365dc39,0x07f89606,0xe4a9c885,0xacc7228d,0x65a4898f,0x84ca8303,0x3e2347ff,0xea7d23a3,0xa5f6fb77,0x672a71cd,0x2fac257d,0x7e6a44d3,0x6908bef8,0x891d3d7a,0x8ff87566 +.long 0x6b0cf82e,0xe58e90b3,0x2615b5e7,0x6438d246,0x669c145a,0x07b1f8fc,0x36f1e1cb,0xb0d8b2da,0xd9184c4d,0x54d5dadb,0xf93d9976,0x3dbb18d5,0xd1147d47,0x0a3e0f56,0xa0a48609,0x2afa8c8d +.long 0xbc36742c,0x275353e8,0xeea0ed90,0x898f427e,0x3e477b00,0x26f4947e,0x308741e3,0x8ad8848a,0xd74a2a46,0x6c703c38,0x9ba17ba2,0x5e3e05a9,0x4ab9a9e4,0xc1fa6f66,0x3841d6ec,0x474a2d9a +.long 0x653ae326,0x871239ad,0xa74cbb43,0x14bcf72a,0x20d4c083,0x8737650e,0x110ed4af,0x3df86536,0xb53ca555,0xd2d86fe7,0xabd5d538,0x688cb00d,0x1ad38468,0xcf81bda3,0xf01167b6,0x7ccfe3cc +.long 0x6c4c1fe6,0xcf4f47e0,0x298bbb79,0x557e1f1a,0x30d45a14,0xf93b974f,0x0baf97c4,0x174a1d2d,0xc51fbf53,0x7a003b30,0xee68b225,0xd8940991,0x1c0f4173,0x5b0aa7b7,0xa20a7153,0x975797c9 +.long 0xe3533d77,0x26e08c07,0x2e341c99,0xd7222e6a,0x8d2dc4ed,0x9d60ec3d,0x7c476cf8,0xbdfe0d8f,0x1d056605,0x1fe59ab6,0x86a8551f,0xa9ea9df6,0x47fb8d8c,0x8489941e,0x4a7f1b10,0xfeb874eb +.long 0x7ee0d98f,0xfe5fea86,0xdbf61864,0x201ad34b,0x37c031d4,0x45d8fe47,0x795f0822,0xd5f49fae,0xc7f4a40c,0xdb0fb291,0x730ddd92,0x2e69d9c1,0x49d76987,0x754e1054,0x7662db87,0x8a24911d +.long 0x60a71676,0x61fc1810,0xf66a8ad1,0xe852d1a8,0x6417231e,0x172bbd65,0x3babb11f,0x0d6de7bd,0xc8e347f8,0x6fde6f88,0x9bd99cc3,0x1c587547,0x34076950,0x78e54ed0,0x796e83ba,0x97f0f334 +.long 
0x4924867a,0xe4dbe1ce,0x60b84917,0xbd5f51b0,0x3cb09a79,0x37530040,0xff1743d8,0xdb3fe0f8,0x556fa9db,0xed7894d8,0x23412fbf,0xfa262169,0xba7b9291,0x563be0db,0x0c9fb234,0x6ca8b8c0 +.long 0xbd763802,0xed406aa9,0x65303da1,0xc21486a0,0xc7e62ec4,0x61ae291e,0xdf99333e,0x622a0492,0xbb7a8ee0,0x7fd80c9d,0x6c01aedb,0xdc2ed3bc,0x08be74ec,0x35c35a12,0x469f671f,0xd540cb1a +.long 0xcf84f6c7,0xd16ced4e,0x2d090f43,0x8561fb9c,0x6f239db4,0x7e693d79,0x77bd0d94,0xa736f928,0x2c1950ee,0x07b4d929,0x56dc11b3,0xda177543,0x7a6a878e,0xa5dfbbaa,0x4decb08a,0x1c70cb29 +.long 0x6f0f7c50,0xfba28c8b,0x854dcc6d,0xa8eba2b8,0x36b78642,0x5ff8e89a,0xf6873adf,0x070c1c8e,0x6484d2e4,0xbbd3c371,0x0d414129,0xfb78318f,0x6ad93b0b,0x2621a39c,0xa9e917f7,0x979d74c2 +.long 0x61fb0428,0xfc195647,0xbee624d4,0x4d78954a,0xb8ae86fd,0xb94896e0,0xc91c8b13,0x6667ac0c,0x43bcf832,0x9f180512,0xa0010137,0xfbadf8b7,0xb3ba8aa7,0xc69b4089,0xe687ce85,0xfac4bacd +.long 0x977eab40,0x9164088d,0x2760b390,0x51f4c5b6,0x340dd553,0xd238238f,0xdb1d31c9,0x358566c3,0x5068f5ff,0x3a5ad69e,0xdaff6b06,0xf31435fc,0xd6debff0,0xae549a5b,0x75e01331,0x59e5f0b7 +.long 0x98559acf,0x5d492fb8,0x4db79b50,0x96018c2e,0x609f66aa,0x55f4a48f,0x4900a14f,0x1943b3af,0x15a40d39,0xc22496df,0x4c20f7c5,0xb2a44684,0x3b98404c,0x76a35afa,0xff5d1b77,0xbec75725 +.long 0xbea06444,0xb67aa163,0xf724b6f2,0x27e95bb2,0xd238c8ab,0x3c20e3e9,0xddd6ae17,0x1213754e,0x716e0f74,0x8c431020,0xffc095c2,0x6679c82e,0xd0ac2932,0x2eb3adf4,0x01bb7a76,0x2cc970d3 +.long 0x740f0e66,0x70c71f2f,0x2b6b23cc,0x545c616b,0xb40a8bd7,0x4528cfcb,0x2ab27722,0xff839633,0x025ac99a,0x049127d9,0x2b63e33b,0xd314d4a0,0x28d84519,0xc8c310e7,0xb3bc84ba,0x0fcb8983 +.long 0x38634818,0x2cc52261,0xb44c2e0b,0x501814f4,0x54dfdba3,0xf7e181aa,0xe759718c,0xcfd58ff0,0xd3b507a8,0xf90cdb14,0xc50bdad8,0x57bd478e,0x50e5f9aa,0x29c197e2,0xe40bc855,0x4db6eef8 +.long 0xd1fc0654,0x2cc8f21a,0x81269d73,0xc71cc963,0x077f49f9,0xecfbb204,0xca56b793,0xdde92571,0xf97ad8f7,0x9abed6a3,0x924de3bd,0xe6c19d3f,0xa140a800,0x8dce92f4,0x1337af07,0x85f44d1e +.long 0x09d64c52,0x5953c08b,0xf5df9749,0xa1b5e49f,0x52735f7d,0x336a8fb8,0x9add676b,0xb332b6db,0xb4511aa4,0x558b88a0,0xdbd5cc55,0x09788752,0xd8cd52bd,0x16b43b9c,0xc2a2696b,0x7f0bc5a0 +.long 0xc11f61ef,0x146e12d4,0x3a83e79e,0x9ce10754,0x6cbfca15,0x08ec73d9,0x5b49653f,0x09ff29ad,0xe7da946e,0xe31b72bd,0xee80a4f2,0xebf9eb3b,0x17598ce4,0xd1aabd08,0x53f37e80,0x18b5fef4 +.long 0x5958cd79,0xd5d5cdd3,0x1d373114,0x3580a1b5,0xfa935726,0xa36e4c91,0xef20d760,0xa38c534d,0x2ff5845b,0x7088e40a,0xbd78177f,0xe5bb40bd,0x857f9920,0x4f06a7a8,0xe968f05d,0xe3cc3e50 +.long 0xe5682d26,0x1d68b7fe,0xaec7f87c,0x5206f76f,0x041951ab,0x41110530,0xd4b5a71a,0x58ec52c1,0x0f75cf9a,0xf3488f99,0xba82d0d5,0xf411951f,0x618895ab,0x27ee75be,0x6d8aab14,0xeae060d4 +.long 0x7fb54dc2,0x9ae1df73,0x25963649,0x1f3e391b,0xfe055081,0x242ec32a,0x8491c9bd,0x5bd450ef,0x981eb389,0x367efc67,0x3a0550d5,0xed7e1928,0xab3ce75c,0x362e776b,0x1f24c523,0xe890e308 +.long 0xfeccef76,0xb961b682,0x8bba6d92,0x8b8e11f5,0x2b2375c4,0x8f2ccc4c,0xe2f86cfa,0x0d7f7a52,0x9efe5633,0xfd94d30a,0x5451f934,0x2d8d246b,0x244e6a00,0x2234c6e3,0xddec8c50,0xde2b5b0d +.long 0xbf776f5b,0x2ce53c5a,0x60357b05,0x6f724071,0x71bf3f7a,0xb2593717,0x440c4a9f,0x87d2501c,0x87b05340,0x440552e1,0x21624c32,0xb7bf7cc8,0x22facddb,0x4155a6ce,0x889837ef,0x5a4228cb +.long 0xfd4fd671,0xef87d6d6,0xc2daa10e,0xa233687e,0x03c0eb96,0x75622244,0x8bf19be6,0x7632d184,0x40735ff4,0x05d0f8e9,0xc00931f1,0x3a3e6e13,0xdafe3f18,0x31ccde6a,0xcfe51207,0xf381366a +.long 
0x60167d92,0x24c222a9,0x7529f18c,0x62f9d6f8,0x0353b114,0x412397c0,0xef808043,0x334d89dc,0x2a4383ce,0xd9ec63ba,0x5cf92ba0,0xcec8e937,0xc8be74c0,0xfb8b4288,0x105d4391,0x67d6912f +.long 0x1b913149,0x7b996c46,0x3a4e02da,0x36aae2ef,0x972de594,0xb68aa003,0x4ec6d545,0x284ec70d,0x61391d54,0xf3d2b2d0,0xfe114e92,0x69c5d5d6,0xb4482dff,0xbe0f00b5,0xf5bf33c5,0xe1596fa5 +.long 0x96a71cba,0x10595b56,0xfdcadeb7,0x944938b2,0xfccd8471,0xa282da4c,0x0d37bfe1,0x98ec05f3,0x0698304a,0xe171ce1b,0x21bdf79b,0x2d691444,0x1b21dec1,0xd0cd3b74,0x16a15f71,0x712ecd8b +.long 0x00fd56e1,0x8d4c00a7,0xf9527c18,0x02ec9692,0x4a3e42e1,0x21c44937,0x1392ae0a,0x9176fbab,0x44b7b618,0x8726f1ba,0xf1de491c,0xb4d7aae9,0x07b582c0,0xf91df7b9,0xef60aa3a,0x7e116c30 +.long 0x466265d7,0x99270f81,0x4df7adf0,0xb15b6fe2,0xf9738f7f,0xfe33b2d3,0xd6d70f95,0x48553ab9,0xc21e94db,0x2cc72ac8,0xbdc0bbee,0x795ac38d,0x2e40478f,0x0a1be449,0x052bde55,0x81bd3394 +.long 0x56b3c4f2,0x63c8dbe9,0x904177cc,0x017a99cf,0x4d010fc1,0x947bbddb,0xbb2c9b21,0xacf9b00b,0x47173611,0x2970bc8d,0xac7d756f,0x1a4cbe08,0x67d541a2,0x06d9f4aa,0x59c2cf44,0xa3e8b689 +.long 0x4d88f1dd,0xaad066da,0x7ad35dea,0xc604f165,0x4478ca67,0x7edc0720,0xba02ce06,0xa10dfae0,0xaf36f4e4,0xeceb1c76,0xaf3f8f48,0x994b2292,0x77c8a68c,0xbf9ed77b,0x51744c9d,0x74f544ea +.long 0x8113a757,0x82d05bb9,0x8a9885e4,0x4ef2d2b4,0x1aa7865f,0x1e332be5,0x290d1a52,0x22b76b18,0x44351683,0x308a2310,0xa3f22840,0x9d861896,0x841ed947,0x5959ddcd,0x154b73bf,0x0def0c94 +.long 0x4c7c15e0,0xf0105417,0x3a277c32,0x539bfb02,0xf9dccf5f,0xe699268e,0x0247a3bd,0x9f5796a5,0x4f157269,0x8b839de8,0x7a30196b,0xc825c1e5,0xdc8a5a91,0x6ef0aabc,0x498b7fe6,0xf4a8ce6c +.long 0x70cbac78,0x1cce35a7,0xf6b23958,0x83488e9b,0xd76cb011,0x0341a070,0xae1b2658,0xda6c9d06,0xdd648c52,0xb701fb30,0x52fb9fd1,0x994ca02c,0x6f563086,0x06933117,0x17856bab,0x3d2b8100 +.long 0x5963a46e,0xe89f48c8,0xa99e61c7,0x658ab875,0x4b8517b4,0x6e296f87,0xfc1bc656,0x36c4fcdc,0xa3906def,0xde5227a1,0x62418945,0x9fe95f57,0xfdd96cde,0x20c91e81,0xda4480de,0x5adbe47e +.long 0x396de2b6,0xa009370f,0xf0ecc7bd,0x98583d4b,0xe51d0672,0xf44f6b57,0x556b1984,0x03d6b078,0xb0b64912,0x27dbdd93,0x15687b09,0x9b3a3434,0x51ec20a9,0x0dba6461,0xff28187c,0xec93db7f +.long 0x66e48bdd,0x00ff8c24,0x11ccd78e,0x2514f2f9,0xe1250603,0xeba11f4f,0x243fa156,0x8a22cd41,0xb283e4c6,0xa4e58df4,0x8b39783f,0x78c29859,0xa5259809,0x5235aee2,0x0e0227dd,0xc16284b5 +.long 0x1338830d,0xa5f57916,0xd2123fca,0x6d4b8a6b,0xf9c546f8,0x236ea68a,0xfa608d36,0xc1d36873,0x8d436d13,0xcd76e495,0x8fb080af,0xd4d9c221,0xe8ad3fb5,0x665c1728,0xb3d572e0,0xcf1ebe4d +.long 0x584c5e20,0xa7a8746a,0xb9dc7035,0x267e4ea1,0xb9548c9b,0x593a15cf,0x4bd012f3,0x5e6e2135,0x8c8f936e,0xdf31cc6a,0xb5c241dc,0x8af84d04,0x345efb86,0x63990a6f,0xb9b962cb,0x6fef4e61 +.long 0x25722608,0xf6368f09,0x131cf5c6,0x131260db,0xfab4f7ac,0x40eb353b,0x37eee829,0x85c78880,0xc3bdf24e,0x4c1581ff,0xf5c3c5a8,0x5bff75cb,0xa14e6f40,0x35e8c83f,0x0295e0ca,0xb81d1c0f +.long 0xf43a730f,0xfcde7cc8,0x33ab590e,0xe89b6f3c,0xad03240b,0xc823f529,0x98bea5db,0x82b79afe,0x962fe5de,0x568f2856,0x60c591f3,0x0c590adb,0x4a28a858,0x1fc74a14,0xb3203f4c,0x3b662498 +.long 0x6c39765a,0x91e3cf0d,0xac3cca0b,0xa2db3acd,0xcb953b50,0x288f2f08,0xcf43cf1a,0x2414582c,0x60eee9a8,0x8dec8bbc,0x729aa042,0x54c79f02,0x6532f5d5,0xd81cd5ec,0xcf82e15f,0xa672303a +.long 0x719c0563,0x376aafa8,0xbc5fc79f,0xcd8ad2dc,0xcb750cd3,0x303fdb9f,0x4418b08e,0x14ff052f,0x3e2d6520,0xf75084cf,0x144ed509,0x7ebdf0f8,0xd3f25b98,0xf43bf0f2,0xa354d837,0x86ad71cf +.long 
0x26f43572,0xb827fe92,0x5d824758,0xdfd3ab5b,0x539094c1,0x315dd23a,0x66623d68,0x85c0e37a,0x7be19ae0,0x575c7972,0xdf0d36b5,0x616a3396,0x26b1ff7e,0xa1ebb3c8,0x140ad453,0x635b9485 +.long 0xda430c0b,0x92bf3cda,0x3a96dac6,0x4702850e,0x15ac326a,0xc91cf0a5,0xab8c25e4,0x95de4f49,0xe265c17c,0xb01bad09,0x087b3881,0x24e45464,0xe1fac5ca,0xd43e583c,0x6ead97a6,0xe17cb318 +.long 0x74dcec46,0x6cc39243,0x54c2b73f,0x33cfc02d,0xf26cd99c,0x82917844,0xd1773f89,0x8819dd95,0x0871f427,0x09572aa6,0xf6f01c34,0x8e0cf365,0xbff1f5af,0x7fa52988,0xe75e8e50,0x4eb357ea +.long 0x868af75d,0xd9d0c8c4,0x45c8c7ea,0xd7325cff,0xcc81ecb0,0xab471996,0x611824ed,0xff5d55f3,0x1977a0ee,0xbe314541,0x722038c6,0x5085c4c5,0xf94bb495,0x2d5335bf,0xc8e2a082,0x894ad8a6 +.long 0xada35438,0x5c3e2341,0x049b8c4e,0xf4a9fc89,0x9f17cf34,0xbeeb355a,0x6c91fe10,0x3f311e0e,0x92ab9891,0xc2d20038,0x3e8ce9a9,0x257bdcc1,0x88c53bee,0x1b2d9789,0xcdba143a,0x927ce89a +.long 0x523db280,0xb0a32cca,0x50d43783,0x5c889f8a,0x4897d16f,0x503e04b3,0x08f5f2e8,0x8cdb6e78,0x179c8e74,0x6ab91cf0,0x48211d60,0xd8874e52,0xea851200,0xf948d4d5,0xe6f9840a,0x4076d41e +.long 0x47b517ea,0xc20e263c,0x30685e5e,0x79a448fd,0xf90631a0,0xe55f6f78,0xa79e6346,0x88a790b1,0x80969fe8,0x62160c7d,0x41491bb9,0x54f92fd4,0x5c957526,0xa6645c23,0xbea3ce7b,0xf44cc5ae +.long 0x8b1e68b7,0xf7628327,0x303f29d3,0xc731ad7a,0x57d03ecb,0xfe5a9ca9,0x41bc97a7,0x96c0d50c,0x9b4f7f24,0xc4669fe7,0x3d9967ef,0xfdd781d8,0x5d2c208d,0x7892c7c3,0xae545cb3,0x8bf64f7c +.long 0x467be912,0xc01f862c,0xc73d30cc,0xf4c85ee9,0x6ab83ec7,0x1fa6f4be,0x4e3e3cf9,0xa07a3c1c,0x0c00beb3,0x87f8ef45,0x000d4c3e,0x30e2c2b3,0xfe08bf5b,0x1aa00b94,0x9224ef52,0x32c133aa +.long 0x32e5685d,0x38df16bb,0x58e6f544,0x68a9e069,0xcdc5ebc6,0x495aaff7,0x378b135f,0xf894a645,0x09e27ecf,0xf316350a,0x58f7179d,0xeced201e,0xe97861ba,0x2eec273c,0xd693be2e,0x47ec2cae +.long 0xf68367ce,0xfa4c97c4,0xbe5a5755,0xe4f47d0b,0xb298a979,0x17de815d,0xc177dc7d,0xd7eca659,0x49ded0a3,0x20fdbb71,0xfb34d3c5,0x4cb2aad4,0x60858a33,0x2cf31d28,0xa24aa40f,0x3b6873ef +.long 0x2c11bb37,0x540234b2,0xed4c74a3,0x2d0366dd,0xeec5f25d,0xf9a968da,0x67b63142,0x36601068,0x68d7b6d4,0x07cd6d2c,0x0c842942,0xa8f74f09,0x7768b1ee,0xe2751404,0xfe62aee4,0x4b5f7e89 +.long 0x89070d26,0xc6a77177,0xdd1c8bc7,0xa1f28e4e,0x469e1f17,0xea5f4f06,0xfbdb78e0,0x78fc242a,0x8b0588f1,0xc9c7c592,0x1535921e,0xb6b7a0fd,0xbde5ae35,0xcc5bdb91,0x12ff1864,0xb42c485e +.long 0xdbab98aa,0xa1113e13,0xa17b1024,0xde9d469b,0xc0462d3a,0x23f48b37,0x7c5c078d,0x3752e537,0x15544eb9,0xe3a86add,0x80fba279,0xf013aea7,0xf22001b5,0x8b5bb76c,0xf02891ab,0xe617ba14 +.long 0x936219d3,0xd39182a6,0xae51cb19,0x5ce1f194,0xbf07a74c,0xc78f8598,0x22cbf1bc,0x6d7158f2,0xe300ce18,0x3b846b21,0x2d11275d,0x35fba630,0xa0239b9b,0x5fe25c36,0xdf05d940,0xd8beb35d +.long 0x1f7e320d,0x4db02bb0,0x6da320ea,0x0641c364,0x821389a3,0x6d95fa5d,0x8fcd8e3d,0x92699748,0xceb6c143,0x316fef17,0xd933762b,0x67fcb841,0x118b17f8,0xbb837e35,0x9fd24821,0x4b92552f +.long 0x46aca793,0xae6bc70e,0xe579311b,0x1cf0b0e4,0x5802f716,0x8dc631be,0xbddbee4d,0x099bdc6f,0x0caf8b05,0xcc352bb2,0x72d63df2,0xf74d505a,0x91c4f408,0xb9876d4b,0x9e229b2d,0x1ce18473 +.long 0x83abdb4a,0x49507597,0xdee84b18,0x850fbcb6,0x609e67dc,0x6325236e,0x9336c6d8,0x04d831d9,0xfa12d45d,0x8deaae3b,0x4746e246,0xe425f8ce,0x24f5f31e,0x8004c175,0xad62c3b7,0xaca16d8f +.long 0x9152f934,0x0dc15a6a,0xed0e12c1,0xf1235e5d,0xda477dac,0xc33c06ec,0xb2ea0006,0x76be8732,0x0c0cd313,0xcf3f7831,0xa614260d,0x3c524553,0xcab22d15,0x31a756f8,0x77827a20,0x03ee10d1 +.long 
0x1994ef20,0xd1e059b2,0x638ae318,0x2a653b69,0x2f699010,0x70d5eb58,0x09f5f84a,0x279739f7,0x8b799336,0x5da4663c,0x203c37eb,0xfdfdf14d,0xa1dbfb2d,0x32d8a9dc,0x77d48f9b,0xab40cff0 +.long 0xd20b42d5,0xc018b383,0x9f78845f,0xf9a810ef,0xbdba9df0,0x40af3753,0x131dfdf9,0xb90bdcfc,0xf01ab782,0x18720591,0x6af12a88,0xc823f211,0x0dc14401,0xa51b80f3,0xfb2dfbe3,0xde248f77 +.long 0x0cafe751,0xef5a44e5,0xd4dcd221,0x73997c9c,0xde854024,0x32fd86d1,0xa09b84bb,0xd5b53adc,0xdcedd8d1,0x008d7a11,0x74b32c84,0x406bd1c8,0x05dde8b1,0x5d4472ff,0xfce2b32f,0x2e25f2cd +.long 0x29dfc254,0xbec0dd5e,0x2b98b267,0x4455fcf6,0xc72df2ad,0x0b4d43a5,0x48a75397,0xea70e6be,0x5820f3bf,0x2aad6169,0x9e37f68f,0xf410d2dd,0x7be5ac83,0x70fb7dba,0x36ec3eec,0x636bb645 +.long 0x9754e21c,0x27104ea3,0x8d63c373,0xbc87a3e6,0x4109db9a,0x483351d7,0x60134da7,0x0fa724e3,0xb0720b16,0x9ff44c29,0x06aceead,0x2dd0cf13,0xe26929a6,0x5942758c,0xb766a92b,0x96c5db92 +.long 0x5f18395e,0xcec7d4c0,0x1f80d032,0xd3f22744,0xcb86075b,0x7a68b37a,0xafef92db,0x074764dd,0x7bc7f389,0xded1e950,0xb9756460,0xc580c850,0x7da48157,0xaeeec2a4,0x82c587b3,0x3f0b4e7f +.long 0xa9f19c53,0x231c6de8,0x6974e34e,0x5717bd73,0xf1508fa9,0xd9e1d216,0xdadaa124,0x9f112361,0x823b7348,0x80145e31,0xac634069,0x4dd8f0d5,0x2297c258,0xe3d82fc7,0x9cee7431,0x276fcfee +.long 0x2bc0aea9,0x8eb61b5e,0xde329431,0x4f668fd5,0x38e4b87e,0x03a32ab1,0x73d0ef0b,0xe1374517,0x853ac983,0x1a46f7e6,0x68e78a57,0xc3bdf42e,0x2ea96dd1,0xacf20785,0xf1638460,0xa10649b9 +.long 0x879fbbed,0xf2369f0b,0xda9d1869,0x0ff0ae86,0x56766f45,0x5251d759,0x2be8d0fc,0x4984d8c0,0xd21008f0,0x7ecc95a6,0x3a1a1c49,0x29bd54a0,0xd26c50f3,0xab9828c5,0x51d0d251,0x32c0087c +.long 0x0c1cdb26,0x9bac3ce6,0x557ca205,0xcd94d947,0x9db1fdcd,0x1b1bd598,0xa3d8b149,0x0eda0108,0x56152fcc,0x95066610,0xe7192b33,0xc2f037e6,0xc92e05a4,0xdeffb41a,0xc2f6c62e,0x1105f6c2 +.long 0x8733913c,0x68e73500,0x3f3adc40,0xcce86163,0x38a278e9,0xf407a942,0x2ab21292,0xd13c1b9d,0x1c74cf5c,0x93ed7ec7,0xf1a4c1b4,0x8887dc48,0x4b3a11f1,0x3830ff30,0x58937cb6,0x358c5a3c +.long 0x89022829,0x027dc404,0x3b798f79,0x40e93977,0x38be6ead,0x90ad3337,0xf34c0a5d,0x9c23f6bc,0xfbffd8bb,0xd1711a35,0x1949d3dd,0x60fcfb49,0x7825d93a,0x09c8ef4b,0xa0a8c968,0x24233cff +.long 0xe6d982af,0x67ade46c,0xe7544d7c,0xebb6bf3e,0x3d8bd087,0xd6b9ba76,0x4dc61280,0x46fe382d,0xb5bdbd75,0xbd39a7e8,0xb8f228fe,0xab381331,0xce1c4300,0x0709a77c,0xf337ceac,0x6a247e56 +.long 0x636288be,0x8f34f21b,0xc8a7c305,0x9dfdca74,0xea919e04,0x6decfd1b,0x8e1991f8,0xcdf2688d,0xd0f8a67e,0xe607df44,0x0b58d010,0xd985df4b,0x0c24f8f4,0x57f834c5,0xa0bf01ae,0xe976ef56 +.long 0xa1c32373,0x536395ac,0x734c0a13,0x351027aa,0x5e6bd5bc,0xd2f1b5d6,0x223debed,0x2b539e24,0x0eaa1d71,0xd4994cec,0x661dcf65,0x2a83381d,0x7b54c740,0x5f1aed2f,0xd6dda5ee,0x0bea3fa5 +.long 0x36cc6134,0x9d4fb684,0xc0a443dd,0x8eb9bbf3,0x383b7d2a,0xfc500e2e,0x5b775257,0x7aad621c,0x0a8f7cc0,0x69284d74,0x07562d65,0xe820c2ce,0x499758ee,0xbf9531b9,0x6ee0cc2d,0x73e95ca5 +.long 0xfbaf50a5,0xf61790ab,0x684e0750,0xdf55e76b,0xf176b005,0xec516da7,0x7a2dddc7,0x575553bb,0x553afa73,0x37c87ca3,0x4d55c251,0x315f3ffc,0xaf3e5d35,0xe846442a,0x6495ff28,0x61b91149 +.long 0xfa326dc3,0x23cc95d3,0x18fc2cea,0x1df4da1f,0xd0a37d59,0x24bf9adc,0x320d6e1e,0xb6710053,0x618344d1,0x96f9667e,0xa06445af,0xcc7ce042,0xd68dbc3a,0xa02d8514,0x280b5a5b,0x4ea109e4 +.long 0xb40961bf,0x5741a7ac,0x6aa56bfa,0x4ada5937,0x02b765d1,0x7feb9145,0xe6ad1582,0x561e97be,0xda3982f5,0xbbc4a5b6,0xb546f468,0x0c2659ed,0x59612d20,0xb8e7e6aa,0xac19e8e0,0xd83dfe20 +.long 
0xb835398c,0x8530c45f,0xb38a41c2,0x6106a8bf,0x35f5dcdb,0x21e8f9a6,0xcae498ed,0x39707137,0xd8249f00,0x70c23834,0xab2537a0,0x9f14b58f,0x5f61c0c2,0xd043c365,0x09a194a7,0xdc5926d6 +.long 0x8e77738a,0xddec0339,0xfba46426,0xd07a63ef,0xee7f6e86,0x2e58e79c,0xff32d241,0xe59b0459,0x20fa0338,0xc5ec84e5,0xeaff5ace,0x97939ac8,0xb4a38313,0x0310a4e3,0x8f9d9885,0x9115fba2 +.long 0x5fadf8c3,0x8dd710c2,0xce19c0e2,0x66be38a2,0x4cfe5022,0xd42a279c,0x0e24e1b8,0x597bb530,0xc153ca7f,0x3cde86b7,0x707d63bd,0xa8d30fb3,0xbd60d21e,0xac905f92,0x7b9a54ab,0x98e7ffb6 +.long 0xe9726a30,0xd7147df8,0xafce3533,0xb5e216ff,0x2ff1ec40,0xb550b799,0xa1e953fd,0x6b613b87,0x792d5610,0x87b88dba,0xa190fbe1,0x2ee1270a,0x2ef581da,0x02f4e2dc,0xeff82a95,0x016530e4 +.long 0x8fd6ee89,0xcbb93dfd,0x46848fff,0x16d3d986,0x1da47adf,0x600eff24,0x0ad47a71,0x1b9754a0,0x70c33b98,0x8f9266df,0xdf34186e,0xaadc87ae,0x4ad24132,0x0d2ce8e1,0x19946eba,0x8a47cbfc +.long 0x62b5f3af,0x47feeb66,0x0abb3734,0xcefab561,0x19f35cb1,0x449de60e,0x157f0eb9,0x39f8db14,0x3c61bfd6,0xffaecc5b,0x41216703,0xa5a4d41d,0x224e1cc2,0x7f8fabed,0x871ad953,0x0d5a8186 +.long 0xd22da9a9,0xf10774f7,0xcc8a9b0d,0x45b8a678,0xbdc32cff,0xd9c2e722,0x337202a5,0xbf71b5f5,0x69fc4db9,0x95c57f2f,0x765d01e1,0xb6dad34c,0xcb904635,0x7e0bd13f,0x763a588c,0x61751253 +.long 0x81af2c2d,0xd85c2997,0x81b9d7da,0xc0f7d9c4,0x08533e8d,0x838a34ae,0x311d8311,0x15c4cb08,0x8e121e14,0x97f83285,0x85000a5f,0xeea7dc1e,0x5d256274,0x0c6059b6,0xb95075c0,0xec9beace +.long 0x1df97828,0x173daad7,0xa8937877,0xbf851cb5,0x01646f3c,0xb083c594,0x50c6d352,0x3bad30cf,0x496bbcea,0xfeb2b202,0x18a1e8ba,0x3cf9fd4f,0x1c066029,0xd26de7ff,0x4e9ed4f8,0x39c81e9e +.long 0x7b390d35,0xd8be0cb9,0x964aab27,0x01df2bbd,0xc3ef64f8,0x3e8c1a65,0x716ed1dd,0x567291d1,0x5f5406d3,0x95499c6c,0x5ba8e23f,0x71fdda39,0xd5096ece,0xcfeb320e,0xca66dd16,0xbe7ba92b +.long 0xc6fb5a7d,0x4608d36b,0x6d2dd0e0,0xe3eea15a,0x8f97a36a,0x75b0a3eb,0x1c83de1e,0xf59814cc,0x1c33c23f,0x56c9c5b0,0x6faa4136,0xa96c1da4,0xde316551,0x46bf2074,0x1f756c8f,0x3b866e7b +.long 0x1495ed6b,0x727727d8,0xb682dce7,0xb2394243,0x758610f3,0x8ab8454e,0x857d72a4,0xc243ce84,0xdbbf370f,0x7b320d71,0x78e0f7ca,0xff9afa37,0xea7b523f,0x0119d1e0,0x058c7d42,0xb997f8cb +.long 0x37bbb184,0x285bcd2a,0xa45d1fa6,0x51dcec49,0xe29634cb,0x6ade3b64,0x26b86ef1,0x080c94a7,0x2283fbe3,0xba583db1,0x5a9315ed,0x902bddc8,0x86964bec,0x07c1ccb3,0xb6258301,0x78f4eacf +.long 0x56f90823,0x4bdf3a49,0x741d777b,0xba0f5080,0xf38bf760,0x091d71c3,0x9b625b02,0x9633d50f,0xb8c9de61,0x03ecb743,0x5de74720,0xb4751254,0x74ce1cb2,0x9f9defc9,0x00bd32ef,0x774a4f6a +.long 0x73848f22,0xaca385f7,0xf3f8558e,0x53dad716,0x93c471f9,0xab7b34b0,0x19644bc7,0xf530e069,0xdd59d31a,0x3d9fb1ff,0x08daa795,0x4382e0df,0xd5cc88d7,0x165c6f4b,0x4a18c900,0xeaa392d5 +.long 0x648024ee,0x94203c67,0x8c2fabcd,0x188763f2,0xbbaec835,0xa80f87ac,0xf29d8d54,0x632c96e0,0x4c00a95e,0x29b0a60e,0xe011e9fa,0x2ef17f40,0x15b77223,0xf6c0e1d1,0x14b04e32,0xaaec2c62 +.long 0x3d84e58c,0xd35688d8,0x958571db,0x2af5094c,0x760682a6,0x4fff7e19,0xe39a407c,0x4cb27077,0x4ff0e321,0x0f59c547,0x1b34c8ff,0x169f34a6,0x52bc1ba7,0x2bff1096,0x83583544,0xa25423b7 +.long 0x0ac8b782,0x5d55d5d5,0x2db3c892,0xff6622ec,0x6b8bb642,0x48fce741,0x69d7e3dc,0x31d6998c,0xcadcaed0,0xdbaf8004,0xd81d053c,0x801b0142,0x59630ec6,0x94b189fc,0xaf762c8e,0x120e9934 +.long 0xfdc6a404,0x53a29aa4,0xa1909948,0x19d8e01e,0xd7e89681,0x3cfcabf1,0x4e132d37,0x3321a50d,0xe9a86111,0xd0496863,0x06a3bc65,0x8c0cde61,0xfc9f8eef,0xaf866c49,0xff7f5141,0x2066350e +.long 
0xe56ddfbd,0x4f8a4689,0xfe32983a,0xea1b0c07,0x873cb8cb,0x2b317462,0x2d93229f,0x658deddc,0x0f64ef58,0x65efaf4d,0x730cc7a8,0xfe43287d,0x3d047d70,0xaebc0c72,0xd92d26c9,0x92efa539 +.long 0x94b56526,0x06e78457,0x0961002d,0x415cb80f,0x76dcb10f,0x89e5c565,0xff9259fe,0x8bbb6982,0x9abc2668,0x4fe8795b,0x1e678fb1,0xb5d4f534,0x7b7da2b9,0x6601f3be,0xa13d6805,0x98da59e2 +.long 0x01799a52,0x190d8ea6,0xb86d2952,0xa20cec41,0x7fff2a7c,0x3062ffb2,0x79f19d37,0x741b32e5,0x4eb57d47,0xf80d8181,0x16aef06b,0x7a2d0ed4,0x1cecb588,0x09735fb0,0xc6061f5b,0x1641caaa +.long 0x20151427,0x7f99824f,0x92430206,0x206828b6,0xe1112357,0xaa9097d7,0x09e414ec,0xacf9a2f2,0x27915356,0xdbdac9da,0x001efee3,0x7e0734b7,0xd2b288e2,0x54fab5bb,0xf62dd09c,0x4c630fc4 +.long 0x1ac2703b,0x8537107a,0x6bc857b5,0xb49258d8,0xbcdaccd1,0x57df14de,0xc4ae8529,0x24ab68d7,0x734e59d0,0x7ed8b5d4,0xc495cc80,0x5f8740c8,0x291db9b3,0x84aedd5a,0x4fb995be,0x80b360f8 +.long 0x5fa067d1,0xae915f5d,0x9668960c,0x4134b57f,0xa48edaac,0xbd3656d6,0xfc1d7436,0xdac1e3e4,0xd81fbb26,0x674ff869,0xb26c33d4,0x449ed3ec,0xd94203e8,0x85138705,0xbeeb6f4a,0xccde538b +.long 0xa61a76fa,0x55d5c68d,0xca1554dc,0x598b441d,0x773b279c,0xd39923b9,0x36bf9efc,0x33331d3c,0x298de399,0x2d4c848e,0xa1a27f56,0xcfdb8e77,0x57b8ab70,0x94c855ea,0x6f7879ba,0xdcdb9dae +.long 0x019f2a59,0x7bdff8c2,0xcb4fbc74,0xb3ce5bb3,0x8a9173dd,0xea907f68,0x95a75439,0x6cd3d0d3,0xefed021c,0x92ecc4d6,0x6a77339a,0x09a9f9b0,0x7188c64a,0x87ca6b15,0x44899158,0x10c29968 +.long 0xed6e82ef,0x5859a229,0x65ebaf4e,0x16f338e3,0x5ead67ae,0x0cd31387,0x54ef0bb4,0x1c73d228,0x74a5c8c7,0x4cb55131,0x7f69ad6a,0x01cd2970,0xe966f87e,0xa04d00dd,0x0b7b0321,0xd96fe447 +.long 0x88fbd381,0x342ac06e,0x5c35a493,0x02cd4a84,0x54f1bbcd,0xe8fa89de,0x2575ed4c,0x341d6367,0xd238202b,0xebe357fb,0xa984ead9,0x600b4d1a,0x52436ea0,0xc35c9f44,0xa370751b,0x96fe0a39 +.long 0x7f636a38,0x4c4f0736,0x0e76d5cb,0x9f943fb7,0xa8b68b8b,0xb03510ba,0x9ed07a1f,0xc246780a,0x6d549fc2,0x3c051415,0x607781ca,0xc2953f31,0xd8d95413,0x955e2c69,0x7bd282e3,0xb300fadc +.long 0x87e9189f,0x81fe7b50,0xf42dda27,0xdb17375c,0xcf0a5904,0x22f7d896,0xebe348e6,0xa0e57c5a,0xf40e3c80,0xa61011d3,0x8db705c5,0xb1189321,0x50fedec3,0x4ed9309e,0x4d6d5c1d,0xdcf14a10 +.long 0x55691342,0x056c265b,0x91049dc7,0xe8e08504,0xc9bae20a,0x131329f5,0xd9dccdb4,0x96c8b3e8,0xfb4ee6b4,0x8c5ff838,0x41e8ccf0,0xfc5a9aeb,0xfae050c6,0x7417b764,0x00452080,0x0953c3d7 +.long 0x38dfe7e8,0x21372682,0x2bb79d4b,0xea417e15,0x76e7cf2d,0x59641f1c,0xea0bcfcc,0x271e3059,0x7253ecbd,0x624c7dfd,0x4fca6186,0x2f552e25,0x4d866e9c,0xcbf84ecd,0xf68d4610,0x73967709 +.long 0xc27901b4,0xa14b1163,0x899b8bf3,0xfd9236e0,0xcbc6da0a,0x42b091ec,0x5ad1d297,0xbb1dac6f,0xa91cf76e,0x80e61d53,0xd31f1ee7,0x4110a412,0x13efcf77,0x2d87c3ba,0xdf450d76,0x1f374bb4 +.long 0x0d188dab,0x5e78e2f2,0xf4b885ef,0xe3968ed0,0x7314570f,0x46c0568e,0x01170521,0x31616338,0x4f0c8afe,0x18e1e7e2,0xdeea78da,0x4caa75ff,0x7c5d8a51,0x82db67f2,0x6f505370,0x36a44d86 +.long 0x0333974f,0xd72c5bda,0x27a70146,0x5db516ae,0x210ef921,0x34705281,0x0c9c38e5,0xbff17a8f,0x12476da1,0x78f4814e,0x33c16980,0xc1e16613,0x424d4bca,0x9e5b386f,0xc85740de,0x4c274e87 +.long 0x6c2f5226,0xb6a9b88d,0x550d7ca8,0x14d1b944,0x1fc41709,0x580c85fc,0x54c6d519,0xc1da368b,0xd5113cf7,0x2b0785ce,0x5a34708f,0x0670f633,0x15cc3f88,0x46e23767,0x50c72c8f,0x1b480cfa +.long 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,0xdbdf58e9,0xd953c50d,0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,0x9eb288f3,0x863ebb7e +.long 
0x6aca8ee7,0x6e6ab761,0xd7b40358,0x97d10b39,0x1e5feb0d,0x1687d377,0x8265a27a,0xc83e50e4,0xc954b313,0x8f75a9fe,0x310d1f61,0xcc2e8f47,0x6557d0e0,0xf5ba81c5,0x3eaf6207,0x25f9680c +.long 0x4354080b,0xf95c6609,0x7bf2fe1c,0x5225bfa5,0x5c7d98fa,0xc5c004e2,0x019aaf60,0x3561bf1c,0xba151474,0x5e6f9f17,0xb04f6eca,0xdec2f934,0x269acb1e,0x64e368a1,0x0cdda493,0x1332d9e4 +.long 0xdf23de05,0x60d6cf69,0x009339a0,0x66d17da2,0x0a693923,0x9fcac985,0xed7c6a6d,0xbcf057fc,0xf0b5662c,0xc3c5c8c5,0xdcba4f24,0x25318dd8,0x082b69ff,0x60e8cb75,0x1e728c01,0x7c23b3ee +.long 0x097e4403,0x15e10a0a,0x19854665,0xcb3d0a86,0xd67d4826,0x88d8e211,0x0b9d2839,0xb39af66e,0xbd475ca8,0xa5f94588,0xc077b80b,0xe06b7966,0xda27c26c,0xfedb1485,0xfe0fd5e0,0xd290d33a +.long 0xf34fb0fa,0xa40bcc47,0x1fb1ab09,0xb4760cc8,0xa273bfe3,0x8fca0993,0xf70b213c,0x13e4fe07,0xfdb05163,0x3bcdb992,0x0c2b19b6,0x8c484b11,0xaaf2e3e2,0x1acb815f,0xb89ff1b4,0xc6905935 +.long 0x586e74e1,0xb2ad6f9d,0x67b80484,0x488883ad,0x369c3ddb,0x758aa2c7,0x9f9afd31,0x8ab74e69,0x5e21beb1,0x10fc2d28,0x318c42f9,0x3484518a,0x53cf40c3,0x377427dc,0x391bc1d9,0x9de0781a +.long 0x693807e1,0x8faee858,0x4e81ccc7,0xa3865327,0x6f835b84,0x02c30ff2,0x0d3d38d4,0xb604437b,0x5ca1823d,0xb3fc8a98,0x03be0324,0xb82f7ec9,0xcf684a33,0xee36d761,0x9f29bf7d,0x5a01df0e +.long 0x1306583d,0x686202f3,0x437c622e,0x05b10da0,0x076a7bc8,0xbf9aaa0f,0x8f8f4e43,0x25e94efb,0xfa3dc26d,0x8a35c9b7,0x96ff03c5,0xe0e5fb93,0xebc394ce,0xa77e3843,0x8361de60,0xcede6595 +.long 0xa1993545,0xd27c22f6,0x24d671ba,0xab01cc36,0xa169c28e,0x63fa2877,0x2eb08376,0x925ef904,0x53aa0b32,0x3b2fa3cf,0x71c49d7a,0xb27beb5b,0xd105e27f,0xb60e1834,0x4f68570d,0xd6089788 +.long 0xd6fbc2ac,0x23094ce0,0x815ff551,0x738037a1,0x6bef119c,0xda73b1bb,0xeef506ba,0xdcf6c430,0xe3ef104a,0x00e4fe7b,0x0a065628,0xebdd9a2c,0x8792043e,0x853a81c3,0xb3b59108,0x22ad6ece +.long 0x39cd297d,0x9fb813c0,0x05bda5d9,0x8ec7e16e,0x0d104b96,0x2834797c,0x7c511510,0xcc11a2e7,0x96ee6380,0x96ca5a53,0xcea38742,0x054c8655,0xd54dfa7d,0xb5946852,0x1f4ab207,0x97c422e7 +.long 0x0c22b540,0xbf907509,0xb7c267d4,0x2cde42aa,0x5ab0d693,0xba18f9ed,0x6e4660d9,0x3ba62aa6,0xab9ea96a,0xb24bf97b,0xe3b60e32,0x5d039642,0x7c4d9bd5,0x4e6a4506,0x7ed4a6a4,0x666c5b9e +.long 0x8edbd7cc,0xfa3fdcd9,0xc6ccd753,0x4660bb87,0x21e6b64f,0x9ae90820,0xb36bfb3f,0x8a56a713,0x5726d47f,0xabfce096,0x0b1a9a7f,0x9eed01b2,0x4eb74a37,0x30e9cad4,0x53e9666d,0x7b2524cc +.long 0x8f4b002f,0x6a29683b,0x41f4fc20,0xc2200d7a,0x3a338acc,0xcf3af47a,0xe7128975,0x6539a4fb,0xc33c7fcf,0xcec31c14,0xc7be322b,0x7eb6799b,0x6646f623,0x119ef4e9,0x54d7299b,0x7b7a26a5 +.long 0x403f46f2,0xcb37f08d,0x1a0ec0c7,0x94b8fc43,0xc332142f,0xbb8514e3,0xe80d2a7a,0xf3ed2c33,0xb639126c,0x8d2080af,0xe3553ade,0xf7b6be60,0x1c7e2b09,0x3950aa9f,0x6410f02b,0x847ff958 +.long 0x678a31b0,0x877b7cf5,0x3998b620,0xd50301ae,0xc00fb396,0x734257c5,0x04e672a6,0xf9fb18a0,0xe8758851,0xff8bd8eb,0x5d99ba44,0x1e64e4c6,0x7dfd93b7,0x4b8eaedf,0x04e76b8c,0xba2f2a98 +.long 0xe8053433,0x7d790cba,0x3d2c9585,0xc8e725a0,0xcdd8f5ed,0x58c5c476,0xefa9fe1d,0xd106b952,0x0eff13a9,0x3c5c775b,0xe057b930,0x242442ba,0xc9b70cbd,0xe9f458d4,0xa3cdb89a,0x69b71448 +.long 0x0e2ed742,0x41ee46f6,0x40067493,0x573f1045,0x9d54c304,0xb1e154ff,0x8d3a7502,0x2ad0436a,0x431a8121,0xee4aaa2d,0x886f11ed,0xcd38b3ab,0x034a0eb7,0x57d49ea6,0xf7e85e58,0xd2b773bd +.long 0x9b5c1f14,0x4a559ac4,0x3e54df2b,0xc444be1a,0xeda41891,0x13aad704,0x5eb5c788,0xcd927bec,0xe48c8a34,0xeb3c8516,0x4b546669,0x1b7ac812,0x594df8ec,0x1815f896,0x79227865,0x87c6a79c +.long 
0x9b56ddbd,0xae02a2f0,0x8a2f1cf3,0x1339b5ac,0x839dff0d,0xf2b569c7,0xfee9a43d,0xb0b9e864,0x77bb064e,0x4ff8ca41,0xfd249f63,0x145a2812,0xf86f689a,0x3ab7beac,0x01d35f5e,0x9bafec27 +.long 0x4265aa91,0x28054c65,0x035efe42,0xa4b18304,0x9639dec7,0x6887b0e6,0x3d52aea5,0xf4b8f6ad,0x971a8a13,0xfb9293cc,0x4c934d07,0x3f159e5d,0x09acbc29,0x2c50e9b1,0x7154d129,0x08eb65e6 +.long 0x30b75c3e,0x4feff589,0x94491c93,0x0bb82fe2,0x89af62bb,0xd8ac377a,0x9685e49f,0xd7b51490,0x04497f19,0xabca9a7b,0x1a7ad13f,0x1b35ed0a,0x3ec86ed6,0x6b601e21,0xce0c76f1,0xda91fcb9 +.long 0xd7ab27e1,0x9e28507b,0x63945b7b,0x7c19a555,0xaafc9827,0x6b43f0a1,0x3aa55b91,0x443b4fbd,0x6962c88f,0x962b2e65,0xce0db0ca,0x139da8d4,0x1b8d6c4f,0xb93f05dd,0x180b9824,0x779cdff7 +.long 0xae57c7b7,0xbba23fdd,0x1b932522,0x345342f2,0x556d4aa3,0xfd9c80fe,0x6525bb61,0xa03907ba,0xff218933,0x38b010e1,0xaa52117b,0xc066b654,0x94f2e6ea,0x8e141920,0x0d32f2b2,0x66a27dca +.long 0x048b3717,0x69c7f993,0xb178ae1c,0xbf5a989a,0x564f1d6b,0x49fa9058,0xd31fde4e,0x27ec6e15,0x7276e7fc,0x4cce0373,0x89d6bf02,0x64086d79,0x4ccdd979,0x5a72f046,0x47775631,0x909c3566 +.long 0x75dd7125,0x1c07bc6b,0x87a0428d,0xb4c6bc97,0xfdeb6b9d,0x507ece52,0xb2c95432,0xfca56512,0xd0e8bd06,0x15d97181,0xc6bb46ea,0x384dd317,0x3952b624,0x5441ea20,0x4e7dc2fb,0xbcf70dee +.long 0x6628e8c3,0x372b016e,0xb60a7522,0x07a0d667,0x0a344ee2,0xcf05751b,0x118bdeec,0x0ec09a48,0xd83dce46,0x6e4b3d4e,0x99d2fc6e,0x43a6316d,0x56cf044c,0xa99d8989,0xae3e5fb7,0x7c7f4454 +.long 0xfbabbe92,0xb2e6b121,0xe1330076,0x281850fb,0x97890015,0x093581ec,0x75ff77f5,0x69b1dded,0xab105105,0x7cf0b18f,0xa89ccfef,0x953ced31,0xeb914009,0x3151f85f,0x88ed48ad,0x3c9f1b87 +.long 0x4a7eadcb,0xc9aba1a1,0x522e71cf,0x928e7501,0x3a2e4f83,0xeaede727,0x1ce3bbd3,0x467e10d1,0xb955dcf0,0xf3442ac3,0xd3d5e527,0xba96307d,0xfd77f474,0xf763a10e,0x6a6e1ff0,0x5d744bd0 +.long 0xa777899e,0xd287282a,0xd03f3cde,0xe20eda8f,0x50b07d31,0x6a7e75bb,0x6f379de4,0x0b7e2a94,0x19f593cf,0x31cb64ad,0x1e76ef1d,0x7b1a9e4f,0xb62d609c,0xe18c9c9d,0xe779a650,0x439bad6d +.long 0xe032f144,0x219d9066,0xe8b2ec6a,0x1db632b8,0xfda12f78,0xff0d0fd4,0x2a25d265,0x56fb4c2d,0x255a03f1,0x5f4e2ee1,0xe96af176,0x61cd6af2,0xd068bc97,0xe0317ba8,0x264b988e,0x927d6bab +.long 0xe90fb21e,0xa18f07e0,0xbba7fca1,0x00fd2b80,0x95cd67b5,0x20387f27,0xd39707f7,0x5b89a4e7,0x894407ce,0x8f83ad3f,0x6c226132,0xa0025b94,0xf906c13b,0xc79563c7,0x4e7bb025,0x5f548f31 +.long 0xeac6d113,0x2b4c6b8f,0x0e813c76,0xa67e3f9c,0x3fe1f4b9,0x3982717c,0x26d8050e,0x58865819,0xf7f06f20,0x99f3640c,0x2a66ebc2,0xdc610216,0x767a1e08,0x52f2c175,0x5999871b,0x05660e1a +.long 0x6d3c4693,0x6b0f1762,0x37ed7bea,0xf0e7d627,0xb75b226d,0xc51758c7,0x1f91613b,0x40a88628,0xbbb38ce0,0x889dbaa7,0xbddcad81,0xe0404b65,0x8bc9671f,0xfebccd3a,0xee1f5375,0xfbf9a357 +.long 0x28f33398,0x5dc169b0,0x72e90f65,0xb07ec11d,0xfaab1eb1,0xae7f3b4a,0x5f17538a,0xd970195e,0x0181e640,0x52b05cbe,0x2643313d,0xf5debd62,0x5df31f82,0x76148154,0x3a9e13c5,0x23e03b33 +.long 0x4fde0c1f,0xff758949,0xe5b6ec20,0xbf8a1abe,0x87e1db6c,0x702278fb,0x35ed658f,0xc447ad7a,0x03d0ccf2,0x48d4aa38,0x819a7c03,0x80acb338,0x6e17cecc,0x9bc7c89e,0x03be1d82,0x46736b8b +.long 0xc0432f96,0xd65d7b60,0xdeb5442f,0xddebe7a3,0x7dff69a2,0x79a25307,0x02cf3122,0x37a56d94,0xf2350d0a,0x8bab8aed,0x037b0d9a,0x13c3f276,0x44c65cae,0xc664957c,0xc2e71a88,0x88b44089 +.long 0x5cb02664,0xdb88e5a3,0x8686c72e,0x5d4c0bf1,0xa682d53e,0xea3d9b62,0x0b2ad431,0x9b605ef4,0xc69645d0,0x71bac202,0x6a1b66e7,0xa115f03a,0x158f4dc4,0xfe2c563a,0x4d12a78c,0xf715b3a0 +.long 
0xd413213a,0x8f7f0a48,0xc04becdb,0x2035806d,0x5d8587f5,0xecd34a99,0x9f6d3a71,0x4d8c3079,0x8d95a8f6,0x1b2a2a67,0xf2110d0d,0xc58c9d7d,0xcf8fba3f,0xdeee81d5,0x0c7cdf68,0xa42be3c0 +.long 0xd43b5eaa,0x2126f742,0xdfa59b85,0x054a0766,0x126bfd45,0x9d0d5e36,0x384f8a8f,0xa1f8fbd7,0xd563fccc,0x317680f5,0xf280a928,0x48ca5055,0x27b578cf,0xe00b81b2,0x2994a514,0x10aad918 +.long 0xb7bdc953,0xd9e07b62,0x5bc086dd,0x9f0f6ff2,0x655eee77,0x09d1ccff,0x5bef7df1,0x45475f79,0x86f702cc,0x3faa28fa,0x0f021f07,0x92e60905,0x7f8fa8c6,0xe9e62968,0xf036ea2c,0xbd71419a +.long 0x6028da9a,0x171ee1cc,0xc251f573,0x5352fe1a,0x3fa997f4,0xf8ff236e,0xa5749d5f,0xd831b6c9,0xe350e2c2,0x7c872e1d,0x1e0ce403,0xc56240d9,0x6974f5cb,0xf9deb077,0x961c3728,0x7d50ba87 +.long 0x5a3a2518,0xd6f89426,0xc6303d43,0xcf817799,0x619e5696,0x510a0471,0x3a5e307b,0xab049ff6,0xfeb13ec7,0xe4cdf9b0,0x9d8ff90c,0xd5e97117,0x9afa96af,0xf6f64d06,0x9d2012a2,0x00d0bf5e +.long 0x358bcdc0,0xe63f301f,0x0a9d47f8,0x07689e99,0x4f43d43a,0x1f689e2f,0x90920904,0x4d542a16,0x9ca0a707,0xaea293d5,0x8ac68065,0xd061fe45,0x0090008c,0x1033bf1b,0xc08a6db6,0x29749558 +.long 0xc1d5d034,0x74b5fc59,0x67e215e0,0xf712e9f6,0x860200e6,0xfd520cbd,0x3ea22588,0x0229acb4,0xfff0c82e,0x9cd1e14c,0x59c69e73,0x87684b62,0x96ccb989,0xda85e61c,0xa3d06493,0x2d5dbb02 +.long 0xe86b173c,0xf22ad33a,0xa79ff0e3,0xe8e41ea5,0xdd0d0c10,0x01d2d725,0x032d28f9,0x31f39088,0x7829839e,0x7b3f71e1,0x4502ae58,0x0cf691b4,0xbefc6115,0xef658dbd,0xb3ab5314,0xa5cd6ee5 +.long 0x5f1d2347,0x206c8d7b,0x4cc2253a,0x794645ba,0x58389e08,0xd517d8ff,0x9f847288,0x4fa20dee,0xd797770a,0xeba072d8,0xbf429e26,0x7360c91d,0x80af8279,0x7200a3b3,0x82dadce3,0x6a1c9150 +.long 0xc35d8794,0x0ee6d3a7,0x0356bae5,0x042e6558,0x643322fd,0x9f59698d,0x50a61967,0x9379ae15,0xfcc9981e,0x64b9ae62,0x6d2934c6,0xaed3d631,0x5e4e65eb,0x2454b302,0xf9950428,0xab09f647 +.long 0x22248acc,0xb2083a12,0x3264e366,0x1f6ec0ef,0x5afdee28,0x5659b704,0xe6430bb5,0x7a823a40,0xe1900a79,0x24592a04,0xc9ee6576,0xcde09d4a,0x4b5ea54a,0x52b6463f,0xd3ca65a7,0x1efe9ed3 +.long 0x305406dd,0xe27a6dbe,0xdd5d1957,0x8eb7dc7f,0x387d4d8f,0xf54a6876,0xc7762de4,0x9c479409,0x99b30778,0xbe4d5b5d,0x6e793682,0x25380c56,0xdac740e3,0x602d37f3,0x1566e4ae,0x140deabe +.long 0xafd32acf,0x4481d067,0xe1f71ccf,0xd8f0fcca,0xb596f2da,0xd208dd0c,0x9aad93f9,0xd049d730,0x42ab580e,0xc79f263d,0x23f707b4,0x09411bb1,0x835e0eda,0x8cfde1ff,0x90f03402,0x72707490 +.long 0xc49a861e,0xeaee6126,0xe14f0d06,0x024f3b65,0xc69bfc17,0x51a3f1e8,0xa7686381,0xc3c3a8e9,0xb103d4c8,0x3400752c,0x9218b36b,0x02bc4613,0x7651504a,0xc67f75eb,0xd02aebfa,0xd6848b56 +.long 0xc30fa92b,0xbd9802e6,0x9a552784,0x5a70d96d,0x3f83169b,0x9085c4ea,0x06908228,0xfa9423bb,0xfe97a5b9,0x2ffebe12,0x71b99118,0x85da6049,0x63178846,0x9cbc2f7f,0x9153218e,0xfd96bc70 +.long 0x1782269b,0x958381db,0x2597e550,0xae34bf79,0x5f385153,0xbb5c6064,0xe3088048,0x6f0e96af,0x77884456,0xbf6a0215,0x69310ea7,0xb3b5688c,0x04fad2de,0x17c94295,0x17896d4d,0xe020f0e5 +.long 0x0976505f,0x730ba0ab,0x095e2ec5,0x567f6813,0x6331ab71,0x47062010,0x41d22b9f,0x72cfa977,0x8a2373da,0x33e55ead,0x7ba45a68,0xa8d0d5f4,0x03029d15,0xba1d8f9c,0xfc55b9f3,0x8f34f1cc +.long 0xbbe5a1a9,0xcca4428d,0x3126bd67,0x8187fd5f,0x48105826,0x0036973a,0xb8bd61a0,0xa39b6663,0x2d65a808,0x6d42deef,0x94636b19,0x4969044f,0xdd5d564c,0xf611ee47,0xd2873077,0x7b2f3a49 +.long 0x300eb294,0x94157d45,0x169c1494,0x2b2a656e,0xd3a47aa9,0xc000dd76,0xa6243ea4,0xa2864e4f,0xdb89842e,0x82716c47,0x61479fb7,0x12dfd7d7,0xe0b2f6dc,0x3b9a2c56,0xd7f85d67,0x46be862a +.long 
0x0f82b214,0x03b0d8dd,0xf103cbc6,0x460c34f9,0x18d79e19,0xf32e5c03,0xa84117f8,0x8b8888ba,0xc0722677,0x8f3c37dc,0x1c1c0f27,0x10d21be9,0xe0f7a0c6,0xd47c8468,0xadecc0e0,0x9bf02213 +.long 0x42b48b99,0x0baa7d12,0x48424096,0x1bcb665d,0xebfb5cfb,0x8b847cd6,0x9ad4d10d,0x87c2ae56,0x0de36726,0xf1cbb122,0x3fdfbd21,0xe7043c68,0x4e79d460,0x4bd0826a,0x4bd1a2cb,0x11f5e598 +.long 0xb7fe7b6e,0x97554160,0x400a3fb2,0x7d16189a,0xe328ca1e,0xd73e9bea,0xe793d8cc,0x0dd04b97,0x506db8cc,0xa9c83c9b,0xcf38814c,0x5cd47aae,0xb64b45e6,0x26fc430d,0xd818ea84,0x079b5499 +.long 0xc1c24a3b,0xebb01102,0x1c161c1a,0xca24e568,0x36f00a4a,0x103eea69,0x76176c7b,0x9ad76ee8,0x538e0ff7,0x97451fc2,0x6604b3b0,0x94f89809,0x3249cfd7,0x6311436e,0x41224f69,0x27b4a7bd +.long 0xe0ac2941,0x03b5d21a,0xc2d31937,0x279b0254,0xcac992d0,0x3307c052,0xefa8b1f3,0x6aa7cb92,0x0d37c7a5,0x5a182580,0x342d5422,0x13380c37,0xd5d2ef92,0x92ac2d66,0x030c63c6,0x035a70c9 +.long 0x4ce4f152,0xc16025dd,0xf9df7c06,0x1f419a71,0x91e4bb14,0x6d5b2214,0x839fb4ce,0xfc43c6cc,0x925d6b2d,0x49f06591,0x62186598,0x4b37d9d3,0xd01b1629,0x8c54a971,0x51d50e05,0xe1a9c29f +.long 0x71ba1861,0x5109b785,0xd0c8f93d,0x48b22d5c,0x8633bb93,0xe8fa84a7,0x5aebbd08,0x53fba6ba,0xe5eea7d8,0x7ff27df3,0x68ca7158,0x521c8796,0xce6f1a05,0xb9d5133b,0xfd0ebee4,0x2d50cd53 +.long 0xc5a3ef16,0xc82115d6,0xba079221,0x993eff9d,0x4b5da81c,0xe4da2c5e,0x8033fd85,0x9a89dbdb,0x2b892891,0x60819ebf,0x5d14a4d5,0x53902b21,0xd7fda421,0x6ac35051,0x61c83284,0xcc6ab885 +.long 0xf74cff17,0x14eba133,0xecb813f2,0x240aaa03,0x6f665bee,0xcfbb6540,0xa425ad73,0x084b1fe4,0xd081f6a6,0x009d5d16,0xeef82c90,0x35304fe8,0xaa9eaa22,0xf20346d5,0xac1c91e3,0x0ada9f07 +.long 0x968a6144,0xa6e21678,0x07b31a1e,0x54c1f77c,0x5781fbe1,0xd6bb787e,0xe31f1c4a,0x61bd2ee0,0x781105fc,0xf25aa1e9,0x7b2f8e80,0x9cf2971f,0xcdff919b,0x26d15412,0x34bc896e,0x01db4ebe +.long 0xb40df1cf,0x7d9b3e23,0x94e971b4,0x59337373,0x669cf921,0xbf57bd14,0x0c1a1064,0x865daedf,0x83279125,0x3eb70bd3,0x34ecdaab,0xbc3d5b9f,0x5f755caf,0x91e3ed7e,0xd41e6f02,0x49699f54 +.long 0xd4a7a15b,0x185770e1,0xeaac87e7,0x08f3587a,0x473133ea,0x352018db,0x04fd30fc,0x674ce719,0x088b3e0e,0x7b8d9835,0x5d0d47a1,0x7a0356a9,0x6474a3c4,0x9d9e7659,0xff66966c,0x61ea48a7 +.long 0x0f3e4834,0x30417758,0x17a9afcb,0xfdbb21c2,0x2f9a67b3,0x756fa17f,0xa245c1a8,0x2a6b2421,0x4af02291,0x64be2794,0x2a5804fe,0xade465c6,0xa6f08fd7,0x8dffbd39,0xaa14403b,0xc4efa84c +.long 0x442b0f5c,0xa1b91b2a,0xcf997736,0xb748e317,0xcee90e16,0x8d1b62bf,0x0b2078c0,0x907ae271,0x0c9bcddd,0xdf31534b,0x39adce83,0x043fb054,0xd826846a,0x99031043,0xb144f393,0x61a9c0d6 +.long 0x47718427,0xdab48046,0x6e830f8b,0xdf17ff9b,0xe49a1347,0x408d7ee8,0x91c1d4ae,0x6ac71e23,0x1defd73c,0xc8cbb9fd,0xbbbbfec5,0x19840657,0x9e7ef8ea,0x39db1cb5,0x64105f30,0x78aa8296 +.long 0xa3738c29,0xa3d9b7f0,0xbc3250a3,0x0a2f235a,0x445e4caf,0x55e506f6,0x33475f7a,0x0974f73d,0x5ba2f5a8,0xd37dbba3,0x6af40066,0x542c6e63,0xc5d73e2c,0x26d99b53,0x6c3ca33e,0x06060d7d +.long 0x065fef4a,0xcdbef1c2,0xfd5b92e3,0x77e60f7d,0x26708350,0xd7c549f0,0x34f121bf,0x201b3ad0,0x0334fc14,0x5fcac2a1,0x344552f6,0x8a9a9e09,0x97653082,0x7dd8a1d3,0x79d4f289,0x5fc0738f +.long 0x17d2d8c3,0x787d244d,0x70830684,0xeffc6345,0xe4f73ae5,0x5ddb96dd,0x172549a5,0x8efb14b1,0x2245ae7a,0x6eb73eee,0xea11f13e,0xbca4061e,0x30b01f5d,0xb577421d,0x782e152c,0xaa688b24 +.long 0xbd3502ba,0x67608e71,0xb4de75a0,0x4ef41f24,0xfd6125e5,0xb08dde5e,0xa409543f,0xde484825,0x65cc2295,0x1f198d98,0x6e0edfa2,0x428a3771,0xadf35fc7,0x4f9697a2,0xf7cac3c7,0x01a43c79 +.long 
0x0fd3659a,0xb05d7059,0xbb7f2d9a,0x8927f30c,0x8cf984d3,0x4023d1ac,0x02897a45,0x32125ed3,0x3d414205,0xfb572dad,0xe3fa82a9,0x73000ef2,0xf10a5581,0x4c0868e9,0x6b0b3ca5,0x5b61fc67 +.long 0x7cae440c,0xc1258d5b,0x402b7531,0x21c08b41,0xde932321,0xf61a8955,0x2d1408af,0x3568faf8,0x9ecf965b,0x71b15e99,0xe917276f,0xf14ed248,0x820cf9e2,0xc6f4caa1,0x18d83c7e,0x681b20b2 +.long 0xc6c01120,0x6cde738d,0xae70e0db,0x71db0813,0x74afe18c,0x95fc0644,0x129e2be7,0x34619053,0xdb2a3b15,0x80615cea,0xdb4c7073,0x0a49a19e,0x8fd2d367,0x0e1b84c8,0x033fb8aa,0xd74bf462 +.long 0x533ef217,0x889f6d65,0xc3ca2e87,0x7158c7e4,0xdc2b4167,0xfb670dfb,0x844c257f,0x75910a01,0xcf88577d,0xf336bf07,0xe45e2ace,0x22245250,0x7ca23d85,0x2ed92e8d,0x2b812f58,0x29f8be4c +.long 0x076fe12b,0xdd9ebaa7,0xae1537f9,0x3f2400cb,0x17bdfb46,0x1aa93528,0x67883b41,0xc0f98430,0x0170911d,0x5590ede1,0x34d4b17f,0x7562f5bb,0x1826b8d2,0xe1fa1df2,0x6bd80d59,0xb40b796a +.long 0x3467ba92,0xd65bf197,0xf70954b0,0x8c9b46db,0x0e78f15d,0x97c8a0f3,0x85a4c961,0xa8f3a69a,0x61e4ce9b,0x4242660f,0x6ea6790c,0xbf06aab3,0xec986416,0xc6706f8e,0x9a9fc225,0x9e56dec1 +.long 0x9a9898d9,0x527c46f4,0x5633cdef,0xd799e77b,0x7d9e4297,0x24eacc16,0x6b1cb734,0xabb61cea,0xf778443c,0xbee2e8a7,0x29de2fe6,0x3bb42bf1,0x3003bb6f,0xcbed86a1,0xd781cdf6,0xd3918e6c +.long 0x9a5103f1,0x4bee3271,0xf50eac06,0x5243efc6,0x6adcc119,0xb8e122cb,0xc0b80a08,0x1b7faa84,0x6dfcd08c,0x32c3d1bd,0x0be427de,0x129dec4e,0x1d263c83,0x98ab679c,0xcef64eff,0xafc83cb7 +.long 0x2fa6be76,0x85eb6088,0x1328cbfe,0x892585fb,0xcf618dda,0xc154d3ed,0x3abaf26e,0xc44f601b,0x2be1fdfd,0x7bf57d0b,0x21137fee,0xa833bd2d,0x2db591a8,0x9353af36,0x5562a056,0xc76f26dc +.long 0x3fdf5a51,0x1d87e47d,0x55c9cab0,0x7afb5f93,0x89e0586e,0x91bbf58f,0x0d843709,0x7c72c018,0x99b5c3dc,0xa9a5aafb,0x3844aeb0,0xa48a0f1d,0xb667e482,0x7178b7dd,0x6e23a59a,0x453985e9 +.long 0x01b25dd8,0x4a54c860,0xfb897c8a,0x0dd37f48,0x0ea90cd9,0x5f8aa610,0x16d5830d,0xc8892c68,0xef514ca5,0xeb4befc0,0xe72c9ee6,0x478eb679,0xdbc40d5f,0x9bca20da,0xdde4f64a,0xf015de21 +.long 0xeaf4b8a5,0xaa6a4de0,0x4bc60e32,0x68cfd9ca,0x7fd15e70,0x668a4b01,0xf27dc09d,0xd9f0694a,0xba708bcd,0xf6c3cad5,0x5bb95c2a,0x5cd2ba69,0x33c0a58f,0xaa28c1d3,0xabc77870,0x23e274e3 +.long 0xdfd20a4a,0x44c3692d,0x81a66653,0x091c5fd3,0x09a0757d,0x6c0bb691,0x667343ea,0x9072e8b9,0x80848bec,0x31d40eb0,0x79fd36cc,0x95bd480a,0x65ed43f5,0x01a77c61,0x2e0d40bf,0xafccd127 +.long 0x1cc1884b,0xeccfc82d,0x5d4753b4,0xc85ac201,0x658e099f,0xc7a6caac,0x04b27390,0xcf46369e,0x506467ea,0xe2e7d049,0x37cdeccc,0x481b63a2,0xed80143a,0x4029abd8,0xbcb00b88,0x28bfe3c7 +.long 0x0643d84a,0x3bec1009,0xabd11041,0x885f3668,0xf83a34d6,0xdb02432c,0x719ceebe,0x32f7b360,0xdad1fe7a,0xf06c7837,0x5441a0b0,0x60a157a9,0xe2d47550,0x704970e9,0x271b9020,0xcd2bd553 +.long 0x33e24a0b,0xff57f82f,0xf2565079,0x9cbee23f,0xeb5f5825,0x16353427,0xe948d662,0x276feec4,0xda10032b,0xd1b62bc6,0xf0e72a53,0x718351dd,0x2420e7ba,0x93452076,0x3a00118d,0x96368fff +.long 0x150a49e4,0x00ce2d26,0x3f04706b,0x0c28b636,0x58b196d0,0xbad65a46,0xec9f8b7c,0x6c8455fc,0x2d71867e,0xe90c895f,0xedf9f38c,0x5c0be31b,0xd8f6ec04,0x2a37a15e,0x8cd85251,0x239639e7 +.long 0x9c7c4c6b,0xd8975315,0xd7409af7,0x603aa3c0,0x007132fb,0xb8d53d0c,0xa6849238,0x68d12af7,0xbf5d9279,0xbe0607e7,0xaada74ce,0x9aa50055,0xba7e8ccb,0xe81079cb,0xa5f4ff5e,0x610c71d1 +.long 0x5aa07093,0x9e2ee1a7,0xa75da47c,0xca84004b,0x3de75401,0x074d3951,0xbb311592,0xf938f756,0x00a43421,0x96197618,0x07bc78c8,0x39a25362,0x0a171276,0x278f710a,0x8d1a8f08,0xb28446ea +.long 
0xe3b6a661,0x184781bf,0xe6d279f7,0x7751cb1d,0xc59eb662,0xf8ff95d6,0x58d3dea7,0x186d90b7,0xdfb4f754,0x0e4bb6c1,0x2b2801dc,0x5c5cf56b,0x1f54564d,0xc561e452,0xf0dd7f13,0xb4fb8c60 +.long 0x33ff98c7,0xf8849630,0xcf17769c,0x9619fffa,0x1bfdd80a,0xf8090bf6,0x422cfe63,0x14d9a149,0x6f6df9ea,0xb354c360,0x218f17ea,0xdbcf770d,0x79eb3480,0x207db7c8,0x559b6a26,0x213dbda8 +.long 0x29fc81b3,0xac4c200b,0x171d87c1,0xebc3e09f,0x1481aa9e,0x91799530,0x92e114fa,0x051b92e1,0xecb5537f,0xdf8f92e9,0x290c7483,0x44b1b2cc,0x2adeb016,0xa711455a,0x81a10c2c,0x964b6856 +.long 0xcec03623,0x4f159d99,0xef3271ea,0x05532225,0xc5ee4849,0xb231bea3,0x7094f103,0x57a54f50,0x9598b352,0x3e2d421d,0x67412ab4,0xe865a49c,0x1cc3a912,0xd2998a25,0x0c74d65d,0x5d092808 +.long 0x4088567a,0x73f45908,0x1f214a61,0xeb6b280e,0xcaf0c13d,0x8c9adc34,0xf561fb80,0x39d12938,0xbc6edfb4,0xb2dc3a5e,0xfe4d210e,0x7485b1b1,0xe186ae72,0x062e0400,0x6eeb3b88,0x91e32d5c +.long 0x4be59224,0x6df574d7,0x716d55f3,0xebc88ccc,0xcad6ed33,0x26c2e6d0,0x0d3e8b10,0xc6e21e7d,0x5bcc36bb,0x2cc5840e,0x7da74f69,0x9292445e,0x4e5193a8,0x8be8d321,0x8df06413,0x3ec23629 +.long 0xb134defa,0xc7e9ae85,0x1bb2d475,0x6073b1d0,0x2863c00d,0xb9ad615e,0x525f4ac4,0x9e29493d,0x4e9acf4f,0xc32b1dea,0xa50db88d,0x3e1f01c8,0x04da916c,0xb05d70ea,0xd865803e,0x714b0d0a +.long 0x9920cb5e,0x4bd493fc,0x92c7a3ac,0x5b44b1f7,0xbcec9235,0xa2a77293,0xcd378553,0x5ee06e87,0xda621607,0xceff8173,0x99f5d290,0x2bb03e4c,0xa6f734ac,0x2945106a,0xd25c4732,0xb5056604 +.long 0xe079afee,0x5945920c,0x6789831f,0x686e17a0,0xb74a5ae5,0x5966bee8,0x1e258d46,0x38a673a2,0x83141c95,0xbd1cc1f2,0x0e96e486,0x3b2ecf4f,0x74e5fc78,0xcd3aa896,0x2482fa7a,0x415ec10c +.long 0x80503380,0x15234419,0xd314b392,0x513d917a,0x63caecae,0xb0b52f4e,0x2dc7780b,0x07bf22ad,0xe4306839,0xe761e8a1,0x5dd7feaa,0x1b3be962,0x74c778f1,0x4fe728de,0x5e0070f6,0xf1fa0bda +.long 0x6ec3f510,0x85205a31,0xd2980475,0x2c7e4a14,0x6f30ebfd,0xde3c19c0,0xd4b7e644,0xdb1c1f38,0x5dce364a,0xfe291a75,0x058f5be3,0xb7b22a3c,0x37fea38c,0x2cd2c302,0x2e17be17,0x2930967a +.long 0x0c061c65,0x87f009de,0xedc6ed44,0xcb014aac,0x3bafb1eb,0x49bd1cb4,0x282d3688,0x81bd8b5c,0xf01a17af,0x1cdab87e,0xe710063b,0x21f37ac4,0x42fc8193,0x5a6c5676,0x56a6015c,0xf4753e70 +.long 0xa15b0a44,0x020f795e,0x8958a958,0x8f37c8d7,0xa4b675b5,0x63b7e89b,0x0fc31aea,0xb4fb0c0c,0xa7ff1f2e,0xed95e639,0x619614fb,0x9880f5a3,0x947151ab,0xdeb6ff02,0xa868dcdb,0x5bc5118c +.long 0x4c20cea5,0xd8da2055,0x14c4d69a,0xcac2776e,0x622d599b,0xcccb22c1,0x68a9bb50,0xa4ddb653,0x1b4941b4,0x2c4ff151,0x6efba588,0xe1ff19b4,0xc48345e0,0x35034363,0x1e29dfc4,0x45542e3d +.long 0x349f7aed,0xf197cb91,0x8fca8420,0x3b2b5a00,0x23aaf6d8,0x7c175ee8,0x35af32b6,0x54dcf421,0x27d6561e,0x0ba14307,0xd175b1e2,0x879d5ee4,0x99807db5,0xc7c43673,0x9cd55bcd,0x77a54455 +.long 0x0105c072,0xe6c2ff13,0x8dda7da4,0x18f7a99f,0x0e2d35c1,0x4c301820,0xd9cc6c82,0x06a53ca0,0xf1aa1d9e,0xaa21cc1e,0x4a75b1e8,0x32414334,0x0ebe9fdc,0x2a6d1328,0x98a4755a,0x16bd173f +.long 0x2133ffd9,0xfbb9b245,0x830f1a20,0x39a8b2f1,0xd5a1f52a,0x484bc97d,0xa40eddf8,0xd6aebf56,0x76ccdac6,0x32257acb,0x1586ff27,0xaf4d36ec,0xf8de7dd1,0x8eaa8863,0x88647c16,0x0045d5cf +.long 0xc005979d,0xa6f3d574,0x6a40e350,0xc2072b42,0x8de2ecf9,0xfca5c156,0xa515344e,0xa8c8bf5b,0x114df14a,0x97aee555,0xfdc5ec6b,0xd4374a4d,0x2ca85418,0x754cc28f,0xd3c41f78,0x71cb9e27 +.long 0x03605c39,0x89105079,0xa142c96c,0xf0843d9e,0x16923684,0xf3744934,0xfa0a2893,0x732caa2f,0x61160170,0xb2e8c270,0x437fbaa3,0xc32788cc,0xa6eda3ac,0x39cd818e,0x9e2b2e07,0xe2e94239 +.long 
0x0260e52a,0x6967d39b,0x90653325,0xd42585cc,0x21ca7954,0x0d9bd605,0x81ed57b3,0x4fa20877,0xe34a0bbe,0x60c1eff8,0x84f6ef64,0x56b0040c,0xb1af8483,0x28be2b24,0xf5531614,0xb2278163 +.long 0x5922ac1c,0x8df27545,0xa52b3f63,0xa7b3ef5c,0x71de57c4,0x8e77b214,0x834c008b,0x31682c10,0x4bd55d31,0xc76824f0,0x17b61c71,0xb6d1c086,0xc2a5089d,0x31db0903,0x184e5d3f,0x9c092172 +.long 0xc00cc638,0xdd7ced5b,0x61278fc2,0x1a2015eb,0x6a37f8d6,0x2e8e5288,0xe79933ad,0xc457786f,0x2c51211a,0xb3fe4cce,0x24c20498,0xad9b10b2,0xd28db5e5,0x90d87a4f,0x3aca2fc3,0x698cd105 +.long 0xe91b536d,0x4f112d07,0x9eba09d6,0xceb982f2,0x197c396f,0x3c157b2c,0x7b66eb24,0xe23c2d41,0x3f330d37,0x480c57d9,0x79108deb,0xb3a4c8a1,0xcb199ce5,0x702388de,0xb944a8d4,0x0b019211 +.long 0x840bb336,0x24f2a692,0xa669fa7b,0x7c353bdc,0xdec9c300,0xda20d6fc,0xa13a4f17,0x625fbe2f,0xdbc17328,0xa2b1b61a,0xa9515621,0x008965bf,0xc620ff46,0x49690939,0x8717e91c,0x182dd27d +.long 0xea6c3997,0x5ace5035,0xc2610bef,0x54259aaa,0x3c80dd39,0xef18bb3f,0x5fc3fa39,0x6910b95b,0x43e09aee,0xfce2f510,0xa7675665,0xced56c9f,0xd872db61,0x10e265ac,0xae9fce69,0x6982812e +.long 0xce800998,0x29be11c6,0xb90360d9,0x72bb1752,0x5a4ad590,0x2c193197,0x9fc1dbc0,0x2ba2f548,0xe490ebe0,0x7fe4eebb,0x7fae11c0,0x12a0a4cd,0xe903ba37,0x7197cf81,0xde1c6dd8,0xcf7d4aa8 +.long 0x3fd5684c,0x92af6bf4,0x80360aa1,0x2b26eecf,0x00546a82,0xbd960f30,0xf59ad8fe,0x407b3c43,0x249c82ba,0x86cae5fe,0x2463744c,0x9e0faec7,0x94916272,0x87f551e8,0x6ceb0615,0x033f9344 +.long 0x8be82e84,0x1e5eb0d1,0x7a582fef,0x89967f0e,0xa6e921fa,0xbcf687d5,0xd37a09ba,0xdfee4cf3,0xb493c465,0x94f06965,0x7635c030,0x638b9a1c,0x66f05e9f,0x76667864,0xc04da725,0xccaf6808 +.long 0x768fccfc,0xca2eb690,0xb835b362,0xf402d37d,0xe2fdfcce,0x0efac0d0,0xb638d990,0xefc9cdef,0xd1669a8b,0x2af12b72,0x5774ccbd,0x33c536bc,0xfb34870e,0x30b21909,0x7df25aca,0xc38fa2f7 +.long 0xbf81f3f5,0x74c5f02b,0xaf7e4581,0x0525a5ae,0x433c54ae,0x88d2aaba,0x806a56c5,0xed9775db,0xc0edb37d,0xd320738a,0x66cc1f51,0x25fdb6ee,0x10600d76,0xac661d17,0xbdd1ed76,0x931ec1f3 +.long 0x19ee43f1,0x65c11d62,0x60829d97,0x5cd57c3e,0x984be6e8,0xd26c91a3,0x8b0c53bd,0xf08d9309,0xc016e4ea,0x94bc9e5b,0x11d43d2b,0xd3916839,0x73701155,0x886c5ad7,0x20b00715,0xe0377626 +.long 0xaa80ba59,0x7f01c9ec,0x68538e51,0x3083411a,0xe88128af,0x970370f1,0x91dec14b,0x625cc3db,0x01ac3107,0xfef9666c,0xd5057ac3,0xb2a8d577,0x92be5df7,0xb0f26299,0x00353924,0xf579c8e5 +.long 0x1341ed7a,0xb8fa3d93,0xa7b59d49,0x4223272c,0x83b8c4a4,0x3dcb1947,0xed1302e4,0x4e413c01,0xe17e44ce,0x6d999127,0x33b3adfb,0xee86bf75,0x25aa96ca,0xf6902fe6,0xe5aae47d,0xb73540e4 +.long 0x1b4a158c,0x32801d7b,0x27e2a369,0xe571c99e,0x10d9f197,0x40cb76c0,0x3167c0ae,0xc308c289,0xeb7958f2,0xa6ef9dd3,0x300879b1,0xa7226dfc,0x7edf0636,0x6cd0b362,0x7bc37eed,0x4efbce6c +.long 0x8d699021,0x75f92a05,0x772566e3,0x586d4c79,0x761ad23a,0x378ca5f1,0x1465a8ac,0x650d86fc,0x842ba251,0x7a4ed457,0x42234933,0x6b65e3e6,0x31aad657,0xaf1543b7,0xcbfec369,0xa4cefe98 +.long 0x9f47befb,0xb587da90,0x41312d13,0x6562e9fb,0xeff1cefe,0xa691ea59,0x05fc4cf6,0xcc30477a,0x0b0ffd3d,0xa1632461,0x5b355956,0xa1f16f3b,0x4224ec24,0x5b148d53,0xf977012a,0xdc834e7b +.long 0xb2c69dbc,0x7bfc5e75,0x03c3da6c,0x3aa77a29,0xca910271,0xde0df03c,0x7806dc55,0xcbd5ca4a,0x6db476cb,0xe1ca5807,0x5f37a31e,0xfde15d62,0xf41af416,0xf49af520,0x7d342db5,0x96c5c5b1 +.long 0xeb4ceb9b,0x155c43b7,0x4e77371a,0x2e993010,0x675d43af,0x1d2987da,0x8599fd72,0xef2bc1c0,0x9342f6b2,0x96894b7b,0x7c8e71f0,0x201eadf2,0x4a1f3efc,0xf3479d9f,0x702a9704,0xe0f8a742 +.long 
0xb3eba40c,0xeafd44b6,0xc1c1e0d0,0xf9739f29,0x619d505e,0x0091471a,0x9d7c263e,0xc15f9c96,0x83afbe33,0x5be47285,0x04f1e092,0xa3b6d6af,0x751a9d11,0xe76526b9,0x9a4ae4d2,0x2ec5b26d +.long 0x02f6fb8d,0xeb66f4d9,0x96912164,0x4063c561,0x80ef3000,0xeb7050c1,0xeaa5b3f0,0x288d1c33,0x07806fd8,0xe87c68d6,0x4bbbf50f,0xb2f7f9d5,0xac8d6627,0x25972f3a,0x10e8c13b,0xf8547774 +.long 0x872b4a60,0xcc50ef6c,0x4613521b,0xab2a34a4,0x983e15d1,0x39c5c190,0x59905512,0x61dde5df,0x9f2275f3,0xe417f621,0x451d894b,0x0750c8b6,0x78b0bdaa,0x75b04ab9,0x458589bd,0x3bfd9fd4 +.long 0xee9120b6,0xf1013e30,0x23a4743e,0x2b51af93,0x48d14d9e,0xea96ffae,0x698a1d32,0x71dc0dbe,0x0180cca4,0x914962d2,0xc3568963,0x1ae60677,0x437bc444,0x8cf227b1,0xc9962c7a,0xc650c83b +.long 0xfe7ccfc4,0x23c2c7dd,0x1b929d48,0xf925c89d,0x06783c33,0x4460f74b,0xa590475a,0xac2c8d49,0xb807bba0,0xfb40b407,0x69ff8f3a,0x9d1e362d,0xcbef64a4,0xa33e9681,0x332fb4b2,0x67ece5fa +.long 0x739f10e3,0x6900a99b,0xff525925,0xc3341ca9,0xa9e2d041,0xee18a626,0x29580ddd,0xa5a83685,0x9d7de3cd,0xf3470c81,0x2062cf9c,0xedf02586,0xc010edb0,0xf43522fa,0x13a4b1ae,0x30314135 +.long 0xdb22b94b,0xc792e02a,0xa1eaa45b,0x993d8ae9,0xcd1e1c63,0x8aad6cd3,0xc5ce688a,0x89529ca7,0xe572a253,0x2ccee3aa,0x02a21efb,0xe02b6438,0xc9430358,0xa7091b6e,0x9d7db504,0x06d1b1fa +.long 0xc4744733,0x58846d32,0x379f9e34,0x40517c71,0x130ef6ca,0x2f65655f,0xf1f3503f,0x526e4488,0x7ee4a976,0x8467bd17,0x921363d1,0x1d9dc913,0xb069e041,0xd8d24c33,0x2cdf7f51,0x5eb5da0a +.long 0x197b994f,0x1c0f3cb1,0x2843eae9,0x3c95a6c5,0xa6097ea5,0x7766ffc9,0xd723b867,0x7bea4093,0x4db378f9,0xb48e1f73,0xe37b77ac,0x70025b00,0xaf24ad46,0x943dc8e7,0x16d00a85,0xb98a15ac +.long 0x2743b004,0x3adc38ba,0x334415ee,0xb1c7f4f7,0x1e62d05a,0xea43df8f,0x9d76a3b6,0x32618905,0xa23a0f46,0x2fbd0bb5,0x6a01918c,0x5bc971db,0xb4743f94,0x7801d94a,0x676ae22b,0xb94df65e +.long 0xaf95894c,0xaafcbfab,0x276b2241,0x7b9bdc07,0x5bdda48b,0xeaf98362,0xa3fcb4df,0x5977faf2,0x052c4b5b,0xbed042ef,0x067591f0,0x9fe87f71,0x22f24ec7,0xc89c73ca,0xe64a9f1b,0x7d37fa9e +.long 0x15562627,0x2710841a,0xc243b034,0x2c01a613,0x2bc68609,0x1d135c56,0x8b03f1f6,0xc2ca1715,0x3eb81d82,0xc9966c2d,0x8f6df13e,0xc02abf4a,0x8f72b43b,0x77b34bd7,0x360c82b0,0xaff6218f +.long 0x8d55b9d2,0x0aa5726c,0x99e9bffb,0xdc0adbe9,0xefb9e72a,0x9097549c,0x9dfb3111,0x16755712,0xf26847f9,0xdd8bf984,0xdfb30cb7,0xbcb8e387,0x5171ef9c,0xc1fd32a7,0x389b363f,0x977f3fc7 +.long 0xf4babda0,0x116eaf2b,0xf7113c8e,0xfeab68bd,0xb7def526,0xd1e3f064,0xe0b3fa02,0x1ac30885,0x40142d9d,0x1c5a6e7b,0x30921c0b,0x839b5603,0x36a116a3,0x48f301fa,0xcfd9ee6d,0x380e1107 +.long 0x58854be1,0x7945ead8,0xcbd4d49d,0x4111c12e,0x3a29c2ef,0xece3b1ec,0x8d3616f5,0x6356d404,0x594d320e,0x9f0d6a8f,0xf651ccd2,0x0989316d,0x0f8fdde4,0x6c32117a,0xa26a9bbc,0x9abe5cc5 +.long 0x9723f671,0xcff560fb,0x7f3d593c,0x21b2a12d,0x24ba0696,0xe4cb18da,0xc3543384,0x186e2220,0x88312c29,0x722f64e0,0x17dc7752,0x94282a99,0x5a85ee89,0x62467bbf,0xf10076a0,0xf435c650 +.long 0x43b3a50b,0xc9ff1539,0x1a53efbc,0x7132130c,0xf7b0c5b7,0x31bfe063,0x4ea994cc,0xb0179a7d,0xc85f455b,0x12d064b3,0x8f6e0062,0x47259328,0xb875d6d9,0xf64e590b,0xad92bcc7,0x22dd6225 +.long 0xb9c3bd6d,0xb658038e,0xfbba27c8,0x00cdb0d6,0x1062c45d,0x0c681337,0x2d33407d,0xd8515b8c,0x8cbb5ecf,0xcb8f699e,0xc608d7d8,0x8c4347f8,0xbb3e00db,0x2c11850a,0xecb49d19,0x20a8dafd +.long 0x45ee2f40,0xbd781480,0x416b60cf,0x75e354af,0x8d49a8c4,0xde0b58a1,0xfa359536,0xe40e94e2,0x62accd76,0xbd4fa59f,0x8c762837,0x05cf466a,0x448c277b,0xb5abda99,0x48b13740,0x5a9e01bf +.long 
0x326aad8d,0x9d457798,0xc396f7e7,0xbdef4954,0xc253e292,0x6fb274a2,0x1cfe53e7,0x2800bf0a,0x44438fd4,0x22426d31,0x5e259f9a,0xef233923,0x03f66264,0x4188503c,0x7f9fdfab,0x9e5e7f13 +.long 0x5fcc1aba,0x565eb76c,0x59b5bff8,0xea632548,0xaab6d3fa,0x5587c087,0x6ce39c1b,0x92b639ea,0x953b135c,0x0706e782,0x425268ef,0x7308912e,0x090e7469,0x599e92c7,0x9bc35e75,0x83b90f52 +.long 0x244975b3,0x4750b3d0,0x11965d72,0xf3a44358,0x9c8dc751,0x179c6774,0xd23d9ff0,0xff18cdfe,0x2028e247,0xc4013833,0xf3bfbc79,0x96e280e2,0xd0880a84,0xf60417bd,0x2a568151,0x263c9f3d +.long 0x2d2ce811,0x36be15b3,0xf8291d21,0x846dc0c2,0x789fcfdb,0x5cfa0ecb,0xd7535b9a,0x45a0beed,0x96d69af1,0xec8e9f07,0x599ab6dc,0x31a7c5b8,0xf9e2e09f,0xd36d45ef,0xdcee954b,0x3cf49ef1 +.long 0x086cff9b,0x6be34cf3,0x39a3360f,0x88dbd491,0x0dbfbd1d,0x1e96b8cc,0xcb7e2552,0xc1e5f7bf,0x28819d98,0x0547b214,0x7aea9dcb,0xc770dd9c,0x041d68c8,0xaef0d4c7,0x13cb9ba8,0xcc2b9818 +.long 0xfe86c607,0x7fc7bc76,0x502a9a95,0x6b7b9337,0xd14dab63,0x1948dc27,0xdae047be,0x249dd198,0xa981a202,0xe8356584,0x3a893387,0x3531dd18,0xc85c7209,0x1be11f90,0xe2a52b5a,0x93d2fe1e +.long 0xec6d6b97,0x8225bfe2,0xbd0aa5de,0x9cf6d6f4,0x54779f5f,0x911459cb,0x86aeb1f3,0x5649cddb,0x3f26ce5a,0x32133579,0x550f431e,0xc289a102,0x73b84c6f,0x559dcfda,0xee3ac4d7,0x84973819 +.long 0xf2606a82,0xb51e55e6,0x90f2fb57,0xe25f7061,0xb1a4e37c,0xacef6c2a,0x5dcf2706,0x864e359d,0x7ce57316,0x479e6b18,0x3a96b23d,0x2cab2500,0x8ef16df7,0xed489862,0xef3758b5,0x2056538c +.long 0xf15d3101,0xa7df865e,0x61b553d7,0x80c5533a,0x4ed14294,0x366e1997,0xb3c0bcd6,0x6620741f,0xedc45418,0x21d1d9c4,0xc1cc4a9d,0x005b859e,0xa1c462f0,0xdf01f630,0xf26820c7,0x15d06cf3 +.long 0x3484be47,0x9f7f24ee,0x4a0c902f,0x2ff33e96,0x5a0bc453,0x00bdf457,0x1aa238db,0x2378dfaf,0x856720f2,0x272420ec,0x96797291,0x2ad9d95b,0x768a1558,0xd1242cc6,0x5cc86aa8,0x2e287f8b +.long 0x990cecaa,0x796873d0,0x675d4080,0xade55f81,0x21f0cd84,0x2645eea3,0xb4e17d02,0x7a1efa0f,0x037cc061,0xf6858420,0xd5d43e12,0x682e05f0,0x27218710,0x59c36994,0x3f7cd2fc,0x85cbba4d +.long 0x7a3cd22a,0x726f9729,0x4a628397,0x9f8cd5dc,0xc23165ed,0x17b93ab9,0x122823d4,0xff5f5dbf,0x654a446d,0xc1e4e4b5,0x677257ba,0xd1a9496f,0xde766a56,0x6387ba94,0x521ec74a,0x23608bc8 +.long 0x6688c4d4,0x16a522d7,0x07373abd,0x9d6b4282,0xb42efaa3,0xa62f07ac,0xe3b90180,0xf73e00f7,0x49421c3e,0x36175fec,0x3dcf2678,0xc4e44f9b,0x7220f09f,0x76df436b,0x3aa8b6cf,0x172755fb +.long 0x446139cc,0xbab89d57,0x5fe0208f,0x0a0a6e02,0x11e5d399,0xcdbb63e2,0xa8977f0b,0x33ecaa12,0xf7c42664,0x59598b21,0xab65d08a,0xb3e91b32,0xf4502526,0x035822ee,0x720a82a9,0x1dcf0176 +.long 0x3d589e02,0x50f8598f,0xb1d63d2c,0xdf0478ff,0x1571cd07,0x8b8068bd,0xd79670cd,0x30c3aa4f,0x941ade7f,0x25e8fd4b,0x32790011,0x3d1debdc,0x3a3f9ff0,0x65b6dcbd,0x793de69c,0x282736a4 +.long 0xd41d3bd3,0xef69a0c3,0x07a26bde,0xb533b8c9,0xdb2edf9f,0xe2801d97,0xe1877af0,0xdc4a8269,0x3d590dbe,0x6c1c5851,0xee4e9357,0x84632f6b,0x79b33374,0xd36d36b7,0x9bbca2e6,0xb46833e3 +.long 0xf7fc0586,0x37893913,0x66bf4719,0x385315f7,0xb31855dc,0x72c56293,0x849061fe,0xd1416d4e,0x51047213,0xbeb3ab78,0xf040c996,0x447f6e61,0x638b1d0c,0xd06d310d,0xbad1522e,0xe28a413f +.long 0x82003f86,0x685a76cb,0x0bcdbca3,0x610d07f7,0x9ca4c455,0x6ff66021,0xcea10eec,0x7df39b87,0xe22db218,0xb9255f96,0x08a34c44,0x8cc6d9eb,0x859f9276,0xcd4ffb86,0x50d07335,0x8fa15eb2 +.long 0xcf2c24b5,0xdf553845,0x52f9c3ba,0x89f66a9f,0xe4a7ceb3,0x8f22b5b9,0x0e134686,0xaffef809,0x8eb8fac2,0x3e53e1c6,0x28aec98e,0x93c1e4eb,0x32a43bcb,0xb6b91ec5,0xb2d74a51,0x2dbfa947 +.long 
0xca84bad7,0xe065d190,0xad58e65c,0xfb13919f,0xf1cb6e31,0x3c41718b,0x06d05c3f,0x688969f0,0x21264d45,0xd4f94ce7,0x7367532b,0xfdfb65e9,0x0945a39d,0x5b1be8b1,0x2b8baf3b,0x229f789c +.long 0x6f49f15d,0xd8f41f3e,0x907f0792,0x678ce828,0xfca6e867,0xc69ace82,0xd01dcc89,0x106451ae,0x19fc32d2,0x1bb4f7f0,0xb00c52d2,0x64633dfc,0xad9ea445,0x8f13549a,0xfb323705,0x99a3bf50 +.long 0x534d4dbc,0x0c9625a2,0xc2a2fea3,0x45b8f1d1,0xa530fc1a,0x76ec21a1,0x9e5bd734,0x4bac9c2a,0x7b4e3587,0x5996d76a,0x1182d9e3,0x0045cdee,0x1207f13d,0x1aee24b9,0x97345a41,0x66452e97 +.long 0x9f950cd0,0x16e5b054,0xd7fdd075,0x9cc72fb1,0x66249663,0x6edd61e7,0xf043cccb,0xde4caa4d,0x55c7ac17,0x11b1f57a,0x1a85e24d,0x779cbd44,0xe46081e7,0x78030f86,0x8e20f643,0xfd4a6032 +.long 0x0a750c0f,0xcc7a6488,0x4e548e83,0x39bacfe3,0x0c110f05,0x3d418c76,0xb1f11588,0x3e4daa4c,0x5ffc69ff,0x2733e7b5,0x92053127,0x46f147bc,0xd722df94,0x885b2434,0xe6fc6b7c,0x6a444f65 +.long 0xc3f16ea8,0x7a1a465a,0xb2f1d11c,0x115a461d,0x6c68a172,0x4767dd95,0xd13a4698,0x3392f2eb,0xe526cdc7,0xc7a99ccd,0x22292b81,0x8e537fdc,0xa6d39198,0x76d8cf69,0x2446852d,0xffc5ff43 +.long 0xa90567e6,0x97b14f7e,0xb6ae5cb7,0x513257b7,0x9f10903d,0x85454a3c,0x69bc3724,0xd8d2c9ad,0x6b29cb44,0x38da9324,0x77c8cbac,0xb540a21d,0x01918e42,0x9bbfe435,0x56c3614e,0xfffa707a +.long 0xd4e353b7,0x0ce4e3f1,0xef46b0a0,0x062d8a14,0x574b73fd,0x6408d5ab,0xd3273ffd,0xbc41d1c9,0x6be77800,0x3538e1e7,0xc5655031,0x71fe8b37,0x6b9b331a,0x1cd91621,0xbb388f73,0xad825d0b +.long 0x1cb76219,0x56c2e05b,0x71567e7e,0x0ec0bf91,0x61c4c910,0xe7076f86,0xbabc04d9,0xd67b085b,0x5e93a96a,0x9fb90459,0xfbdc249a,0x7526c1ea,0xecdd0bb7,0x0d44d367,0x9dc0d695,0x95399917 +.long 0x9e240d18,0x61360ee9,0xb4b94466,0x057cdcac,0x2fe5325c,0xe7667cd1,0x21974e3b,0x1fa297b5,0xdb083d76,0xfa4081e7,0xf206bd15,0x31993be6,0x14c19f8c,0x8949269b,0xa9d92357,0x21468d72 +.long 0xa4c506ec,0x2ccbc583,0xd1acfe97,0x957ed188,0x12f1aea2,0x8baed833,0x8325362d,0xef2a6cb4,0x8e195c43,0x130dde42,0x0e6050c6,0xc842025a,0x08686a5d,0x2da972a7,0xe508b4a8,0xb52999a1 +.long 0x10a5a8bd,0xd9f090b9,0x096864da,0xca91d249,0x3f67dbc1,0x8e6a93be,0xf5f4764c,0xacae6fba,0xd21411a0,0x1563c6e0,0xda0a4ad8,0x28fa787f,0x908c8030,0xd524491c,0x4c795f07,0x1257ba0e +.long 0xceca9754,0x83f49167,0x4b7939a0,0x426d2cf6,0x723fd0bf,0x2555e355,0xc4f144e2,0xa96e6d06,0x87880e61,0x4768a8dd,0xe508e4d5,0x15543815,0xb1b65e15,0x09d7e772,0xac302fa0,0x63439dd6 +.long 0xc14e35c2,0xb93f802f,0x4341333c,0x71735b7c,0x16d4f362,0x03a25104,0xbf433c8e,0x3f4d069b,0xf78f5a7c,0x0d83ae01,0x7c4eed07,0x50a8ffbe,0x76e10f83,0xc74f8906,0x9ddaf8e1,0x7d080966 +.long 0x698e04cc,0xb11df8e1,0x169005c8,0x877be203,0x4f3c6179,0x32749e8c,0x7853fc05,0x2dbc9d0a,0x9454d937,0x187d4f93,0xb4800e1b,0xe682ce9d,0x165e68e8,0xa9129ad8,0xbe7f785b,0x0fe29735 +.long 0x5b9e02b7,0x5303f40c,0x35ee04e8,0xa37c9692,0x34d6632b,0x5f46cc20,0x96ac545b,0x55ef72b2,0x7b91b062,0xabec5c1f,0xbb33e821,0x0a79e1c7,0x3a9f4117,0xbb04b428,0xfd2a475a,0x0de1f28f +.long 0x3a4434b4,0x31019ccf,0x1a7954dc,0xa3458111,0xe34972a7,0xa9dac80d,0x74f6b8dd,0xb043d054,0x11137b1a,0x021c319e,0xed5cc03f,0x00a754ce,0xcbea5ad4,0x0aa2c794,0x70c015b6,0x093e67f4 +.long 0xc97e3f6b,0x72cdfee9,0xb6da7461,0xc10bcab4,0xb59806b9,0x3b02d2fc,0xa1de6f47,0x85185e89,0x0eb6c4d4,0x39e6931f,0xd4fa5b04,0x4d4440bd,0x34be7eb8,0x5418786e,0x9d7259bc,0x6380e521 +.long 0xd598d710,0x20ac0351,0xcb3a4da4,0x272c4166,0xca71de1f,0xdb82fe1a,0xd8f54b0f,0x746e79f2,0x4b573e9b,0x6e7fc736,0xfd4b5040,0x75d03f46,0x0b98d87b,0x5c1cc36d,0x1f472da1,0x513ba3f1 +.long 
0xabb177dd,0x79d0af26,0x7891d564,0xf82ab568,0x72232173,0x2b6768a9,0x8c1f6619,0xefbb3bb0,0xa6d18358,0xb29c11db,0xb0916d3a,0x519e2797,0x9188e290,0xd4dc18f0,0x98b0ca7f,0x648e86e3 +.long 0x983c38b5,0x859d3145,0x637abc8b,0xb14f176c,0xcaff7be6,0x2793fb9d,0x35a66a5a,0xebe5a55f,0x9f87dc59,0x7cec1dcd,0xfbdbf560,0x7c595cd3,0x26eb3257,0x5b543b22,0xc4c935fd,0x69080646 +.long 0x81e9ede3,0x7f2e4403,0xcaf6df0a,0x243c3894,0x1c073b11,0x7c605bb1,0xba6a4a62,0xcd06a541,0x49d4e2e5,0x29168949,0x4af66880,0x33649d07,0xe9a85035,0xbfc0c885,0xfc410f4b,0xb4e52113 +.long 0x78a6513b,0xdca3b706,0x9edb1943,0x92ea4a2a,0xdb6e2dd8,0x02642216,0x9fd57894,0x9b45d0b4,0xc69d11ae,0x114e70db,0x4c57595f,0x1477dd19,0xec77c272,0xbc2208b4,0xdb68f59c,0x95c5b4d7 +.long 0x42e532b7,0xb8c4fc63,0x9ae35290,0x386ba422,0xd201ecbc,0xfb5dda42,0xa0e38fd6,0x2353dc8b,0x68f7e978,0x9a0b85ea,0x2ad6d11f,0x96ec5682,0xe5f6886d,0x5e279d6c,0x3cb1914d,0xd3fe03cd +.long 0x7ea67c77,0xfe541fa4,0xe3ea810c,0x952bd2af,0x8d01d374,0x791fef56,0x0f11336e,0xa3a1c621,0xc7ec6d79,0x5ad0d5a9,0x3225c342,0xff7038af,0xbc69601b,0x003c6689,0x45e8747d,0x25059bc7 +.long 0xf2086fbf,0xfa4965b2,0x86916078,0xf6840ea6,0x70081d6c,0xd7ac7620,0xb5328645,0xe600da31,0x529b8a80,0x01916f63,0x2d7d6f3e,0xe80e4858,0xd664ca7c,0x29eb0fe8,0xe7b43b0c,0xf017637b +.long 0x76cb2566,0x9a75c806,0xb24892d9,0x8f76acb1,0x1f08fe45,0x7ae7b9cc,0x6a4907d8,0x19ef7329,0x5f228bf0,0x2db4ab71,0x817032d7,0xf3cdea39,0xdcabe3c0,0x0b1f482e,0xbb86325c,0x3baf76b4 +.long 0x10089465,0xd49065e0,0x8e77c596,0x3bab5d29,0x193dbd95,0x7636c3a6,0xb246e499,0xdef5d294,0x286b2475,0xb22c58b9,0xcd80862b,0xa0b93939,0xf0992388,0x3002c83a,0xeacbe14c,0x6de01f9b +.long 0xadd70482,0x6aac688e,0x7b4a4e8a,0x708de92a,0x758a6eef,0x75b6dd73,0x725b3c43,0xea4bf352,0x87912868,0x10041f2c,0xef09297a,0xb1b1be95,0xa9f3860a,0x19ae23c5,0x515dcf4b,0xc4f0f839 +.long 0x97f6306a,0x3c7ecca3,0x68a3a4b0,0x744c44ae,0xb3a1d8a2,0x69cd13a0,0x5256b578,0x7cad0a1e,0x33791d9e,0xea653fcd,0x74b2e05f,0x9cc2a05d,0xfd7affa2,0x73b391dc,0xb6b05442,0xddb7091e +.long 0x8538a5c6,0xc71e27bf,0x89abff17,0x195c63dd,0x1b71e3da,0xfd315285,0xfa680fa0,0x9cbdfda7,0x849d7eab,0x9db876ca,0x3c273271,0xebe2764b,0xf208dcea,0x663357e3,0x565b1b70,0x8c5bd833 +.long 0x9837fc0d,0xccc3b4f5,0xa79cf00f,0x9b641ba8,0xdfdf3990,0x7428243d,0x020786b1,0x83a594c4,0x526c4502,0xb712451a,0x6adb3f93,0x9d39438e,0xe9ff0ccd,0xfdb261e3,0xe07af4c3,0x80344e3c +.long 0x2fa4f126,0x75900d7c,0x5c99a232,0x08a3b865,0xdb25e0c3,0x2478b6bf,0x71db2edf,0x482cc2c2,0x5f321bb8,0x37df7e64,0x9a8005b4,0x8a93821b,0xcc8c1958,0x3fa2f10c,0x2c269d0a,0x0d332218 +.long 0xe246b0e6,0x20ab8119,0xd349fd17,0xb39781e4,0xb31aa100,0xd293231e,0xbb032168,0x4b779c97,0xc8470500,0x4b3f19e1,0x0c4c869d,0x45b7efe9,0xa1a6bbcc,0xdb84f38a,0xb2fddbc1,0x3b59cb15 +.long 0x3fd165e8,0xba5514df,0x061f8811,0x499fd6a9,0xbfef9f00,0x72cd1fe0,0x79ad7e8a,0x120a4bb9,0x5f4a5ac5,0xf2ffd095,0x95a7a2f0,0xcfd174f1,0x9d17baf1,0xd42301ba,0x77f22089,0xd2fa487a +.long 0xb1dc77e1,0x9cb09efe,0x21c99682,0xe9566939,0x6c6067bb,0x8c546901,0x61c24456,0xfd378574,0x81796b33,0x2b6a6cbe,0x58e87f8b,0x62d550f6,0x7f1b01b4,0x1b763e1c,0x1b1b5e12,0x4b93cfea +.long 0x1d531696,0xb9345238,0x88cdde69,0x57201c00,0x9a86afc7,0xdde92251,0xbd35cea8,0xe3043895,0x8555970d,0x7608c1e1,0x2535935e,0x8267dfa9,0x322ea38b,0xd4c60a57,0x804ef8b5,0xe0bf7977 +.long 0xc06fece4,0x1a0dab28,0x94e7b49d,0xd405991e,0x706dab28,0xc542b6d2,0xa91618fb,0xcb228da3,0x107d1cea,0x224e4164,0xd0f5d8f1,0xeb9fdab3,0x0d6e41cd,0xc02ba386,0x9b1f7146,0x676a72c5 +.long 
0x4d6cb00b,0xffd6dd98,0xde2e8d7c,0xcef9c5ca,0x641c7936,0xa1bbf5d7,0xee8f772e,0x1b95b230,0xe8ac25b1,0xf765a92e,0x3a18b7c6,0xceb04cfc,0x0acc8966,0x27944cef,0x434c1004,0xcbb3c957 +.long 0xa43ff93c,0x9c9971a1,0xa1e358a9,0x5bc2db17,0xa8d9bc82,0x45b4862e,0x2201e052,0x70ebfbfb,0x92871591,0xafdf64c7,0xb42d0219,0xea5bcae6,0x2ad8f03c,0xde536c55,0xa76aa33c,0xcd6c3f4d +.long 0x0bca6de3,0xbeb5f623,0xb1e706fd,0xdd20dd99,0xac9059d4,0x90b3ff9d,0x7ccccc4e,0x2d7b2902,0xce98840f,0x8a090a59,0x8410680a,0xa5d947e0,0x923379a5,0x49ae346a,0xb28a3156,0x7dbc84f9 +.long 0x54a1aff2,0xfd40d916,0x3a78fb9b,0xabf318ba,0x3029f95e,0x50152ed8,0xc58ad7fa,0x9fc1dd77,0x13595c17,0x5fa57915,0x8f62b3a9,0xb9504668,0xff3055b0,0x907b5b24,0x9a84f125,0x2e995e35 +.long 0x7e9bbcfb,0x87dacf69,0xe86d96e3,0x95d0c1d6,0x2d95a75c,0x65726e3c,0xacd27f21,0x2c3c9001,0x6c973f57,0x1deab561,0xa5221643,0x108b7e2c,0xc4ef79d4,0x5fee9859,0x40d4b8c6,0xbd62b88a +.long 0x197c75d6,0xb4dd29c4,0xb7076feb,0x266a6df2,0x4bf2df11,0x9512d0ea,0x6b0cc9ec,0x1320c24f,0x01a59596,0x6bb1e0e1,0xeff9aaac,0x8317c5bb,0x385aa6c9,0x65bb405e,0x8f07988f,0x613439c1 +.long 0x16a66e91,0xd730049f,0xfa1b0e0d,0xe97f2820,0x304c28ea,0x4131e003,0x526bac62,0x820ab732,0x28714423,0xb2ac9ef9,0xadb10cb2,0x54ecfffa,0xf886a4cc,0x8781476e,0xdb2f8d49,0x4b2c87b5 +.long 0x0a44295d,0xe857cd20,0x58c6b044,0x707d7d21,0xf596757c,0xae8521f9,0x67b2b714,0x87448f03,0x5ebcd58d,0x13a9bc45,0x9122d3c1,0x79bcced9,0x9e076642,0x3c644247,0x2df4767d,0x0cf22778 +.long 0x71d444b6,0x5e61aee4,0xc5084a1d,0x211236bf,0x4fd3eaf6,0x7e15bc9a,0xab622bf5,0x68df2c34,0x59bf4f36,0x9e674f0f,0xd7f34d73,0xf883669b,0x31497b1d,0xc48ac1b8,0x5106703b,0x323b925d +.long 0x74082008,0x22156f42,0xc8482bcb,0xeffc521a,0x12173479,0x5c6831bf,0xc4739490,0xcaa2528f,0x8f1b3c4d,0x84d2102a,0x2d9bec0d,0xcf64dfc1,0x78a546ef,0x433febad,0x7b73cef1,0x1f621ec3 +.long 0x37338615,0x6aecd627,0x01d8edf6,0x162082ab,0x19e86b66,0x833a8119,0xd299b5db,0x6023a251,0xbbf04b89,0xf5bb0c3a,0xae749a44,0x6735eb69,0x4713de3b,0xd0e058c5,0x2c3d4ccd,0xfdf2593e +.long 0xfdd23667,0x1b8f414e,0xfa2015ee,0xdd52aaca,0xbd9625ff,0x3e31b517,0x8db5918c,0x5ec9322d,0xa96f5294,0xbc73ac85,0x61a0666a,0x82aa5bf3,0xbf08ac42,0x49755810,0x891cedfc,0xd21cdfd5 +.long 0x67f8be10,0x918cb57b,0x56ffa726,0x365d1a7c,0x6532de93,0x2435c504,0x2674cd02,0xc0fc5e10,0x9cbbb142,0x6e51fcf8,0xafc50692,0x1d436e5a,0x3fbcae22,0x766bffff,0xfd55d3b8,0x3148c2fd +.long 0x233222fa,0x52c7fdc9,0xe419fb6b,0x89ff1092,0x25254977,0x3cd6db99,0x1cf12ca7,0x2e85a161,0xdc810bc9,0xadd2547c,0x9d257c22,0xea3f458f,0x27d6b19b,0x642c1fbe,0x140481a6,0xed07e6b5 +.long 0x86d2e0f8,0x6ada1d42,0x0e8a9fd5,0xe5920122,0x708c1b49,0x02c936af,0x2b4bfaff,0x60f30fee,0x858e6a61,0x6637ad06,0x3fd374d0,0xce4c7767,0x7188defb,0x39d54b2d,0xf56a6b66,0xa8c9d250 +.long 0xb24fe1dc,0x58fc0f5e,0x6b73f24c,0x9eaf9dee,0x33650705,0xa90d588b,0xaf2ec729,0xde5b62c5,0xd3c2b36e,0x5c72cfae,0x034435da,0x868c19d5,0xe17ee145,0x88605f93,0x77a5d5b1,0xaa60c4ee +.long 0x3b60c472,0xbcf5bfd2,0xeb1d3049,0xaf4ef13c,0xe13895c9,0x373f44fc,0x0cbc9822,0xf29b382f,0x73efaef6,0x1bfcb853,0xa8c96f40,0xcf56ac9c,0x7a191e24,0xd7adf109,0xbf8a8dc2,0x98035f44 +.long 0x1e750c84,0xf40a71b9,0x5dc6c469,0xc57f7b0c,0x6fbc19c1,0x49a0e79c,0xa48ebdb8,0x6b0f5889,0xa07c4e9f,0x5d3fd084,0xab27de14,0xc3830111,0x33e08dcc,0x0e4929fe,0x40bb73a3,0xf4a5ad24 +.long 0x490f97ca,0xde86c2bf,0x67a1ce18,0x288f09c6,0x1844478d,0x364bb886,0xceedb040,0x7840fa42,0x5a631b37,0x1269fdd2,0xa47c8b7d,0x94761f1e,0x481c6266,0xfc0c2e17,0x3daa5fa7,0x85e16ea2 +.long 
0x92491048,0xccd86033,0xf4d402d7,0x0c2f6963,0xdf6a865c,0x6336f7df,0xb5c02a87,0x0a2a463c,0xbf2f12ee,0xb0e29be7,0x66bad988,0xf0a22002,0x9123c1d7,0x27f87e03,0x328a8c98,0x21669c55 +.long 0x92f14529,0x186b9803,0x63954df3,0xd3d056cc,0x175a46f6,0x2f03fd58,0x11558558,0x63e34ebe,0x5b80cfa5,0xe13fedee,0xd401dbd1,0xe872a120,0xe8a9d667,0x52657616,0xe08d6693,0xbc8da4b6 +.long 0x1b703e75,0x370fb9bb,0xd4338363,0x6773b186,0xecef7bff,0x18dad378,0x995677da,0xaac787ed,0x0437164b,0x4801ea8b,0x73fe795e,0xf430ad20,0x8ee5eb73,0xb164154d,0x108f7c0e,0x0884ecd8 +.long 0x5f520698,0x0e6ec096,0x44f7b8d9,0x640631fe,0xa35a68b9,0x92fd34fc,0x4d40cf4e,0x9c5a4b66,0x80b6783d,0x949454bf,0x3a320a10,0x80e701fe,0x1a0a39b2,0x8d1a564a,0x320587db,0x1436d53d +.long 0x6556c362,0xf5096e6d,0xe2455d7e,0xbc23a3c0,0x807230f9,0x3a7aee54,0x22ae82fd,0x9ba1cfa6,0x99c5d706,0x833a057a,0x842315c9,0x8be85f4b,0x66a72f12,0xd083179a,0xcdcc73cd,0x2fc77d5d +.long 0x5616ee30,0x22b88a80,0xe7ab1083,0xfb09548f,0x511270cd,0x8ad6ab0d,0x6924d9ab,0x61f6c57a,0x90aecb08,0xa0f7bf72,0x0df784a4,0x849f87c9,0xcfaf1d03,0x27c79c15,0xc463face,0xbbf9f675 +.long 0x765ba543,0x91502c65,0x42ea60dd,0x18ce3cac,0x6e43ecb3,0xe5cee6ac,0x68f2aeeb,0x63e4e910,0xc85932ee,0x26234fa3,0x4c90c44d,0x96883e8b,0xa18a50f6,0x29b9e738,0x3f0420df,0xbfc62b2a +.long 0x6d3e1fa9,0xd22a7d90,0xfe05b8a3,0x17115618,0xbb2b9c01,0x2a0c9926,0xe07e76a2,0xc739fcc6,0x165e439a,0x540e9157,0x6a9063d8,0x06353a62,0x61e927a3,0x84d95594,0xe2e0be7f,0x013b9b26 +.long 0x973497f1,0x4feaec3b,0x093ebc2d,0x15c0f94e,0x33af0583,0x6af5f227,0xc61f3340,0x0c2af206,0x4457397c,0xd25dbdf1,0xcabcbae0,0x2e8ed017,0xc2815306,0xe3010938,0xe8c6cd68,0xbaa99337 +.long 0x3b0ec7de,0x08513182,0x58df05df,0x1e1b822b,0xa5c3b683,0x5c14842f,0x3eba34ce,0x98fe977e,0x0d5e8873,0xfd2316c2,0xbd0d427d,0xe48d839a,0x623fc961,0x495b2218,0xb46fba5e,0x24ee56e7 +.long 0x91e4de58,0x9184a55b,0xdfdea288,0xa7488ca5,0xa8dcc943,0xa723862e,0x849dc0fc,0x92d762b2,0x091ff4a9,0x3c444a12,0x0cada274,0x581113fa,0x30d8eae2,0xb9de0a45,0xdf6b41ea,0x5e0fcd85 +.long 0xc094dbb5,0x6233ea68,0xd968d410,0xb77d062e,0x58b3002d,0x3e719bbc,0x3dc49d58,0x68e7dd3d,0x013a5e58,0x8d825740,0x3c9e3c1b,0x21311747,0x7c99b6ab,0x0cb0a2a7,0xc2f888f2,0x5c48a3b3 +.long 0x991724f3,0xc7913e91,0x39cbd686,0x5eda799c,0x63d4fc1e,0xddb595c7,0xac4fed54,0x6b63b80b,0x7e5fb516,0x6ea0fc69,0xd0f1c964,0x737708ba,0x11a92ca5,0x9628745f,0x9a86967a,0x61f37958 +.long 0xaa665072,0x9af39b2c,0xefd324ef,0x78322fa4,0xc327bd31,0x3d153394,0x3129dab0,0x81d5f271,0xf48027f5,0xc72e0c42,0x8536e717,0xaa40cdbc,0x2d369d0f,0xf45a657a,0xea7f74e6,0xb03bbfc4 +.long 0x0d738ded,0x46a8c418,0xe0de5729,0x6f1a5bb0,0x8ba81675,0xf10230b9,0x112b33d4,0x32c6f30c,0xd8fffb62,0x7559129d,0xb459bf05,0x6a281b47,0xfa3b6776,0x77c1bd3a,0x7829973a,0x0709b380 +.long 0xa3326505,0x8c26b232,0xee1d41bf,0x38d69272,0xffe32afa,0x0459453e,0x7cb3ea87,0xce8143ad,0x7e6ab666,0x932ec1fa,0x22286264,0x6cd2d230,0x6736f8ed,0x459a46fe,0x9eca85bb,0x50bf0d00 +.long 0x877a21ec,0x0b825852,0x0f537a94,0x300414a7,0x21a9a6a2,0x3f1cba40,0x76943c00,0x50824eee,0xf83cba5d,0xa0dbfcec,0x93b4f3c0,0xf9538148,0x48f24dd7,0x61744162,0xe4fb09dd,0x5322d64d +.long 0x3d9325f3,0x57447384,0xf371cb84,0xa9bef2d0,0xa61e36c5,0x77d2188b,0xc602df72,0xbbd6a7d7,0x8f61bc0b,0xba3aa902,0x6ed0b6a1,0xf49085ed,0xae6e8298,0x8bc625d6,0xa2e9c01d,0x832b0b1d +.long 0xf1f0ced1,0xa337c447,0x9492dd2b,0x800cc793,0xbea08efa,0x4b93151d,0xde0a741e,0x820cf3f8,0x1c0f7d13,0xff1982dc,0x84dde6ca,0xef921960,0x45f96ee3,0x1ad7d972,0x29dea0c7,0x319c8dbe +.long 
0x7b82b99b,0xd3ea3871,0x470eb624,0x75922d4d,0x3b95d466,0x8f66ec54,0xbee1e346,0x66e673cc,0xb5f2b89a,0x6afe67c4,0x290e5cd3,0x3de9c1e6,0x310a2ada,0x8c278bb6,0x0bdb323b,0x420fa384 +.long 0x0eb919b0,0x0ae1d63b,0xa74b9620,0xd74ee51d,0xa674290c,0x395458d0,0x4620a510,0x324c930f,0xfbac27d4,0x2d1f4d19,0x9bedeeac,0x4086e8ca,0x9b679ab8,0x0cdd211b,0x7090fec4,0x5970167d +.long 0xfaf1fc63,0x3420f2c9,0x328c8bb4,0x616d333a,0x57f1fe4a,0x7d65364c,0x55e5c73a,0x9343e877,0xe970e78c,0x5795176b,0x60533627,0xa36ccebf,0x09cdfc1b,0xfc7c7380,0xb3fec326,0xb39a2afe +.long 0x6224408a,0xb7ff1ba1,0x247cfc5e,0xcc856e92,0xc18bc493,0x01f102e7,0x2091c727,0x4613ab74,0xc420bf2b,0xaa25e89c,0x90337ec2,0x00a53176,0x7d025fc7,0xd2be9f43,0x6e6fe3dc,0x3316fb85 +.long 0x9ac50814,0x27520af5,0x9a8e4223,0xfdf95e78,0x56bec5a0,0xb7e7df2a,0xdf159e5d,0xf7022f7d,0xcac1fe8f,0x93eeeab1,0x37451168,0x8040188c,0xd967dce6,0x7ee8aa8a,0x3abc9299,0xfa0e79e7 +.long 0x2064cfd1,0x67332cfc,0xb0651934,0x339c31de,0x2a3bcbea,0x719b28d5,0x9d6ae5c6,0xee74c82b,0xbaf28ee6,0x0927d05e,0x9d719028,0x82cecf2c,0xddb30289,0x0b0d353e,0xfddb2e29,0xfe4bb977 +.long 0x640bfd9e,0xbb5bb990,0x82f62108,0xd226e277,0x02ffdd56,0x4bf00985,0x2ca1b1b5,0x7756758a,0x5285fe91,0xc32b62a3,0x8c9cd140,0xedbc546a,0xaf5cb008,0x1e47a013,0x073ce8f2,0xbca7e720 +.long 0x17a91cae,0xe10b2ab8,0x08e27f63,0xb89aab65,0xdba3ddf9,0x7b3074a7,0x330c2972,0x1c20ce09,0x5fcf7e33,0x6b9917b4,0x945ceb42,0xe6793743,0x5c633d19,0x18fc2215,0xc7485474,0xad1adb3c +.long 0x6424c49b,0x646f9679,0x67c241c9,0xf888dfe8,0x24f68b49,0xe12d4b93,0xa571df20,0x9a6b62d8,0x179483cb,0x81b4b26d,0x9511fae2,0x666f9632,0xd53aa51f,0xd281b3e4,0x7f3dbd16,0x7f96a765 +.long 0x074a30ce,0xa7f8b5bf,0x005a32e6,0xd7f52107,0x50237ed4,0x6f9e0907,0x8096fa2b,0x2f21da47,0xeec863a0,0xf3e19cb4,0x9527620a,0xd18f77fd,0x407c1cf8,0x9505c81c,0x1b6ec284,0x9998db4e +.long 0xc247d44d,0x7e3389e5,0x3f4f3d80,0x12507141,0x4a78a6c7,0xd4ba0110,0x767720be,0x312874a0,0x75944370,0xded059a6,0x3b2c0bdd,0xd6123d90,0x51c108e3,0xa56b717b,0x070623e9,0x9bb7940e +.long 0x84ac066c,0x794e2d59,0xe68c69a0,0xf5954a92,0x4fd99dcc,0x28c52458,0xb1012517,0x60e639fc,0x7de79248,0xc2e60125,0xf12fc6d7,0xe9ef6404,0x2a3b5d32,0x4c4f2808,0xc768eb8a,0x865ad32e +.long 0x13fb70b6,0xac02331b,0x95599b27,0x037b44c1,0x60bd082c,0x1a860fc4,0xc980cd01,0xa2e25745,0x1da0263e,0xee3387a8,0x2d10f3d6,0x931bfb95,0xa1f24a32,0x5b687270,0xca494b86,0xf140e65d +.long 0xb2f1ac7a,0x4f4ddf91,0x760fee27,0xf99eaabb,0x49c228e5,0x57f4008a,0x1cf713bb,0x090be440,0x5004f022,0xac91fbe4,0x569e1af6,0xd838c2c2,0x0f1daaa5,0xd6c7d20b,0x1bbb02c0,0xaa063ac1 +.long 0x59558a78,0x0938a422,0x8435da2f,0x5343c669,0x034410dc,0x96f67b18,0x84510804,0x7cc1e424,0x16dfbb7d,0x86a1543f,0x5b5bd592,0x921fa942,0xb33dd03c,0x9dcccb6e,0xb843f51e,0x8581ddd9 +.long 0x81d73c9e,0x54935fcb,0x0a5e97ab,0x6d07e979,0xcf3a6bab,0x4dc7b30a,0x170bee11,0x147ab1f3,0x9fafdee4,0x0aaf8e3d,0x538a8b95,0xfab3dbcb,0x6ef13871,0x405df4b3,0x088d5a49,0xf1f4e9cb +.long 0x66b33f1d,0x9bcd24d3,0x5ce445c0,0x3b97b820,0xba93ff61,0xe2926549,0x4dafe616,0xd9c341ce,0x16efb6f3,0xfb30a76e,0x605b953c,0xdf24b8ca,0xc2fffb9f,0x8bd52afe,0xe19d0b96,0xbbac5ff7 +.long 0x459afccd,0x43c01b87,0xb7432652,0x6bd45143,0x55b5d78e,0x84734530,0x1554ba7d,0x81088fdb,0x1e269375,0xada0a52c,0x2dc5ec10,0xf9f037c4,0x94bfbc11,0xc0660607,0xc9c40d2f,0xc0a630bb +.long 0xab64c31e,0x5efc797e,0x74507144,0xffdb1dab,0x1ca6790c,0xf6124287,0xe69bf1bf,0xe9609d81,0x00d24fc9,0xdb898595,0xe51fb417,0x9c750333,0xfef7bbde,0x51830a91,0x945f585c,0x0ce67dc8 +.long 
0x4763eb50,0x9a730ed4,0xc1ab0d66,0x24a0e221,0x648748f3,0x643b6393,0x6d3c6291,0x1982daa1,0x8bbc5549,0x6f00a9f7,0x7f36384e,0x7a1783e1,0xde977f50,0xe8346323,0xb245502a,0x91ab688d +.long 0x6d0bdd66,0x331ab6b5,0x64b71229,0x0a6ef32e,0xfe7c352f,0x1028150e,0xce7b39d3,0x27e04350,0xc1070c82,0x2a3c8acd,0x80c9feef,0xfb2034d3,0x709f3729,0x2d729621,0x62cb4549,0x8df290bf +.long 0xfc2e4326,0x02f99f33,0x5eddf032,0x3b30076d,0x0c652fb5,0xbb21f8cf,0xed91cf7b,0x314fb49e,0x2f700750,0xa013eca5,0x712a4575,0x2b9e3c23,0xaf30fbb0,0xe5355557,0x7c77e771,0x1ada3516 +.long 0x7b135670,0x45f6ecb2,0x7cfc202e,0xe85d19df,0x58d1be9f,0x0f1b50c7,0xead2e344,0x5ebf2c0a,0xabc199c9,0x1531fe4e,0x56bab0ae,0xc7032592,0x6c1fec54,0x16ab2e48,0x04280188,0x0f87fda8 +.long 0x609e4a74,0xdc9f46fc,0xba667f91,0x2a44a143,0xb4d83436,0xbc3d8b95,0xc7bd2958,0xa01e4bd0,0x73483c90,0x7b182932,0xa7c7b598,0xa79c6aa1,0xeaaac07e,0xbf3983c6,0x96e0d4e6,0x8f18181e +.long 0x051af62b,0x8553d37c,0x0bf94496,0xe9a998eb,0xb0d59aa1,0xe0844f9f,0xe6afb813,0x983fd558,0x65d69804,0x9670c0ca,0x6ea5ff2d,0x732b22de,0x5fd8623b,0xd7640ba9,0xa6351782,0x9f619163 +.long 0xacee5043,0x0bfc27ee,0x2eb10f02,0xae419e73,0x8943fb05,0x19c028d1,0xff13aa2a,0x71f01cf7,0x8887a132,0x7790737e,0x66318410,0x67513309,0x7ddb795e,0x9819e8a3,0xdad100b2,0xfecb8ef5 +.long 0x3021926a,0x59f74a22,0x6f9b4c1c,0xb7c28a49,0x912ad0ab,0xed1a733f,0x01a5659c,0x42a910af,0x7bd68cab,0x3842c6e0,0x76d70ac8,0x2b57fa38,0x3c53aaeb,0x8a6707a8,0x65b4db18,0x62c1c510 +.long 0xb2d09dc7,0x8de2c1fb,0x266bd23b,0xc3dfed12,0xd5b27db6,0x927d039b,0x103243da,0x2fb2f0f1,0x80be7399,0xf855a07b,0x1f9f27a8,0xed9327ce,0x729bdef7,0xa0bd99c7,0x28250d88,0x2b67125e +.long 0x8670ced7,0x784b26e8,0xc31bd3b4,0xe3dfe41f,0xbcc85cbc,0x9e353a06,0x60178a9d,0x302e2909,0xa6eac16e,0x860abf11,0xaa2b3aac,0x76447000,0x850afdab,0x46ff9d19,0xfdb2d4c1,0x35bdd6a5 +.long 0x7e5c9ce9,0xe82594b0,0x20af346e,0x0f379e53,0xbc65ad4a,0x608b31e3,0x267c4826,0x710c6b12,0x71954cf1,0x51c966f9,0x0d0aa215,0xb1cec793,0x86bd23a8,0x1f155989,0xf9452e86,0xae2ff99c +.long 0x340ceaa2,0xd8dd953c,0x2e2e9333,0x26355275,0x8586f06d,0x15d4e5f9,0xf7cab546,0xd6bf94a8,0xb76a9af0,0x33c59a0a,0xba095af7,0x52740ab3,0x24389ca0,0xc444de8a,0x706da0cb,0xcc6f9863 +.long 0x6b2515cf,0xb5a741a7,0x9585c749,0x71c41601,0xe683de97,0x78350d4f,0x63d0b5f5,0x31d61524,0xfbce090b,0x7a0cc5e1,0xfbcb2a5b,0xaac927ed,0x20d84c35,0xe920de49,0x22b4de26,0x8c06a0b6 +.long 0xafe7ddf3,0xd34dd58b,0xc1e6e55b,0x55851fed,0x960696e7,0xd1395616,0x5f22705f,0x940304b2,0xb0a2a860,0x6f43f861,0x0e7cc981,0xcf121282,0x0ab64a96,0x12186212,0xb789383c,0x09215b9a +.long 0x37387c09,0x311eb305,0xf03ee760,0xc5832fce,0x32f7ea19,0x30358f58,0x91d53551,0xe01d3c34,0xda48ea80,0x1ca5ee41,0xcf4fa4c1,0x34e71e8e,0x7af1e1c7,0x312abd25,0x2153f4a5,0xe3afcdeb +.long 0x00235e9a,0x9d5c84d7,0x8c4c836f,0x0308d3f4,0x89332de5,0xc0a66b04,0x89e566ef,0x610dd399,0xd1ac1635,0xf8eea460,0x20a2c0df,0x84cbb3fb,0xe74a48c5,0x40afb488,0xd326b150,0x29738198 +.long 0xa6d74081,0x2a17747f,0x55a26214,0x60ea4c05,0x1f88c5fe,0x53514bb4,0x7e83426c,0xedd64567,0x96460b25,0xd5d6cbec,0x68dc115e,0xa12fd0ce,0x697840ea,0xc5bc3ed2,0xa6331e31,0x969876a8 +.long 0x472ff580,0x60c36217,0x4ad41393,0xf4229705,0xa03b8b92,0x4bd99ef0,0xc144f4f6,0x501c7317,0x18464945,0x159009b3,0x74c5c6be,0x6d5e594c,0x321a3660,0x2d587011,0x3898d022,0xd1e184b1 +.long 0x4c6a7e04,0x5ba04752,0x45550b65,0x47fa1e2b,0x48c0a9a5,0x9419daf0,0x7c243236,0x66362953,0x5cb12a88,0xcd0744b1,0x2b646188,0x561b6f9a,0x66c2c0c0,0x599415a5,0x0f83f09a,0xbe3f0859 +.long 
0xb92041b8,0x9141c5be,0x26477d0d,0x01ae38c7,0xd12c7a94,0xca8b71f3,0x765c70db,0xfab5b31f,0x487443e9,0x76ae7492,0x990d1349,0x8595a310,0x7d460a37,0xf8dbeda8,0x1e45a38f,0x7f7ad082 +.long 0x1059705a,0xed1d4db6,0xe6b9c697,0xa3dd492a,0x6eb38bd5,0x4b92ee3a,0x67cc0bb7,0xbab2609d,0x6e70ee82,0x7fc4fe89,0x13e6b7e3,0xeff2c56e,0x34d26fca,0x9b18959e,0x889d6b45,0x2517ab66 +.long 0xbdefdd4f,0xf167b4e0,0xf366e401,0x69958465,0xa73bbec0,0x5aa368ab,0x7b240c21,0x12148709,0x18969006,0x378c3233,0xe1fe53d1,0xcb4d73ce,0x130c4361,0x5f50a80e,0x7ef5212b,0xd67f5951 +.long 0x9e70c72e,0xf145e21e,0x5566d2fb,0xb2e52e29,0x032397f5,0x44eaba4a,0x7e31a7de,0x5e56937b,0x456c61e1,0x68dcf517,0xa8b0a388,0xbc2e954a,0x60a8b755,0xe3552fa7,0x73ad0cde,0x03442dae +.long 0xceb26210,0x37ffe747,0x787baef9,0x983545e8,0x86a3de31,0x8b8c8535,0xfacd46db,0xc621dbcb,0x59266fbb,0x82e442e9,0x339d471c,0xa3514c37,0x62cdad96,0x3a11b771,0xecf9bdf0,0xf0cb3b3c +.long 0x478e2135,0x3fcbdbce,0xbda35342,0x7547b5cf,0x8a677af6,0xa97e81f1,0x28817987,0xc8c2bf83,0x45580985,0xdf07eaaf,0xc93b45cb,0xc68d1f05,0xc77b4cac,0x106aa2fe,0x04a7ae86,0x4c1d8afc +.long 0x9eb45ab2,0xdb41c3fd,0xd4b22e74,0x5b234b5b,0xf215958a,0xda253dec,0xa04edfa0,0x67e0606e,0xef751b11,0xabbbf070,0xf6f06dce,0xf352f175,0x6839f6b4,0xdfc4b6af,0x9959848e,0x53ddf9a8 +.long 0xc21520b0,0xda49c379,0xdbd5d1b6,0x90864ff0,0x5f49c7f7,0x2f055d23,0xa796b2d8,0xe51e4e6a,0x5c9dc340,0xc361a67f,0xbca7c620,0x5ad53c37,0x32c756d0,0xda1d6588,0x8bb67e13,0xad60d911 +.long 0x0eeec8c6,0xd6c47bdf,0x078a1821,0x4a27fec1,0xc3099524,0x081f7415,0x82cd8060,0x8effdf0b,0x65842df8,0xdb70ec1c,0xd319a901,0x8821b358,0xde42b529,0x72ee56ee,0x236e4286,0x5bb39592 +.long 0xfd6f7140,0xd1183316,0xbd8e81f7,0xf9fadb5b,0x5a02d962,0x701d5e0c,0x1b601324,0xfdee4dbf,0x35d7620e,0xbed17407,0xf48c0012,0x04e3c2c3,0x3455449a,0x9ee29da7,0x91a836c4,0x562cdef4 +.long 0x47701097,0x8f682a5f,0xff88d0c2,0x617125d8,0x57bb86dd,0x948fda24,0x289f7286,0x348abb8f,0x99d94bbd,0xeb10eab5,0x4684d160,0xd51ba28e,0x30c8f41a,0xabe0e51c,0x13254f4a,0x66588b45 +.long 0xfad097a5,0x147ebf01,0x610e815d,0x49883ea8,0x8a11de56,0xe44d60ba,0x827a7a6d,0xa970de6e,0x5e17fc19,0x2be41424,0x01214057,0xd833c657,0x363e723f,0x1375813b,0xe6a52e9b,0x6820bb88 +.long 0xd875d56a,0x7e7f6970,0x51fbf6bf,0xd6a0a9ac,0xa3083c12,0x54ba8790,0x6ae7eb64,0xebaeb23d,0xb99a907a,0xa8685c3a,0x026bf40b,0xf1e74550,0xc802cd9e,0x7b73a027,0x4fef4635,0x9a8a927c +.long 0x08191224,0xe1b6f60c,0xde4ec091,0xc4126ebb,0x4ae38d84,0xe1dff4dc,0x4f2ef985,0xde3f57db,0xd446a1dd,0x34964337,0x859e77f6,0x7bf217a0,0x8e1d13f5,0x8ff10527,0x74eeae27,0xa304ef03 +.long 0xd19dfa5a,0xfc6f5e47,0x7fad982b,0xdb007de3,0x613715f5,0x28205ad1,0x7889529e,0x251e6729,0x1ae98e78,0x72705184,0x271cac32,0xf818537d,0xb7f410f5,0xc8a15b7e,0x81f62393,0xc474356f +.long 0xc242316b,0x92dbdc5a,0xdbf4aff5,0xabe060ac,0x909a8ec6,0x6e8c38fe,0x6116cb94,0x43e514e5,0x07d784f9,0x2078fa38,0xf4b5b357,0x1161a880,0x13adea3d,0x5283ce79,0xcc6a910b,0x0756c3e6 +.long 0xaaa79697,0x60bcfe01,0x56391db1,0x04a73b29,0x189b45a0,0xdd8dad47,0x48d5b8d9,0xbfac0dd0,0x7d3d2ec2,0x34ab3af5,0x207bd3af,0x6fa2fc2d,0x66550ded,0x9ff40092,0x1fd5b913,0x719b3e87 +.long 0x6d17fbc7,0xa573a496,0x73d2b24e,0x0cd1a70a,0xb2676937,0x34e2c5ca,0xbf669f21,0xe7050b06,0x1ede9046,0xfbe948b6,0x97662659,0xa0530051,0xf10124c5,0x58cbd4ed,0xdd6c06c8,0xde2646e4 +.long 0x8cad38c0,0x332f8108,0x6bd68ae2,0x471b7e90,0x0d8e27a3,0x56ac3fb2,0x136b4b0d,0xb54660db,0xa6fd8de4,0x123a1e11,0xa37799ef,0x44dbffea,0xce6ac17c,0x4540b977,0xaf60acef,0x495173a8 +.long 
0x391c2a82,0x9ebb284d,0x158308e8,0xbcdd4863,0x83f1edca,0x006f16ec,0x695dc6c8,0xa13e2c37,0x4a057a87,0x2ab756f0,0xa6b48f98,0xa8765500,0x68651c44,0x4252face,0xe1765e02,0xa52b540b +.long 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,0xfd1b667f,0x2f5e6961,0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,0x8d6f0f7b,0xf648f916 +.long 0xe60b7cf7,0x6dc1acaf,0x84a9d869,0x25860a50,0xe7ba8ac4,0x56fc6f09,0x6148d29e,0x828c5bd0,0xdc55ae5f,0xac6b435e,0xc0117411,0xa527f56c,0xfd24342c,0x94d5045e,0x70b67c0d,0x2c4c0a35 +.long 0xfac61d9a,0x027cc8b8,0xe3c6fe8a,0x7d25e062,0xe5bff503,0xe08805bf,0x6ff632f7,0x13271e6c,0x232f76a5,0x55dca6c0,0x701ef426,0x8957c32d,0xa10a5178,0xee728bcb,0xb62c5173,0x5ea60411 +.long 0xd0b8892b,0xfc4e964e,0x9301bb74,0x9ea17683,0xfcc48626,0x6265c5ae,0xbb3e9102,0xe60cf82e,0xd4df5531,0x57adf797,0x8deeefe2,0x235b59a1,0x3f306eb1,0x60adcf58,0x3d09492d,0x105c2753 +.long 0xb5def996,0x4090914b,0x233dd1e7,0x1cb69c83,0x9b3d5e76,0xc1e9c1d3,0xfccf6012,0x1f3338ed,0x2f5378a8,0xb1e95d0d,0x2f00cd21,0xacf4c2c7,0xeb5fe290,0x6e984240,0x248088ae,0xd66c038d +.long 0xf94d70cf,0x804d264a,0x7314bf7e,0xbdb802ef,0x4333ed02,0x8fb54de2,0x285635d9,0x740461e0,0x365e9383,0x4113b2c8,0x3fdef652,0xea762c83,0x47b956c1,0x4eec6e2e,0x65620fa4,0xa3d814be +.long 0xb4d8bc50,0x9ad5462b,0xa9195770,0x181c0b16,0x78412a68,0xebd4fe1c,0xc0dff48c,0xae0341bc,0x7003e866,0xb6bc45cf,0x8a24a41b,0xf11a6dea,0xd04c24c2,0x5407151a,0xda5b7b68,0x62c9d27d +.long 0x88cceff6,0x2e964235,0x8b07ed69,0x8594c54f,0xc84d0d0d,0x1578e73c,0xff532868,0x7b4e1055,0xb5ec995a,0xa348c0d5,0x14289a54,0xbf4b9d55,0x58fbd777,0x9ba155a6,0x1a84491d,0x186ed7a8 +.long 0x614c0900,0xd4992b30,0xbd00c24b,0xda98d121,0x7ec4bfa1,0x7f534dc8,0x37dc34bc,0x4a5ff674,0x1d7ea1d7,0x68c196b8,0x80a6d208,0x38cf2893,0xe3cbbd6e,0xfd56cd09,0x4205a5b6,0xec72e27e +.long 0xa44f77f7,0x15ea68f5,0xb43c52bc,0x7aa5f9fd,0x94f0e609,0x86ff676f,0x2e2d432b,0xa4cde963,0xeee470af,0x8cafa0c0,0x8a3f5ec8,0x84137d0e,0xfaa31231,0xebb40411,0x6f7f7ccf,0xa239c13f +.long 0xa8afd30b,0x32865719,0x8a826dce,0x86798328,0xc4a8fbe0,0xdf04e891,0xebf56ad3,0xbb6b6e1b,0x471f1ff0,0x0a695b11,0xbe15baf0,0xd76c3389,0xbe96c43e,0x018edb95,0x90794158,0xf2beaaf4 +.long 0xc3076a27,0x152db09e,0xe416545d,0x5e82908e,0x356d6f2e,0xa2c41272,0x31fd74e1,0xdc9c9642,0x519bf615,0x66ceb88d,0x05a2274e,0xe29ecd76,0xbf5e2fa0,0x3a0473c4,0x64284e67,0x6b6eb671 +.long 0xb88756dd,0xe8b97932,0xf17e3e61,0xed4e8652,0x3ee1c4a4,0xc2dd1499,0x597f8c0e,0xc0aaee17,0x6c168af3,0x15c4edb9,0xb39ae875,0x6563c7bf,0x20adb436,0xadfadb6f,0x9a042ac0,0xad55e8c9 +.long 0xb76da1f5,0x975a1ed8,0xa58acb94,0x10dfa466,0xac060282,0x8dd7f7e3,0x572a051e,0x6813e66a,0x350cb901,0xb4ccae1e,0x50cb7822,0xb653d656,0xdfab3b87,0x42484710,0x9b670fd0,0xcd7ee537 +.long 0x523b8bf6,0x0a50b12e,0x8f910c1b,0x8009eb5b,0x4a167588,0xf535af82,0xfb2a2abd,0x0f835f9c,0x2afceb62,0xf59b2931,0x169d383f,0xc797df2a,0x66ac02b0,0xeb3f5fb0,0xdaa2d0ca,0x029d4c6f +.long 0xafab4bc5,0xd4059bc1,0x56783247,0x833f5c6f,0x8d2d3605,0xb5346630,0xd34d8433,0x83387891,0xadd9419a,0xd973b30f,0xafe3fce8,0xbcca1099,0x0809aac6,0x08178315,0x540f0f11,0x01b7f21a +.long 0x909523c8,0x65c29219,0xa3a1c741,0xa62f648f,0x60c9e55a,0x88598d4f,0x0e4f347a,0xbce9141b,0x35f9b988,0x9af97d84,0x320475b6,0x0210da62,0x9191476c,0x3c076e22,0x44fc7834,0x7520dbd9 +.long 0xc1ab1bbd,0x6a6b2cfe,0xdc650938,0xef8a65be,0x805d7bc4,0x72855540,0xed11fdfd,0xda389396,0x74660876,0xa9d5bd36,0xb45dff35,0x11d67c54,0xa4f5da94,0x6af7d148,0xc0bbeb31,0xbb8d4c3f +.long 
0xe0a1b12a,0x87a7ebd1,0x770ba95f,0x1e4ef88d,0xdc2ae9cb,0x8c33345c,0x01cc8403,0xcecf1276,0x1b39b80f,0x687c012e,0x35c33ba4,0xfd90d0ad,0x5c9661c2,0xa3ef5a67,0xe017429e,0x368fc88e +.long 0x196a2fa2,0xd30c6761,0xbd5b312e,0x931b9817,0x72f54a31,0xba01000c,0x66eaa541,0xa203d2c8,0x98939db3,0xf2abdee0,0x3e606c02,0xe37d6c2c,0x521ff643,0xf2921574,0xd7e2fca3,0x2781b3c4 +.long 0x7850ec06,0x664300b0,0x7d3a10cf,0xac5a38b9,0xe34ab39d,0x9233188d,0x5072cbb9,0xe77057e4,0xb59e78df,0xbcf0c042,0x1d97de52,0x4cfc91e8,0x3ee0ca4a,0x4661a26c,0xfb8507bc,0x5620a4c1 +.long 0x049f842c,0x4b44d4aa,0x1540e82b,0xceabc5d5,0x15c6f156,0x306710fd,0x63db1d72,0xbe5ae52b,0x334957f1,0x06f1e7e6,0x31144a70,0x57e388f0,0xdf96447b,0xfb69bb2f,0x73e38a12,0x0f78ebd3 +.long 0x2b7ce542,0xb8222605,0x7472bde1,0xe6d4ce99,0x09d2f4da,0x53e16ebe,0x53b92b2e,0x180ff42e,0x2c34a1c6,0xc59bcc02,0x422c46c2,0x3803d6f9,0x5c14a8a2,0x18aff74f,0x10a08b28,0x55aebf80 +.long 0x7135593f,0x66097d58,0x2be570cd,0x32e6eff7,0x2a8c860d,0x584e6a10,0xa2eb4163,0xcd185890,0x6d97e134,0x7ceae99d,0xdd8447ce,0xd42c6b70,0xb8c50273,0x59ddbb4a,0x3cf34e1e,0x03c612df +.long 0x04b6c5a0,0x84b9ca15,0x18f0e3a3,0x35216f39,0xbd986c00,0x3ec2d2bc,0xd19228fe,0x8bf546d9,0x4cd623c3,0xd1c655a4,0x502b8e5a,0x366ce718,0xeea0bfe7,0x2cfc84b4,0xcf443e8e,0xe01d5cee +.long 0x036520f8,0x8ec045d9,0x92d40e98,0xdfb3c3d1,0xcc559a04,0x0bac4cce,0x240ea6b1,0x35eccae5,0xf8a5a0ac,0x180b32db,0xeb699700,0x547972a5,0xca26bca0,0xa3765801,0xa647f25a,0x57e09d0e +.long 0x2fdd23cc,0xb956970e,0x5682e971,0xb80288bc,0x9ae86ebc,0xe6e6d91e,0x8c9f1939,0x0564c83f,0x39560368,0x551932a2,0x049c28e2,0xe893752b,0xa6a158c3,0x0b03cee5,0x04964263,0xe12d656b +.long 0x63e3bc1d,0x4b47554e,0x45044ff7,0xc719b6a2,0xe48daa07,0x4f24d30a,0xc8c1edc3,0xa3f37556,0x0700d360,0x9a47bf76,0x822ae4e2,0xbb1a1824,0x89f1fb4c,0x22e275a3,0x9968c5f5,0x72b1aa23 +.long 0xbe063f64,0xa75feaca,0xbce47a09,0x9b392f43,0x1ad07aca,0xd4241509,0x8d26cd0f,0x4b0c591b,0x92f1169a,0x2d42ddfd,0x4cbf2392,0x63aeb1ac,0x0691a2af,0x1de9e877,0xd98021da,0xebe79af7 +.long 0x40e50acf,0xcfdf2a4e,0xaf01d665,0xf0a98ad7,0x1831be1f,0xefb640bf,0x80e9ada0,0x6fe8bd2f,0x6cafbc91,0x94c103a1,0x8308e08c,0x170f8759,0x9780ff4f,0x5de2d2ab,0x45b201f2,0x666466bc +.long 0xf5b343bc,0x58af2010,0xf2f142fe,0x0f2e400a,0xa85f4bdf,0x3483bfde,0x03bfeaa9,0xf0b1d093,0xc7081603,0x2ea01b95,0x3dba1097,0xe943e4c9,0xb438f3a6,0x47be92ad,0xe5bf6636,0x00bb7742 +.long 0x824297b4,0x136b7083,0x5584455f,0x9d0e5580,0xf1c7d69e,0xab48cedc,0x2a256e76,0x53a9e481,0x65eb2413,0x0402b0e0,0x8fc407a7,0xdadbbb84,0x8d7f5492,0xa65cd5a4,0x74bae294,0x21d44293 +.long 0x3b5f1cc4,0x66917ce6,0xce872e62,0x37ae52ea,0x2905f244,0xbb087b72,0x1e6af74f,0x12077086,0x1058edea,0x4b644e49,0xb638ca1d,0x827510e3,0x6038591c,0x8cf2b704,0xfe635063,0xffc8b47a +.long 0x1b4d5e63,0x3ae220e6,0x9d961b4b,0xbd864742,0x9bd16bed,0x610c107e,0x1127147b,0x4270352a,0x64cfc50e,0x7d17ffe6,0x1e36cb42,0x50dee01a,0x35dc5f9a,0x068a7622,0xdf53f62c,0x9a08d536 +.long 0x6be5f7de,0x4ed71457,0xc2263c9e,0xd93006f8,0xcacacb36,0xe073694c,0x3ae118ab,0x2ff7a5b4,0xcd871236,0x3cce53f1,0xc2aa6d52,0xf156a39d,0xb198d76d,0x9cc5f271,0x81383d39,0xbc615b6f +.long 0xde3eee6b,0xa54538e8,0xab910d91,0x58c77538,0x58d278bd,0x31e5bdbc,0xb963acae,0x3cde4adf,0x5302169c,0xb1881fd2,0xa989ed8b,0x8ca60fa0,0xff96a0ee,0xa1999458,0xac6c283d,0xc1141f03 +.long 0x6dfafed3,0x7677408d,0x39661588,0x33a01653,0x0b726fa0,0x3c9c15ec,0x6c9b56da,0x090cfd93,0xa3c40af5,0xe34f4bae,0xd21129f1,0x3469eadb,0x1e207ce8,0xcc51674a,0xc83b1ef9,0x1e293b24 +.long 
0x1e6c0bb4,0x17173d13,0x90776d35,0x19004695,0x6de6f922,0xe7980e34,0xf4dd9a22,0x873554cb,0xcbf18a51,0x0316c627,0x3032c081,0x4d93651b,0x3946834d,0x207f2771,0x30cdbf80,0x2c08d7b4 +.long 0x86df2a61,0x137a4fb4,0xecf7b4a2,0xa1ed9c07,0x7bd042ff,0xb2e460e2,0x5f62f5ec,0xb7f5e2fa,0xcc2423b7,0x7aa6ec6b,0xba63eea7,0x75ce0a7f,0xf250a6e1,0x67a45fb1,0xe53cdc9f,0x93bc919c +.long 0x871942df,0x9271f56f,0x7859ad66,0x2372ff6f,0x33cb1a78,0x5f4c2b96,0x5838aa83,0xe3e29101,0xe4e8110c,0xa7ed1611,0x330198ce,0x2a2d70d5,0x6720efe0,0xbdf132e8,0x66a471bf,0xe61a8962 +.long 0x825808bd,0x796d3a85,0x3fd6e902,0x51dc3cb7,0x916219d1,0x643c768a,0xa2ad7d32,0x36cd7685,0xb22922a4,0xe3db9d05,0xdba29660,0x6494c87e,0xbcd2ebc7,0xf0ac91df,0x45107f8d,0x4deb57a0 +.long 0xc3d12a73,0x42271f59,0xa5c2c51d,0x5f71687c,0x05797bcb,0xcb1f50c6,0xd6d34eb0,0x29ed0ed9,0x4683c2eb,0xe5fe5b47,0x97447c46,0x4956eeb5,0x71207167,0x5b163a43,0x0248c5ef,0x93fa2fed +.long 0x31f63950,0x67930af2,0x14caa2c9,0xa77797c1,0x27ac7e62,0x526e80ee,0x58b28aec,0xe1e6e626,0xb3c9fef0,0x636178b0,0x6d5f90be,0xaf7752e0,0xeece51cf,0x94ecaf18,0xca806e1f,0x2864d0ed +.long 0x97c69134,0x6de2e383,0xeb291293,0x5a42c316,0x6a60bae0,0xc7779219,0x6b7599d1,0xa24de346,0xb75d4941,0x49d374aa,0x2d501ff0,0x98900586,0xeb7974cf,0x9f16d40e,0xcdd8c115,0x1033860b +.long 0x2094cec3,0xb6c69ac8,0x403b770c,0x9976fb88,0x4859590d,0x1dea026c,0x8562d1fd,0xb6acbb46,0x44569d85,0x7cd6c461,0x97f0891d,0xc3190a36,0x48d5a17d,0xc6f53195,0xd749abc8,0x7d919966 +.long 0xdd1c8a20,0x65104837,0x2f683419,0x7e5410c8,0xbe94022e,0x958c3ca8,0x6145dac2,0x605c3197,0x01683d54,0x3fc07501,0x595b1234,0x1d7127c5,0x9481277f,0x10b8f87c,0xe65a1adb,0x677db2a8 +.long 0xddce3345,0xec2fccaa,0x012a4350,0x2a6811b7,0xac598bdc,0x96760ff1,0xd1bf4128,0x054d652a,0x92a21005,0x0a1151d4,0x33110fdf,0xad7f3971,0x1960100f,0x8c95928c,0x7bf03362,0x6c91c825 +.long 0xce309f06,0xc8c8b2a2,0xca27204b,0xfdb27b59,0x0848e32e,0xd223eaa5,0xe7bfaf1e,0xb93e4b2e,0x44aa3ded,0xc5308ae6,0xc015d573,0x317a666a,0x1a979707,0xc888ce23,0x0d5c4958,0xf141c1e6 +.long 0x61906373,0xb53b7de5,0xeb999595,0x858dbade,0xa59e5c36,0x8cbb47b2,0xdcf4e842,0x660318b3,0x12ba4b7a,0xbd161ccd,0xf8c8282a,0xf399daab,0xeeb2130d,0x1587633a,0xda38dd7d,0xa465311a +.long 0x64d3779b,0x5f75eec8,0xad64c171,0x3c5d0476,0x2a914428,0x87410371,0x90e2fc29,0x8096a891,0x23b3ebc2,0xd3d2ae9d,0xa580cfd6,0x90bdd6db,0xc5b01f6c,0x52dbb7f3,0xe102a2dc,0xe68eded4 +.long 0x99eb6df0,0x17785b77,0x7386b779,0x26c3cc51,0x6417a48e,0x345ed988,0x07d6ef31,0xe990b4e4,0x2586abba,0x0f456b7e,0x59c96e9a,0x239ca6a5,0xe2eb4206,0xe327459c,0xa002b90a,0x3a4c3313 +.long 0xf6a3f6fb,0x2a114806,0x85c251dd,0xad5cad2f,0xf5a784d3,0x92c1f613,0x349766d5,0xec7bfacf,0x3e23cb3b,0x04b3cd33,0xc5a64b2d,0x3979fe84,0x7e589106,0x192e2720,0xa15b527f,0xa60c43d1 +.long 0xbe7cf3a6,0x2dae9082,0xbc967274,0xcc86ba92,0xaea0a8a9,0xf28a2ce8,0x6ee988b3,0x404ca6d9,0x005921b8,0xfd7e9c5d,0x44e79bf9,0xf56297f1,0x0d75ddc2,0xa163b460,0xa1f2be87,0x30b23616 +.long 0xbfe50e2b,0x4b070d21,0xe1bfede1,0x7ef8cfd0,0x2aac4ae0,0xadba0011,0xb9ebd033,0x2a3e7d01,0xe38d9d1c,0x995277ec,0x9c5d2de3,0xb500249e,0xf13ca8c9,0x8912b820,0x877793af,0xc8798114 +.long 0xec3f1dec,0x19e6125d,0x911178da,0x07b1f040,0x904a6738,0xd93ededa,0x0bebedcd,0x55187a5a,0xeb329d41,0xf7d04722,0xf170b391,0xf449099e,0xca99f828,0xfd317a69,0x34a4976d,0x50c3db2b +.long 0x3757b392,0xe9ba7784,0xaa3ca05a,0x326caefd,0xf1e593d4,0x78e5293b,0x0d98fd13,0x7842a937,0x5f96b10d,0xe694bf96,0x06a8cd05,0x373a9df6,0xe8f0c7fc,0x997d1e51,0x63fd972e,0x1d019790 +.long 
0x5499fb32,0x0064d858,0x77a8aeb7,0x7b67bad9,0x2d08eec5,0x1d3eb977,0xcbabae1d,0x5fc047a6,0xe54a64bb,0x0577d159,0xc43497e4,0x8862201b,0x2ce0608d,0xad6b4e28,0x0b167aac,0x8b687b7d +.long 0x8b2ecfa9,0x6ed4d367,0xa90c3c38,0x24dfe62d,0x3fe5c42b,0xa1862e10,0xd5732a9f,0x1ca73dca,0x76bb87ad,0x35f038b7,0xf242b81f,0x674976ab,0xb0fd90cd,0x4f2bde7e,0xa7fdf092,0x6efc172e +.long 0x92222f1f,0x3806b69b,0x6cf7ae70,0x5a2459ca,0xa85217ee,0x6789f69c,0xe3dc85ac,0x5f232b5e,0x48e9e516,0x660e3ec5,0x3197eb31,0x124b4e47,0xaafcca23,0x10a0cb13,0x8213224f,0x7bd63ba4 +.long 0x290a7f4f,0xaffad7cc,0x0286b461,0x6b409c9e,0xffa407af,0x58ab809f,0xc68ac073,0xc3122eed,0x4ef24d7e,0x17bf9e50,0x3e2a5811,0x5d929794,0x02902e01,0x519bc867,0x39c8a851,0x76bba5da +.long 0xda94951e,0xe9f9669c,0x66b8d418,0x4b6af58d,0x17d426a4,0xfa321074,0x9dde6027,0xc78e66a9,0x4a53b964,0x0516c083,0xff602330,0xfc659d38,0x58c5c897,0x0ab55e5c,0x838bc5df,0x985099b2 +.long 0xc52fc238,0x061d9efc,0x6ac1da3f,0x712b2728,0x9283fe08,0xfb658149,0xb8aaa2f7,0x4954ac94,0x7fb2e74f,0x85c0ada4,0xb89926b0,0xee8ba98e,0x23d1af5b,0xe4f9d37d,0xba9b015e,0x14ccdbf9 +.long 0x7bfe7178,0xb674481b,0x65405868,0x4e1debae,0xc48c867d,0x061b2821,0x513b30ea,0x69c15b35,0x36871088,0x3b4a1666,0x1220b1ff,0xe5e29f5d,0x233d9f4d,0x4b82bb35,0x18cdc675,0x4e076333 +.long 0xa3e6fced,0x0d53f5c7,0xf45fbdeb,0xe8cbbdd5,0x13339a70,0xf85c01df,0x142ceb81,0x0ff71880,0xbd70437a,0x4c4e8774,0xba0bda6a,0x5fb32891,0xf18bd26e,0x1cdbebd2,0x03a9d522,0x2f9526f1 +.long 0x92c4d684,0x40ce3051,0x7612efcd,0x8b04d725,0x6f9cae20,0xb9dcda36,0xf058856c,0x0edc4d24,0x85427900,0x64f2e6bf,0xdc09dfea,0x3de81295,0x379bf26c,0xd41b4487,0x6df135a9,0x50b62c6d +.long 0xc72dfe67,0xd4f8e3b4,0x90e19fdf,0xc416b0f6,0x4c13bd35,0x18b9098d,0x15b8cb9e,0xac11118a,0xf0062841,0xf598a318,0x89f356f4,0xbfe0602f,0x30177a0c,0x7ae3637e,0x61136537,0x34097747 +.long 0xd005832a,0x0db2fb5e,0x91042e4f,0x5f5efd3b,0xed70f8ca,0x8c4ffdc6,0xb52da9cc,0xe4645d0b,0xc9001d1f,0x9596f58b,0x4e117205,0x52c8f0bc,0xe398a084,0xfd4aa0d2,0x104f49de,0x815bfe3a +.long 0x23885e5f,0x97e5443f,0xe8433aab,0xf72f8f99,0xe4d4e604,0xbd00b154,0xe5e173ff,0xd0b35e6a,0x9164722d,0x57b2a048,0x88761ec8,0x3e3c665b,0x3da83832,0x6bdd1397,0x73dafe3b,0x3c8b1a1e +.long 0x54317cac,0x4497ace6,0x521771b3,0xbe600ab9,0xb0dfe8b8,0xb42e409e,0x3942310f,0x386a67d7,0x4431cc28,0x25548d8d,0x985dc524,0xa7cff142,0x93c4be32,0x4d60f5a1,0xd071c6e1,0x83ebd5c8 +.long 0xb1fd2b0b,0xba3a80a7,0x5bec33e8,0x9b3ad396,0x79743fb3,0xb3868d61,0xfdb462fa,0xcfd169fc,0x9ce0a6af,0xd3b499d7,0xe42d3ff8,0x55dc1cf1,0xc6c3e1b2,0x04fb9e6c,0x6f69a474,0x47e6961d +.long 0xe548b37b,0x54eb3acc,0x84d40549,0xb38e7542,0x7b341b4f,0x8c3daa51,0x690bf7fa,0x2f6928ec,0x86ce6c41,0x0496b323,0x10adadcd,0x01be1c55,0x4bb5faf9,0xc04e67e7,0xe15c9985,0x3cbaf678 +.long 0x50ca4247,0x8cd12145,0xe7dd30aa,0xba1aa47a,0xe58fee24,0x2f81ddf1,0xeec9b0e8,0x03452936,0x243aea96,0x8bdc3b81,0x15c3d0e5,0x9a2919af,0x10948361,0x9ea640ec,0x6e0bcccf,0x5ac86d5b +.long 0xc36cf440,0xf892d918,0xc939719c,0xaed3e837,0xc0218b64,0xb07b08d2,0xce9790dd,0x6f1bcbba,0x60919b8e,0x4a84d6ed,0x8ac1f9eb,0xd8900791,0x0dd5daef,0xf84941aa,0x67fd62c5,0xb22fe40a +.long 0x157f2db3,0x97e15ba2,0x8e28ca9c,0xbda2fc8f,0x37b9f454,0x5d050da4,0x2379d72e,0x3d57eb57,0xfb5ee997,0xe9b5eba2,0xe11538ca,0x01648ca2,0xf6327974,0x32bb76f6,0xff3f4bb7,0x338f14b8 +.long 0xd7ab9a2d,0x524d226a,0x7dfae958,0x9c00090d,0x8751d8c2,0x0ba5f539,0x3ab8262d,0x8afcbcdd,0xe99d043b,0x57392729,0xaebc943a,0xef51263b,0x20862935,0x9feace93,0xb06c817b,0x639efc03 +.long 
0x66b4be7a,0x1fe054b3,0x84a37a1e,0x3f25a9de,0x78d75cd9,0xf39ef1ad,0x5062c1b5,0xd7b58f49,0xff563436,0x6f74f9a9,0xe8af51e7,0xf718ff29,0x15e97fec,0x5234d313,0x292f1c0a,0xb6a8e2b1 +.long 0x327720c1,0xa7f53aa8,0xba092cc8,0x956ca322,0x28746c4d,0x8f03d64a,0x66d0d392,0x51fe1782,0x3c832c80,0xd19b34db,0x6da2e3b4,0x60dccc5c,0x0a104ccc,0x245dd62e,0x620b21fd,0xa7ab1de1 +.long 0x3893d123,0xb293ae0b,0xb15ee71c,0xf7b75783,0x42a9468b,0x5aa3c614,0xdb15d744,0xd686123c,0xa7ab4116,0x8c616891,0xa4e6a459,0x6fcd72c8,0x77e5fad7,0xac219110,0x704fa46b,0xfb6a20e7 +.long 0x341d81dc,0xe839be7d,0x32148379,0xcddb6889,0xf7026ead,0xda6211a1,0xf4d1cc5e,0xf3b2575f,0xa7a73ae6,0x40cfc8f6,0x61d5b483,0x83879a5e,0x41a50ebc,0xc5acb1ed,0x3c07d8fa,0x59a60cc8 +.long 0xb1876262,0x1b73bdce,0x12af4ee9,0x2b0d79f0,0xd46e1d07,0x8bcf3b0b,0xe45d152f,0x17d6af9d,0x6d736451,0x73520461,0x56b0bf5a,0x43cbbd97,0xd5999b9d,0xb0833a5b,0xeb72e398,0x702614f0 +.long 0x59c3e9f8,0x0aadf01a,0xce6b3d16,0x40200e77,0xdeddafad,0xda22bdd3,0x310d72e1,0x76dedaf4,0x4bc2e88f,0x49ef807c,0x146dd5a5,0x6ba81291,0x7d8d59e9,0xa1a4077a,0x802db349,0x87b6a2e7 +.long 0x1b4e598e,0xd5679997,0x06fe4b1d,0xf499ef1f,0xfcb267c5,0x3978d3ae,0x235786d0,0xb582b557,0x1715cb07,0x32b3b2ca,0x8480241d,0x4c3de6a2,0xcb571ecd,0x63b5ffed,0xed2fe9a9,0xeaf53900 +.long 0xc3b81990,0xdec98d4a,0x9e0cc8fe,0x1cb83722,0xd2b427b9,0xfe0b0491,0xe983a66c,0x0f2386ac,0xb3291213,0x930c4d1e,0x59a62ae4,0xa2f82b2e,0xf93e89e3,0x77233853,0x11777c7f,0x7f8063ac +.long 0x59ad2877,0xff0eb567,0x9865c754,0x6f454642,0x236e9a84,0xe6fe701a,0x06e40fc3,0xc586ef16,0x24bafad9,0x3f62b6e0,0x64da906a,0xc8b42bd2,0xda3276a0,0xc98e1eb4,0x06cbf852,0x30d0e5fc +.long 0xe8b4dfd4,0x1b6b2ae1,0x8301cbac,0xd754d5c7,0x112a39ac,0x66097629,0x93ba4ab9,0xf86b5999,0x99f9d581,0x26c9dea7,0xc2fafeaa,0x0473b1a8,0x3b2505a5,0x1469af55,0xd6a43323,0x227d16d7 +.long 0xad3d97f9,0x3316f73c,0x1f137455,0x52bf3bb5,0x09954e7c,0x953eafeb,0xdd732411,0xa721dfed,0x141d4579,0xb4929821,0xaa3bd435,0x3411321c,0x17fa6015,0xafb355aa,0x18e42f0e,0xb4e7ef4a +.long 0x59371000,0x604ac97c,0x7f759c18,0xe1c48c70,0xa5db6b65,0x3f62ecc5,0x38a21495,0x0a78b173,0xbcc8ad94,0x6be1819d,0xd89c3400,0x70dc04f6,0xa6b4840a,0x462557b4,0x60bd21c0,0x544c6ade +.long 0x907a544b,0x6a00f24e,0x313da210,0xa7520dcb,0x11e4994b,0xfe939b75,0xbc275d70,0x918b6ba6,0x644be892,0xd3e5e0fc,0xfdaf6c42,0x707a9816,0xf15c13fe,0x60145567,0xe130a54a,0x4818ebaa +.long 0x58d2f767,0x28aad3ad,0xd7e7c773,0xdc5267fd,0xc3afcc98,0x4919cc88,0x2db8cd4b,0xaa2e6ab0,0xd0c63eaa,0xd46fec04,0x19ffa832,0xa1cb92c5,0xe43a631f,0x678dd178,0x3dc788b3,0xfb5ae1cd +.long 0x6e77de04,0x68b4fb90,0xf06dbb97,0x7992bcf0,0xc417c01d,0x896e6a13,0xb956be01,0x8d96332c,0x413aa2b9,0x902fc93a,0xfc98c8a5,0x99a4d915,0x565f1137,0x52c29407,0x21e4f281,0x4072690f +.long 0x02ff6072,0x36e607cf,0x8ad98cdc,0xa47d2ca9,0xf5f56609,0xbf471d1e,0xf264ada0,0xbcf86623,0xaa9e5cb6,0xb70c0687,0x17401c6c,0xc98124f2,0xd4a61435,0x8189635f,0xa9d98ea6,0xd28fb8af +.long 0x40c251f8,0xb9a67c2a,0xa2da44be,0x88cd5d87,0xe09b5423,0x437deb96,0x64287dc1,0x150467db,0xcdabb839,0xe161debb,0xf1839a3e,0xa79e9742,0x652d202b,0xbb8dd3c2,0xe9f97d96,0x7b3e67f7 +.long 0xb1cb6ac9,0x5aa5d78f,0xca1d0d45,0xffa13e8e,0x2ba5bf95,0x369295dd,0x39aff05e,0xd68bd1f8,0x26d783f2,0xaf0d86f9,0xfc3aafc1,0x543a59b3,0x7b7da97c,0x3fcf81d2,0xd25dee46,0xc990a056 +.long 0x519cce2c,0x3e6775b8,0xae13d863,0xfc9af71f,0x47c1605c,0x774a4a6f,0x2fd205e8,0x46ba4245,0xd3fd524d,0xa06feea4,0x6de1acc2,0x1e724641,0x334e2b42,0xf53816f1,0x922f0024,0x49e5918e +.long 
0x65c7322d,0x439530b6,0xb3c1b3fb,0xcf12cc01,0x0172f685,0xc70b0186,0x1b58391d,0xb915ee22,0xa317db24,0x9afdf03b,0x17b8ffc4,0x87dec659,0xe4d3d050,0x7f46597b,0x006500e7,0x80a1c1ed +.long 0x78bf030e,0x84902a96,0x50560148,0xfb5e9c9a,0x63362426,0x6dae0a92,0xa9e30c40,0xdcaeecf4,0x518d0c6b,0xc0d887bb,0xcb985b9d,0x99181152,0xef7bc381,0xad186898,0x9ee46201,0x18168ffb +.long 0x2502753c,0x9a04cdaa,0x51407c41,0xbb279e26,0xf23564e5,0xeacb03aa,0x71e61016,0x18336582,0xeb809877,0x8684b8c4,0xea0e672e,0xb336e18d,0x34ee5867,0xefb601f0,0x1341cfd1,0x2733edbe +.long 0x26025c3c,0xb15e809a,0x9350df88,0xe6e981a6,0x8502fd8e,0x92376237,0x0c12be9b,0x4791f216,0x25f02425,0xb7256789,0x7a974443,0xec863194,0xfb41cc52,0x7c0ce882,0xf25c07f2,0xc266ff7e +.long 0x017025f3,0x3d4da8c3,0xfb9579b4,0xefcf628c,0x1f3716ec,0x5c4d0016,0x6801116e,0x9c27ebc4,0x1da1767e,0x5eba0ea1,0x47004c57,0xfe151452,0x8c2373b7,0x3ace6df6,0x5dbc37ac,0x75c3dffe +.long 0xddc925fc,0x3dc32a73,0x2f65ee0b,0xb679c841,0x451cbfeb,0x715a3295,0xf76e9a29,0xd9889768,0xb28ad247,0xec20ce7f,0x00894d79,0xe99146c4,0x9f5e3ea7,0x71457d7c,0x38030031,0x097b2662 +.long 0xcf9f82a8,0xdb7f6ae6,0x438f473a,0x319decb9,0x283856c3,0xa63ab386,0xb06a361b,0x13e3172f,0x7d5a006c,0x2959f8dc,0x75fba752,0x2dbc27c6,0x87c22c9e,0xc1227ab2,0x71a268b2,0x06f61f75 +.long 0x04779ce2,0x1b6bb971,0x0aadcb1d,0xaca83812,0xaeaab2d5,0x297ae0bc,0x5bfb9f13,0xa5c14ee7,0xf17a62c7,0xaa00c583,0x173759f6,0x39eb962c,0x86c9a88f,0x1eeba1d4,0xdf016c5e,0x0ab6c37a +.long 0xa28a0749,0xa2a147db,0xee519165,0x246c20d6,0xd3810715,0x5068d1b1,0x748160b9,0xb1e7018c,0xf380ff62,0x03f5b1fa,0xf3cb2c1e,0xef7fb1dd,0xfc91a7da,0xeab539a8,0xf3f9b561,0x83ddb707 +.long 0xfe7df7a4,0xc550e211,0x063f6f40,0xa7cd07f2,0x2976879c,0xb0de3635,0xe55741da,0xb5f83f85,0xf3d8ac3d,0x4ea9d25e,0x62819f02,0x6fe2066f,0xcef4a564,0x4ab2b9c2,0x5ffa2de3,0x1e155d96 +.long 0xc3a72d00,0x0eb0a19b,0x8513c31b,0x4037665b,0x04c64637,0x2fb2b6bf,0x08cdc639,0x45c34d6e,0xf01fd796,0x56f1e10f,0xfe3667b8,0x4dfb8101,0x9021d0c0,0xe0eda253,0x8a06c6ab,0x7a94e9ff +.long 0xbb9aa882,0x2d3bb0d9,0xec05fd10,0xea20e4e5,0x1a1ca64e,0xed7eeb5f,0xc6327cbd,0x2fa6b43c,0x3aa91121,0xb577e3cf,0x3a34079b,0x8c6bd5ea,0x60e02fc0,0xd7e5ba39,0x90141bf8,0xf16dd2c3 +.long 0x80101b98,0xb57276d9,0xb82f0f66,0x760883fd,0x4bc3eff3,0x89d7de75,0x5dc2ab40,0x03b60643,0xe05beeac,0xcd6e53df,0xbc3325cd,0xf2f1e862,0x774f03c3,0xdd0f7921,0x4552cc1b,0x97ca7221 +.long 0x1cd19f72,0x5a0d6afe,0xf183fbeb,0xa20915dc,0x832c403c,0x9fda4b40,0xbe425442,0x32738edd,0xb5eccf1a,0x469a1df6,0x28bbe1f0,0x4b5aff42,0x570dfc93,0x31359d7f,0xf0088628,0xa18be235 +.long 0xb00ed3a9,0xa5b30fba,0x73cdf8be,0x34c61374,0xabc56797,0x2c5c5f46,0xb82a8ae2,0x5cecf93d,0xa968fbf0,0x7d3dbe41,0x1a5c7f3d,0xd23d4583,0xc087a9c7,0xf28f69a0,0x474471ca,0xc2d75471 +.long 0x4eb732ec,0x36ec9f4a,0xb1ca6bed,0x6c943bbd,0xf2457892,0xd64535e1,0xf7e2ac06,0x8b84a8ea,0x2499dd5f,0xe0936cd3,0x0ed04e57,0x12053d7e,0xe4305d9d,0x4bdd0076,0x1f67f0a2,0x34a527b9 +.long 0x9cec46ea,0xe79a4af0,0x658b9bc7,0xb15347a1,0x35af2f75,0x6bd2796f,0x4051c435,0xac957990,0xc33a655d,0x2669dda3,0x88514aa3,0x5d503c2e,0x3753dd41,0xdfa11337,0x0b754f78,0x3f054673 +.long 0x496125bd,0xbf185677,0x3775006c,0xfb0023c8,0x3a037899,0xfa0f072f,0x0e4aea57,0x4222b6eb,0x7866d25a,0x3dde5e76,0x4837aa6f,0xb6eb04f8,0x2cf1cdb8,0x5315591a,0x2d4e683c,0x6dfb4f41 +.long 0x48ee1f3a,0x7e923ea4,0x05a2afd5,0x9604d9f7,0x40ea4948,0xbe1d4a33,0xb44cbd2f,0x5b45f1f4,0x4acc757e,0x5faf8376,0x63d68ff7,0xa7cf9ab8,0xdf0e404b,0x8ad62f69,0x12bdafdf,0xd65f33c2 +.long 
0xa377b14e,0xc365de15,0x8e39f60c,0x6bf5463b,0x2ce68148,0x62030d2d,0xe6f843a8,0xd95867ef,0xef5ab017,0xd39a0244,0x4ab55d12,0x0bd2d8c1,0x41639169,0xc9503db3,0xf7660c8a,0x2d4e25b0 +.long 0xe224c5d7,0x760cb3b5,0x68616919,0xfa3baf8c,0x8d142552,0x9fbca113,0x7669ebf5,0x1ab18bf1,0x9bdf25dd,0x55e6f53e,0xcb6cd154,0x04cc0bf3,0x95e89080,0x595bef49,0x104a9ac1,0xfe9459a8 +.long 0xcce9bb32,0xad2d89ca,0xf7de8285,0xddea65e1,0xb351bd4b,0x62ed8c35,0x0c0e19a7,0x4150ff36,0x345f4e47,0x86e3c801,0x203a266c,0x3bf21f71,0x855b1f13,0x7ae110d4,0x07262517,0x5d6aaf6a +.long 0x813d28f1,0x1e0f12e1,0x7ad7a523,0x6000e11d,0xc744a17b,0xc7d8deef,0x14c05a00,0x1e990b48,0x93e976d5,0x68fddaee,0x46610d63,0x696241d1,0x893dda88,0xb204e7c3,0x6a3a6946,0x8bccfa65 +.long 0xc5cd1411,0xb59425b4,0xff3658b1,0x701b4042,0x4784cf93,0xe3e56bca,0x8fe68d60,0x27de5f15,0xf8d53f19,0x4ab9cfce,0xa40a730d,0xddb10311,0x4eee0a8a,0x6fa73cd1,0x5249719d,0xfd548748 +.long 0xa8123ef0,0x49d66316,0xe7f95438,0x73c32db4,0x0d9e7854,0x2e2ed209,0x9d9f0507,0xf98a9329,0x0c6aa20a,0xc5d33cf6,0x75279bb2,0x9a32ba14,0x774a7307,0x7e3202cb,0xe8c42dbd,0x64ed4bc4 +.long 0xd4caed0d,0xc20f1a06,0x171d22b3,0xb8021407,0xd13268d7,0xd426ca04,0x25f4d126,0x92377007,0x71f21a85,0x4204cbc3,0xf82369ba,0x18461b7a,0x3fc858f9,0xc0c07d31,0xe2bab569,0x5deb5a50 +.long 0xd5eea89e,0xd5959d46,0x08437f4b,0xfdff8424,0x3cfe254f,0xf21071e4,0x95468321,0x72417696,0x102cae3e,0x5d8288b9,0xf1965dff,0x2d143e3d,0xa078d847,0x00c9a376,0x26028731,0x6fc0da31 +.long 0xe45083a2,0xa2baeadf,0x5e5b4bcd,0x66bc7218,0xd04b8e7f,0x2c826442,0x6c4b586b,0xc19f5451,0x5b7eeed5,0x60182c49,0x7aa9dfa1,0xd9954ecd,0xc73884ad,0xa403a8ec,0x9bb39041,0x7fb17de2 +.long 0xabb020e8,0x694b64c5,0x19c4eec7,0x3d18c184,0x1c4793e5,0x9c4673ef,0x056092e6,0xc7b8aeb5,0xf0f8c16b,0x3aa1ca43,0xd679b2f6,0x224ed5ec,0x55a205c9,0x0d56eeaf,0x4b8e028b,0xbfe115ba +.long 0x3927f4fe,0x97e60849,0x759aa7c5,0xf91fbf94,0x6be90a51,0x985af769,0x78ccb823,0xc1277b78,0xe7a75952,0x395b656e,0x928da5f5,0x00df7de0,0x4ca4454f,0x09c23175,0x7aa2d3c1,0x4ec971f4 +.long 0xe75d9ccc,0x45c3c507,0x3dc90306,0x63b7be8a,0x5db44bdc,0x37e09c66,0x6841c6a2,0x50d60da1,0x08df1b12,0x6f9b65ee,0x7ff089df,0x38734879,0x3fe8013d,0x9c331a66,0x5f42fcc8,0x017f5de9 +.long 0xe8e57567,0x43077866,0xf9fcdb18,0xc9f781ce,0x9b12e174,0x38131dda,0x8a03752a,0x25d84aa3,0x4d0c0ce2,0x45e09e09,0x92bebba5,0x1564008b,0xa87284c7,0xf7e8ad31,0x97e7bbaa,0xb7c4b46c +.long 0x97acf4ec,0x3e22a7b3,0x5ea8b640,0x0426c400,0x4e969285,0x5e3295a6,0xa6a45670,0x22aabc59,0x5f5942bc,0xb929714c,0xfa3182ed,0x9a6168bd,0x104152ba,0x2216a665,0xb6926368,0x46908d03 +.long 0x5a1251fb,0xa9f5d874,0xc72725c7,0x967747a8,0x31ffe89e,0x195c33e5,0xe964935e,0x609d210f,0x2fe12227,0xcafd6ca8,0x0426469d,0xaf9b5b96,0x5693183c,0x2e9ee04c,0xc8146fef,0x1084a333 +.long 0xaed1d1f7,0x96649933,0x50563090,0x566eaff3,0xad2e39cf,0x345057f0,0x1f832124,0x148ff65b,0xcf94cf0d,0x042e89d4,0x520c58b3,0x319bec84,0x5361aa0d,0x2a267626,0x8fbc87ad,0xc86fa302 +.long 0x5c8b06d5,0xfc83d2ab,0xfe4eac46,0xb1a785a2,0x846f7779,0xb99315bc,0xef9ea505,0xcf31d816,0x15d7dc85,0x2391fe6a,0xb4016b33,0x2f132b04,0x181cb4c7,0x29547fe3,0x650155a1,0xdb66d8a6 +.long 0xadc1696f,0x6b66d7e1,0x0acd72d0,0x98ebe593,0xcc1b7435,0x65f24550,0xb4b9a5ec,0xce231393,0xdb067df9,0x234a22d4,0xcaff9b00,0x98dda095,0x6100c9c1,0x1bbc75a0,0x939cf695,0x1560a9c8 +.long 0x99e0925f,0xcf006d3e,0x6322375a,0x2dd74a96,0xb56af5ba,0xc58b446a,0xe0b9b4f1,0x50292683,0x1aeaffa3,0xe2c34cb4,0x9b9587c1,0x8b17203f,0xead1350c,0x6d559207,0xfb7f9604,0x2b66a215 +.long 
0xfe51bf74,0x0850325e,0x5e460094,0x9c4f579e,0x76da2f25,0x5c87b92a,0x6febef33,0x889de4e0,0x646083ce,0x6900ec06,0xbfe12773,0xbe2a0335,0xc5344110,0xadd1da35,0xb802cd20,0x757568b7 +.long 0x00f7e6c8,0x75559779,0x0facd2f0,0x38e8b94f,0x03fde375,0xfea1f3af,0x75881dfc,0x5e11a1d8,0xc1e2f2ef,0xb3a6b02e,0xc605a6c5,0x193d2bbb,0x339a0b2d,0x325ffeee,0x9e0c8846,0x27b6a724 +.long 0xf1c367ca,0xe4050f1c,0xc90fbc7d,0x9bc85a9b,0xe1a11032,0xa373c4a2,0xad0393a9,0xb64232b7,0x167dad29,0xf5577eb0,0x94b78ab2,0x1604f301,0xe829348b,0x0baa94af,0x41654342,0x77fbd8dd +.long 0xb964e39a,0xdab50ea5,0xd0d3c76e,0xd4c29e3c,0x56d11964,0x80dae67c,0xe5ffcc2f,0x7307a8bf,0x91708c3b,0x65bbc1aa,0x28bf0eeb,0xa151e62c,0x6fa34db7,0x6cb53381,0xa29403a8,0x5139e05c +.long 0x94a7cd2e,0x6ff651b4,0x0699336c,0x5671ffd1,0x979a896a,0x6f5fd2cc,0xd8148cef,0x11e893a8,0x65cf7b10,0x988906a1,0xc50d8485,0x81b67178,0x8a35b3de,0x7c0deb35,0xc1d29799,0x423ac855 +.long 0xdac50b74,0xaf580d87,0x5869734c,0x28b2b89f,0x874e28fb,0x99a3b936,0x25f3f73a,0xbb2c9190,0x84a9d5b7,0x199f6918,0x7e770374,0x7ebe2325,0x0738efe2,0xf442e107,0xcf9082d2,0xcf9f3f56 +.long 0x09618708,0x719f69e1,0xc183f9b1,0xcc9e8364,0x366a21af,0xec203a95,0x068b141f,0x6aec5d6d,0x994f04e9,0xee2df78a,0x271245b0,0xb39ccae8,0x97e43f4f,0xb875a4a9,0xdb2cea98,0x507dfe11 +.long 0x489b03e9,0x4fbf81cb,0x6ec414fa,0xdb86ec5b,0xf51b3ae5,0xfad444f9,0x1914e3fe,0xca7d33d6,0x0ae6c4d0,0xa9c32f5c,0x73969568,0xa9ca1d1e,0x1aa7467e,0x98043c31,0xe21b5ac6,0xe832e75c +.long 0x5232123d,0x314b7aea,0x65ae86db,0x08307c8c,0xaa4668ed,0x06e7165c,0xb4d3ec39,0xb170458b,0xc19bb986,0x4d2e3ec6,0xae0304ed,0xc5f34846,0x6c9f9722,0x917695a0,0x4cab1c0a,0x6c7f7317 +.long 0x9d6d2e8b,0x6295940e,0x549f7c97,0xd318b8c1,0x97713885,0x22453204,0xa8a440fe,0x468d834b,0xbfba796e,0xd81fe5b2,0x6d71f116,0x152364db,0xb5b66e53,0xbb8c7c59,0x2641a192,0x0b12c61b +.long 0xfcf0a7fd,0x31f14802,0x5488b01e,0x42fd0789,0x9952b498,0x71d78d6d,0x07ac5201,0x8eb572d9,0x4d194a88,0xe0a2a44c,0xba017e66,0xd2b63fd9,0xf888aefc,0x78efc6c8,0x4a881a11,0xb76f6bda +.long 0xb46c2397,0x187f314b,0x5ded2819,0x004cf566,0x38764d34,0xa9ea5704,0x78084709,0xbba45217,0x1171121e,0x06474571,0xe7c9b671,0xad7b7eb1,0x730f7507,0xdacfbc40,0xc7ad7bd1,0x178cd8c6 +.long 0xb2a67238,0xbf0be101,0xaf9c14f2,0x3556d367,0xa5662075,0x104b7831,0x79d9e60a,0x58ca59bb,0xa569a73b,0x4bc45392,0x5698f6c9,0x517a52e8,0xaeadd755,0x85643da5,0x2a581b84,0x1aed0cd5 +.long 0x80af1372,0xb9b4ff84,0xf1ba5d1f,0x244c3113,0xf5f98d31,0x2a5dacbe,0x4375bc2a,0x2c3323e8,0x5594b1dd,0x17a3ab4a,0xceb4797e,0xa1928bfb,0xe4886a19,0xe83af245,0x72b5a74a,0x8979d546 +.long 0x19f9e967,0xa0f726bc,0xe8fbbf4e,0xd9d03152,0xb7707d40,0xcfd6f51d,0x63f6e6e0,0x633084d9,0x55667eaf,0xedcd9cdc,0x2e44d56f,0x73b7f92b,0x4e962b14,0xfb2e39b6,0xf671fcbf,0x7d408f6e +.long 0x164a89bb,0xcc634ddc,0x3ef3bd05,0x74a42bb2,0x428decbb,0x1280dbb2,0x402c8596,0x6103f6bb,0x355a5752,0xfa2bf581,0x00946674,0x562f96a8,0x6da0223b,0x4e4ca16d,0x28d3aa25,0xfe47819f +.long 0xf8dfcf8a,0x9eea3075,0x95669825,0xa284f0aa,0x867d3fd8,0xb3fca250,0x269d691e,0x20757b5f,0x93b8a5de,0xf2c24020,0xebc06da6,0xd3f93359,0xb2739c33,0x1178293e,0xbcd686e5,0xd2a3e770 +.long 0xcd941534,0xa76f49f4,0xe3c71c0e,0x0d37406b,0x3b97f7e3,0x172d9397,0xbd7fd0de,0xec17e239,0x6f496ba2,0xe3290551,0x36ad50e7,0x6a693172,0x83e7eff5,0xc4e539a2,0x18e1b4cf,0x752737e7 +.long 0x68af43ee,0xa2f7932c,0x703d00bd,0x5502468e,0x2fb061f5,0xe5dc978f,0x28c815ad,0xc9a1904a,0x470c56a4,0xd3af538d,0x193d8ced,0x159abc5f,0x20108ef3,0x2a37245f,0x223f7178,0xfa17081e +.long 
0x10c8c0f5,0x27b0fb2b,0x40650547,0x2102c3ea,0x8ac3bfa7,0x594564df,0x509dad96,0x98102033,0xf1d18a13,0x6989643f,0xd7fc5af0,0x35eebd91,0xfaeaafd8,0x078d096a,0xdef3de98,0xb7a89341 +.long 0xecf2a73a,0x2a206e8d,0x8e551994,0x066a6397,0xb98d53a2,0x3a6a088a,0x2d1124aa,0x0ce7c67c,0x759a113c,0x48cec671,0x4f6f67fa,0xe3b373d3,0xfd36727b,0x5455d479,0xa13c0d81,0xe5a428ee +.long 0x1c86682b,0xb853dbc8,0xb8d02b2a,0xb78d2727,0x8ebc329a,0xaaf69bed,0x293b2148,0xdb6b40b3,0xb8c4961f,0xe42ea77d,0x20e5e0ab,0xb1a12f7c,0x79e8b05e,0xa0ec5274,0xfab60a80,0x68027391 +.long 0x16b1bd5e,0x6bfeea5f,0x4de30ad3,0xf957e420,0x6a353b9e,0xcbaf664e,0x26d14feb,0x5c873312,0xb65f57cb,0x4e87f98c,0x5e0cdd41,0xdb60a621,0xa6881440,0x67c16865,0x46ab52aa,0x1093ef1a +.long 0x3f4ece64,0xc095afb5,0x7604551a,0x6a6bb02e,0x0b26b8cd,0x55d44b4e,0xf971268a,0xe5f9a999,0x11a7de84,0xc08ec425,0xfda469dd,0x83568095,0x6c6c90a2,0x737bfba1,0xbe229831,0x1cb9c4a0 +.long 0xbb2eec64,0x93bccbba,0xda03adbe,0xa0c23b64,0xe0e86ac4,0x5f7aa00a,0xfc1401e6,0x470b941e,0x9df43574,0x5ad8d679,0x0f65d810,0x4ccfb8a9,0xaa7fbd81,0x1bce80e3,0x9508d20a,0x273291ad +.long 0x42a92806,0xf5c4b46b,0xa86ab44a,0x810684ec,0xca0bc9f8,0x4591640b,0x5c4b6054,0xb5efcdfc,0x6e9edd12,0x16fc8907,0xd4d792f9,0xe29d0b50,0x9b03116d,0xa45fd01c,0xc81765a4,0x85035235 +.long 0xb4b4b67c,0x1fe2a9b2,0xe8020604,0xc1d10df0,0xbc8058d8,0x9d64abfc,0x712a0fbb,0x8943b9b2,0x3b3def04,0x90eed914,0x4ce775ff,0x85ab3aa2,0x7bbc9040,0x605fd4ca,0xe2c75dfb,0x8b34a564 +.long 0x10358560,0x41ffc94a,0x9e5c28aa,0x2d8a5072,0x4cc7eb15,0xe915a0fc,0x8f6d0f5d,0xe9efab05,0xd19e9b91,0xdbab47a9,0x0276154c,0x8cfed745,0x2cfede0d,0x154357ae,0x19f5a4ef,0x520630df +.long 0xe382360f,0x25759f7c,0x88bf5857,0xb6db05c9,0x6c58d46c,0x2917d61d,0xfd20cb7a,0x14f8e491,0x11c20340,0xb68a727a,0xaf7ccbb6,0x0386f86f,0xfee09a20,0x5c8bc6cc,0xbb7eea35,0x7d76ff4a +.long 0xdb15be7a,0xa7bdebe7,0xd89f0302,0x67a08054,0xc1193364,0x56bf0ea9,0x62837ebe,0xc8244467,0x20d841b8,0x32bd8e8b,0xdbb8a54f,0x127a0548,0x63b20236,0x83dd4ca6,0x203491fa,0x87714718 +.long 0xaa8a5288,0x4dabcaaa,0xaf23a1c9,0x91cc0c8a,0x3f220e0c,0x34c72c6a,0x1232144a,0xbcc20bdf,0xa20ede1b,0x6e2f42da,0x74a00515,0xc441f00c,0x734b8c4b,0xbf46a5b6,0x7b56c9a4,0x57409503 +.long 0xe4585d45,0x9f735261,0x6734e642,0x9231faed,0xbe70ee6c,0x1158a176,0x7c3501bf,0x35f1068d,0xa2d26115,0x6beef900,0xef0afee3,0x649406f2,0xbc2420a1,0x3f43a60a,0xd5aee4ac,0x509002a7 +.long 0x3ff3571b,0xb46836a5,0x837927c1,0x24f98b78,0x4533c716,0x6254256a,0xd07ee196,0xf27abb0b,0x5c6d5bfd,0xd7cf64fc,0xf0cd7a77,0x6915c751,0x8798f534,0xd9f59012,0xf81d8b5f,0x772b0da8 +.long 0x2e03fa69,0x1244260c,0x3be1a374,0x36cf0e3a,0xef06b960,0x6e7c1633,0x671f90f6,0xa71a4c55,0x33c673db,0x7a941251,0x73e8c131,0xc0bea510,0xd4f6c734,0x61a8a699,0x341ed001,0x25e78c88 +.long 0x8e2f7d90,0x5c18acf8,0x77be32cd,0xfdbf33d7,0xd2eb5ee9,0x0a085cd7,0xb3201115,0x2d702cfb,0x85c88ce8,0xb6e0ebdb,0x1e01d617,0x23a3ce3c,0x567333ac,0x3041618e,0x157edb6b,0x9dd0fd8f +.long 0xb57872b8,0x27f74702,0x657d5fe1,0x2ef26b4f,0x57cf3d40,0x95426f0a,0x65a6067a,0x847e2ad1,0x09996a74,0xd474d9a0,0x2a26115c,0x16a56acd,0xd16f4d43,0x02a615c3,0xaadb85b7,0xcc3fc965 +.long 0xce07d1b0,0x386bda73,0x58ad4178,0xd82910c2,0xcd2617f4,0x124f82cf,0xef691770,0xcc2f5e8d,0xb8c30ccc,0x82702550,0x1a8e575a,0x7b856aea,0xb1ab9459,0xbb822fef,0xec24e38e,0x085928bc +.long 0xba8f4b4d,0x5d0402ec,0x00b4d58b,0xc07cd4ba,0x29227e7a,0x5d8dffd5,0x31bf386f,0x61d44d0c,0x135e6f4d,0xe486dc2b,0xe79410ef,0x680962eb,0xf10088b5,0xa61bd343,0xe2e28686,0x6aa76076 +.long 
0x8fb98871,0x80463d11,0xbbc76aff,0xcb26f5c3,0xfbe03614,0xd4ab8edd,0xc0cf2dee,0xc8eb579b,0xc93bae41,0xcc004c15,0x3aeca3b2,0x46fbae5d,0x0f1e9ab1,0x671235cf,0x9ec285c1,0xadfba934 +.long 0xf216c980,0x88ded013,0xf79e0bc1,0xc8ac4fb8,0xfb97a237,0xa29b89c6,0x9922d8e7,0xb697b780,0xddb945b5,0x3142c639,0xe094c3a9,0x447b06c7,0x72266c90,0xcdcb3642,0xa9385046,0x633aad08 +.long 0xb57c6477,0xa36c936b,0xe94dbcc6,0x871f8b64,0xa591a67b,0x28d0fb62,0xc1d926f5,0x9d40e081,0xf2d84b5a,0x3111eaf6,0xa565b644,0x228993f9,0x2c83188b,0x0ccbf592,0x3df3e197,0xf87b30ab +.long 0x7642bca8,0xb8658b31,0x52800f17,0x1a032d7f,0x79bf9445,0x051dcae5,0x54a2e253,0xeba6b8ee,0xd4485692,0x5c8b9cad,0x8986e9be,0x84bda40e,0x2f0db448,0xd16d16a4,0xa14d4188,0x8ec80050 +.long 0x98fa7aaa,0xb2b26107,0xf073aa4e,0x41209ee4,0xf2d6b19b,0xf1570359,0xfc577caf,0xcbe6868c,0x32c04dd3,0x186c4bdc,0xcfeee397,0xa6c35fae,0xf086c0cf,0xb4a1b312,0xd9461fe2,0xe0a5ccc6 +.long 0x1536189f,0xc32278aa,0xba6df571,0x1126c55f,0xb194560e,0x0f71a602,0x324bd6e1,0x8b2d7405,0x3738be71,0x8481939e,0x1a4d97a9,0xb5090b1a,0xf05ba915,0x116c65a3,0xaae448aa,0x21863ad3 +.long 0xa7aae5d3,0xd24e2679,0x0de5c1c4,0x7076013d,0xbb05b629,0x2d50f8ba,0x6e66efbb,0x73c1abe2,0xf2488af7,0xefd4b422,0x663ba575,0xe4105d02,0x53a69457,0x7eb60a8b,0xc945973b,0x62210008 +.long 0x77a50ec6,0xfb255478,0x0a37a72c,0xbf0392f7,0x4be18e7a,0xa0a7a19c,0x25b1e0af,0x90d8ea16,0xef953f57,0x7582a293,0xbdc5465a,0x90a64d05,0xe2510717,0xca79c497,0x18cb641f,0x560dbb7c +.long 0x4b66abfb,0x1d8e3286,0x59030900,0xd26f52e5,0x5584941a,0x1ee3f643,0x569f5958,0x6d3b3730,0x4789dba5,0x9ff2a62f,0x72b5c9b7,0x91fcb815,0x6c8f9a0e,0xf446cb7d,0x39b7ecb5,0x48f625c1 +.long 0x1c6219b8,0xbabae801,0x28ac2f23,0xe7a562d9,0x26e20588,0xe1b48732,0x775af051,0x06ee1cad,0xfaff79f7,0xda29ae43,0x652ee9e0,0xc141a412,0x195f4bd0,0x1e127f6f,0x072f34f8,0x29c6ab4f +.long 0x30448112,0x7b7c1477,0xe4a38656,0x82b51af1,0x2f315010,0x2bf2028a,0x6ea88cd4,0xc9a4a01f,0x257e5818,0xf63e95d8,0xb4519b16,0xdd8efa10,0x0da910bf,0xed8973e0,0x5c0fe4a9,0xed49d077 +.long 0xb7caee1e,0xac3aac5e,0xa7f4da57,0x1033898d,0x5c6669b9,0x42145c0e,0xc1aa2aa0,0x42daa688,0x1a1d885a,0x629cc15c,0xf4b76817,0x25572ec0,0x9c8f8f28,0x8312e435,0x81965490,0x8107f8cd +.long 0x6fa6110c,0x516ff3a3,0xfb93561f,0x74fb1eb1,0x8457522b,0x6c0c9047,0x6bb8bdc6,0xcfd32104,0xcc80ad57,0x2d6884a2,0x86a9b637,0x7c27fc35,0xadf4e8cd,0x3461baed,0x617242f0,0x1d56251a +.long 0xc955bef4,0x0b80d209,0x06adb047,0xdf02cad2,0x5ec74fee,0xf0d7cb91,0x1111ba44,0xd2503375,0xdf53cb36,0x9671755e,0x3368551b,0x54dcb612,0xc8a025a4,0x66d69aac,0xe77ef445,0x6be946c6 +.long 0xa995e094,0x719946d1,0xe51e04d8,0x65e848f6,0x6a1e3113,0xe62f3300,0x501de503,0x1541c7c1,0xf4acfade,0x4daac9fa,0x44cd0b71,0x0e585897,0x0a51cd77,0x544fd869,0x0031016d,0x60fc20ed +.long 0xa4276867,0x58b404ec,0x34f34993,0x46f6c3cc,0xc636e5bd,0x477ca007,0x7c458b47,0x8018f5e5,0xe47b668f,0xa1202270,0xee14f203,0xcef48ccd,0x62ff9b4d,0x23f98bae,0xc589eddd,0x55acc035 +.long 0x64db4444,0x3fe712af,0xbecdd480,0x19e9d634,0xa930978a,0xe08bc047,0xa1280733,0x2dbf24ec,0x2cd706b2,0x3c0ae38c,0x359017b9,0x5b012a5b,0x72e0f5ae,0x3943c38c,0x57176fa3,0x786167ea +.long 0x594881dc,0xe5f9897d,0xcfb820c1,0x6b5efad8,0xd55018de,0xb2179093,0x0bac56ce,0x39ad7d32,0x2cfc0e81,0xb55122e0,0xf6d89daa,0x117c4661,0xcb64fa09,0x362d01e1,0x3e9c4ddd,0x6a309b4e +.long 0xabea49b1,0xfa979fb7,0x10e2c6c5,0xb4b1d27d,0x23afde7a,0xbd61c2c4,0x9786d358,0xeb6614f8,0x7f6f7459,0x4a5d816b,0x09360e7b,0xe431a44f,0xc309914c,0x8c27a032,0xcaede3d8,0xcea5d68a +.long 
0x3a0a3f95,0x3668f665,0x7ceba27b,0x89369416,0xe4728fe9,0x89981fad,0x8a093562,0x7102c8a0,0x235d21c8,0xbb80310e,0xbefb7f7b,0x505e55d1,0x12958a67,0xa0a90811,0x4d851fef,0xd67e106a +.long 0x431dd80e,0xb84011a9,0x73306cd9,0xeb7c7cca,0xd1b3b730,0x20fadd29,0xfe37b3d3,0x83858b5b,0xb6251d5c,0xbf4cd193,0x1352d952,0x1cca1fd3,0x90fbc051,0xc66157a4,0x89b98636,0x7990a638 +.long 0x87dec0e1,0xe5aa692a,0xf7b39d00,0x010ded8d,0x54cfa0b5,0x7b1b80c8,0xa0f8ea28,0x66beb876,0x3476cd0e,0x50d7f531,0xb08d3949,0xa63d0e65,0x53479fc6,0x1a09eea9,0xf499e742,0x82ae9891 +.long 0x5ca7d866,0xab58b910,0x3adb3b34,0x582967e2,0xcceac0bc,0x89ae4447,0x7bf56af5,0x919c667c,0x60f5dcd7,0x9aec17b1,0xddcaadbc,0xec697b9f,0x463467f5,0x0b98f341,0xa967132f,0xb187f1f7 +.long 0x214aeb18,0x90fe7a1d,0x741432f7,0x1506af3c,0xe591a0c4,0xbb5565f9,0xb44f1bc3,0x10d41a77,0xa84bde96,0xa09d65e4,0xf20a6a1c,0x42f060d8,0xf27f9ce7,0x652a3bfd,0x3b3d739f,0xb6bdb65c +.long 0xec7fae9f,0xeb5ddcb6,0xefb66e5a,0x995f2714,0x69445d52,0xdee95d8e,0x09e27620,0x1b6c2d46,0x8129d716,0x32621c31,0x0958c1aa,0xb03909f1,0x1af4af63,0x8c468ef9,0xfba5cdf6,0x162c429f +.long 0x753b9371,0x2f682343,0x5f1f9cd7,0x29cab45a,0xb245db96,0x571623ab,0x3fd79999,0xc507db09,0xaf036c32,0x4e2ef652,0x05018e5c,0x86f0cc78,0xab8be350,0xc10a73d4,0x7e826327,0x6519b397 +.long 0x9c053df7,0xe8cb5eef,0xb300ea6f,0x8de25b37,0xc849cffb,0xdb03fa92,0xe84169bb,0x242e43a7,0xdd6f958e,0xe4fa51f4,0xf4445a8d,0x6925a77f,0xe90d8949,0xe6e72a50,0x2b1f6390,0xc66648e3 +.long 0x173e460c,0xb2ab1957,0x30704590,0x1bbbce75,0xdb1c7162,0xc0a90dbd,0x15cdd65d,0x505e399e,0x57797ab7,0x68434dcb,0x6a2ca8e8,0x60ad35ba,0xde3336c1,0x4bfdb1e0,0xd8b39015,0xbbef99eb +.long 0x1711ebec,0x6c3b96f3,0xce98fdc4,0x2da40f1f,0x57b4411f,0xb99774d3,0x15b65bb6,0x87c8bdf4,0xc2eef12d,0xda3a89e3,0x3c7471f3,0xde95bb9b,0xd812c594,0x600f225b,0x2b75a56b,0x54907c5d +.long 0x8db60e35,0xa93cc5f0,0xfa833319,0x743e3cd6,0xf81683c9,0x7dad5c41,0x9c34107e,0x70c1e7d9,0xa6be0907,0x0edc4a39,0x86d0b7d3,0x36d47035,0x272bfa60,0x8c76da03,0x0f08a414,0x0b4a07ea +.long 0x45c1dd53,0x699e4d29,0x231debb5,0xcadc5898,0xa77f00e0,0xdf49fcc7,0xa73e5a0e,0x93057bbf,0x027a4cd1,0x2f8b7ecd,0xc614011a,0x114734b3,0x67677c68,0xe7a01db7,0x7e273f4f,0x89d9be5e +.long 0x089808ef,0xd225cb2e,0xd59e4107,0xf1f7a27d,0x8211b9c9,0x53afc761,0xe6819159,0x0361bc67,0x7f071426,0x2a865d0b,0xe7072567,0x6a3c1810,0x0d6bcabd,0x3e3bca1e,0x408591bc,0xa1b02bc1 +.long 0x31fba239,0xe0deee59,0x98bd91d1,0xf47424d3,0x071a3c1d,0x0f8886f4,0xa819233b,0x3f7d41e8,0xcf6eb998,0x708623c2,0x609a287f,0x86bb49af,0x63c90762,0x942bb249,0x55a9654b,0x0ef6eea5 +.long 0x36f5defe,0x5f6d2d72,0x56f99176,0xfa9922dc,0xf78ce0c7,0x6c8c5ece,0xbe09b55e,0x7b44589d,0x9ea83770,0xe11b3bca,0x2ab71547,0xd7fa2c7f,0x2a1ddcc0,0x2a3dd6fa,0x5a7b7707,0x09acb430 +.long 0x649d4e57,0x4add4a2e,0x1917526e,0xcd53a2b0,0x20b44ac4,0xc5262330,0xbaa2c31d,0x4028746a,0x64291d4c,0x51318390,0xee5ad909,0xbf48f151,0x7b185681,0xcce57f59,0x4854d442,0x7c3ac1b0 +.long 0xc093c171,0x65587dc3,0x24f42b65,0xae7acb24,0x955996cb,0x5a338adb,0x6051f91b,0xc8e65675,0x28b8d0b1,0x66711fba,0xb6c10a90,0x15d74137,0x3a232a80,0x70cdd7eb,0x6191ed24,0xc9e2f07f +.long 0xf79588c0,0xa80d1db6,0xb55768cc,0xfa52fc69,0x7f54438a,0x0b4df1ae,0xf9b46a4f,0x0cadd1a7,0x1803dd6f,0xb40ea6b3,0x55eaae35,0x488e4fa5,0x382e4e16,0x9f047d55,0x2f6e0c98,0xc9b5b7e0 +.long 0x95762649,0x6b1bd2d3,0xc7aea3f6,0xa9604ee7,0x6dc6f896,0x3646ff27,0x2860bad1,0x9bf0e7f5,0x7cb44b92,0x2d92c821,0xaea9c182,0xa2f5ce63,0x9154a5fd,0xd0a2afb1,0x95801da6,0x482e474c +.long 
0xb611c24b,0xc19972d0,0x60a8f351,0x1d468e65,0x7bcf6421,0xeb758069,0x88fbc491,0xec9dd0ee,0x956c2e32,0x5b59d2bf,0xdcddf94e,0x73dc6864,0xbcee7665,0xfd5e2321,0x5e9a06c4,0xa7b4f8ef +.long 0x7280f855,0xfba918dd,0x8baec688,0xbbaac260,0x33400f42,0xa3b3f00f,0x66f2e6e4,0x3d2dba29,0x98509375,0xb6f71a94,0xcea423cc,0x8f33031f,0x4807e6fb,0x009b8dd0,0x5cdb954c,0x5163cfe5 +.long 0xcf41c6e8,0x03cc8f17,0x037b925c,0xf1f03c2a,0x66d2427c,0xc39c19cc,0x7b6c18e4,0x823d24ba,0x901f0b4f,0x32ef9013,0xf8941c2e,0x684360f1,0x2c28092e,0x0ebaff52,0x256c932f,0x7891e4e3 +.long 0xac445e3d,0x51264319,0x8ea74381,0x553432e7,0x67e9c50a,0xe6eeaa69,0x62e628c7,0x27ced284,0x7a4afa57,0x3f96d375,0xe484c150,0xde0a14c3,0x38bd9923,0x364a24eb,0xe5177422,0x1df18da0 +.long 0xd8d38a9b,0x174e8f82,0xe7de1391,0x2e97c600,0xa1c175dd,0xc5709850,0x32ae5035,0x969041a0,0x76a2086b,0xcbfd533b,0xd7c2e8fe,0xd6bba71b,0x099dfb67,0xb2d58ee6,0x064a85d9,0x3a8b342d +.long 0x522f9be3,0x3bc07649,0xdf1f49a8,0x690c075b,0x3854ec42,0x80e1aee8,0x17689dc7,0x2a7dbf44,0x3faf4078,0xc004fc0e,0xdf11862c,0xb2f02e9e,0xa0a1b7b3,0xf10a5e0f,0x8936ec80,0x30aca623 +.long 0x02f40d9a,0xf83cbf05,0x2c318a4d,0x4681c468,0x0e9c2674,0x98575618,0x1847092e,0xbe79d046,0x78bd01e0,0xaf1e480a,0x72a51db9,0x6dd359e4,0xe3afbab6,0x62ce3821,0x17733199,0xc5cee5b6 +.long 0x6ffd9fbb,0xe08b30d4,0x36c610b7,0x6e5bc699,0x9ce262cf,0xf343cff2,0x68b914c1,0xca2e4e35,0x16de36c5,0x011d64c0,0x42e2b829,0xe0b10fdd,0x6685aaf8,0x78942981,0x230ede97,0xe7511708 +.long 0x3b922bf8,0x671ed8fc,0x4c29b133,0xe4d8c0a0,0x3b6e99c4,0x87eb1239,0x8793beba,0xaff3974c,0x2c18df9b,0x03749405,0x91007139,0xc5c3a293,0xe37a0b95,0x6a77234f,0xb661c96b,0x02c29a21 +.long 0x141ecf61,0xc3aaf1d6,0x3bb22f53,0x9195509e,0x22d51357,0x29597404,0x537bed60,0x1b083822,0xe07289f0,0xcd7d6e35,0x6dd86eff,0x1f94c48c,0xeb0f9cfa,0xc8bb1f82,0x1b2eb97d,0x9ee0b7e6 +.long 0x34d74e31,0x5a52fe2e,0x3bf79ab6,0xa352c310,0xabfeeb8f,0x97ff6c5a,0xf5c97305,0xbfbe8fef,0xa7904608,0xd6081ce6,0xc4fca249,0x1f812f3a,0xb9e5e200,0x9b24bc9a,0x38012ee8,0x91022c67 +.long 0x30a713a1,0xe83d9c5d,0x84ef0f93,0x4876e3f0,0xc1fbf928,0xc9777029,0xbce7d2a4,0xef7a6bb3,0xdfa2a659,0xb8067228,0xd877a48f,0xd5cd3398,0x025d0f3f,0xbea4fd8f,0x2eae7c2b,0xd67d2e35 +.long 0xcc5f4394,0x184de7d7,0x4536e142,0xb5551b5c,0xd34aa60a,0x2e89b212,0xf50051d5,0x14a96fea,0x0d12bb0b,0x4e21ef74,0x60b9677e,0xc522f020,0x2df7731d,0x8b12e467,0x7b326d31,0x39f80382 +.long 0x39024a94,0xdfb8630c,0x97319452,0xaacb96a8,0xeda3867c,0xd68a3961,0x77c4ffca,0x0c58e2b0,0x4da919fa,0x3d545d63,0xf15e2289,0xef79b69a,0x808bab10,0x54bc3d3d,0x45f82c37,0xc8ab3007 +.long 0x7c4a658a,0xc12738b6,0x40e72182,0xb3c47639,0x8798e44f,0x3b77be46,0x17a7f85f,0xdc047df2,0x5e59d92d,0x2439d4c5,0xe8e64d8d,0xcedca475,0x87ca9b16,0xa724cd0d,0xa5540dfe,0x35e4fd59 +.long 0xe4bcf6b1,0xf8c1ff18,0x295018fa,0x856d6285,0x3263c949,0x433f665c,0xa1f21409,0xa6a76dd6,0xcc7b4f79,0x17d32334,0x06720e4a,0xa1d03122,0x81d9bed5,0xadb6661d,0x11db15d1,0xf0d6fb02 +.long 0x1fb747d2,0x7fd11ad5,0x3033762b,0xab50f959,0xfbefaf5a,0x2a7e711b,0x3fef2bbf,0xc7393278,0x0df6f9be,0xe29fa244,0x71efd215,0x9092757b,0x4f3d6fd9,0xee60e311,0x0acfb78b,0x338542d4 +.long 0x38961a0f,0x44a23f08,0x986987ca,0x1426eade,0x4a863cc6,0x36e6ee2e,0x628b8b79,0x48059420,0x7396e1de,0x30303ad8,0x38c5aad1,0x5c8bdc48,0x5c8f5066,0x3e40e11f,0x8d246bbd,0xabd6e768 +.long 0x23330a01,0x68aa40bb,0xc34eafa0,0xd23f5ee4,0x5de02c21,0x3bbee315,0xd1d8dd06,0x18dd4397,0x122d7b44,0x3ba1939a,0xa33870d6,0xe6d3b40a,0x1c4fe3f8,0x8e620f70,0xd3a50cbf,0xf6bba1a5 +.long 
0xcfc0aee0,0x4a78bde5,0xc08c50bd,0x847edc46,0xad63c9b2,0xbaa2439c,0x10fc2acb,0xceb4a728,0x26da033d,0xa419e40e,0x03e02683,0x6cc3889d,0xfdccf725,0x1cd28559,0x8d13d208,0x0fd7e0f1 +.long 0x1f0df9d4,0x01b9733b,0xa2b5e4f3,0x8cc2c5f3,0x3a304fd4,0x43053bfa,0x0a9f1aa7,0x8e87665c,0xd73dc965,0x087f29ec,0x3e9023db,0x15ace455,0x2bce28b4,0x2370e309,0xb6b1e84a,0xf9723442 +.long 0xb72d9f26,0xbeee662e,0xf0e47109,0xb19396de,0xe13289d0,0x85b1fa73,0x54e58e32,0x436cf77e,0xe990ef77,0x0ec833b3,0x1b11fc25,0x7373e3ed,0x0fc332ce,0xbe0eda87,0x8d7ea856,0xced04970 +.long 0x7e977ca0,0xf85ff785,0xdfdd5d2b,0xb66ee8da,0x905af461,0xf5e37950,0x966d487c,0x587b9090,0x32ba0127,0x6a198a1b,0x141615ac,0xa7720e07,0x996ef2f2,0xa23f3499,0x470bcb3d,0xef5f64b4 +.long 0x92b8c559,0xa526a962,0x69740a0f,0x0c14aac0,0xa6bdc0a5,0x0d41a9e3,0x9c48aef4,0x97d52106,0x3e7c253b,0xcf16bd30,0x47fdedc1,0xcc834b1a,0x373aab2e,0x7362c6e5,0xc5f590ff,0x264ed85e +.long 0x66d41870,0x7a46d9c0,0x4787ba09,0xa50c20b1,0xe3d44635,0x185e7e51,0x31e2d8dc,0xb3b3e080,0xa179e9d9,0xbed1e558,0x74a76781,0x2daa3f79,0x3a40864f,0x4372baf2,0x4fe75cb5,0x46900c54 +.long 0xf76765d0,0xb95f171e,0x95c87502,0x4ad726d2,0x4d7c99bd,0x2ec769da,0xc36cdfa8,0x5e2ddd19,0xa93e6dea,0xc22117fc,0x93771123,0xe8a2583b,0xfa08a3a2,0xbe2f6089,0x8f0e1112,0x4809d5ed +.long 0xda7a095e,0x3b414aa3,0x26f5aadd,0x9049acf1,0x6be8b84a,0x78d46a4d,0xb732b9b3,0xd66b1963,0xde6e9555,0x5c2ac2a0,0xb5bd8770,0xcf52d098,0x0fd28921,0x15a15fa6,0x8b27536d,0x56ccb81e +.long 0x9f4ccbb8,0x0f0d8ab8,0xdb221729,0xed5f44d2,0x00bed10c,0x43141988,0x1d735b8b,0xc94348a4,0x29ef8479,0x79f3e9c4,0x614c693f,0x4c13a4e3,0x8e143a14,0x32c9af56,0xe29ac5c4,0xbc517799 +.long 0x2774856f,0x05e17992,0x6c1bf55f,0x6e52fb05,0xe4f19e16,0xaeda4225,0xaf5ccb26,0x70f4728a,0xb2947f22,0x5d2118d1,0x281d6fb9,0xc827ea16,0x8cf0eabd,0x8412328d,0x03ef9dcf,0x45ee9fb2 +.long 0xbb937d63,0x8e700421,0xcc4b37a6,0xdf8ff2d5,0x5ced7b68,0xa4c0d5b2,0xc7308f59,0x6537c1ef,0x3b37f8e8,0x25ce6a26,0xdeebc6ce,0x170e9a9b,0x8728d72c,0xdd037952,0x850154bc,0x445b0e55 +.long 0x83a7337b,0x4b7d0e06,0xffecf249,0x1e3416d4,0x66a2b71f,0x24840eff,0xb37cc26d,0xd0d9a50a,0x6fe28ef7,0xe2198150,0x23324c7f,0x3cc5ef16,0x769b5263,0x220f3455,0xa10bf475,0xe2ade2f1 +.long 0x458d3671,0x28cd20fa,0x2dc4847b,0x1549722c,0x591941e3,0x6dd01e55,0x27128ccb,0x0e6fbcea,0x3bef0262,0xae1a1e6b,0x8f54e103,0xfa8c472c,0x72c052ec,0x7539c0a8,0x5a3490e9,0xd7b27369 +.long 0x71684349,0x143fe1f1,0x32e19b97,0x36b4722e,0x90980aff,0xdc059227,0x9e13d674,0x175c9c88,0x6e6bfdb1,0xa7de5b22,0xbedb4b46,0x5ea5b7b2,0xd34a6e44,0xd5570191,0xa24ff7e6,0xfcf60d2e +.long 0x677819e1,0x614a392d,0xaa5a29e8,0x7be74c7e,0x63c85f3f,0xab50fece,0x46cab337,0xaca2e2a9,0x122a6fe3,0x7f700388,0x882a04a8,0xdb69f703,0xcf7aed57,0x9a77935d,0x8d91c86f,0xdf16207c +.long 0x63ed9998,0x2fca49ab,0xa77ddf96,0xa3125c44,0x24344072,0x05dd8a86,0xfec3fb56,0xa023dda2,0x0c743032,0x421b41fc,0x5e438639,0x4f2120c1,0xc83c1b07,0xfb7cae51,0xcac2171a,0xb2370caa +.long 0x6cc820fb,0x2eb2d962,0xb85a44bf,0x59feee5c,0x5b6598f0,0x94620fca,0x7e314051,0x6b922cae,0x106bed4e,0xff8745ad,0xdfa1e9ab,0x546e71f5,0x1ec29487,0x935c1e48,0x4d936530,0x9509216c +.long 0x85c9a2db,0xc7ca3067,0x6be8606f,0xd6ae5152,0xe14c651d,0x09dbcae6,0x9bc32f96,0xc9536e23,0x34521b03,0xa90535a9,0x878756ff,0xf39c526c,0x8aedf03c,0x383172ec,0xefe0c034,0x20a8075e +.long 0x64026422,0xf22f9c62,0x24b9d076,0x8dd10780,0x3bef2950,0x944c742a,0x88a2b00b,0x55b9502e,0x86a09817,0xa59e14b4,0x47bb4071,0xa39dd3ac,0x3be0592f,0x55137f66,0xc9e63f5b,0x07fcafd4 +.long 
0x346eb226,0x963652ee,0xec2facb7,0x7dfab085,0x691add26,0x273bf2b8,0xf2b46c44,0x30d74540,0xf2c2d065,0x05e8e73e,0xd42eeac9,0xff9b8a00,0x97209d22,0x2fcbd205,0xde14ea2c,0xeb740ffa +.long 0xa8aef518,0xc71ff913,0xfff4cfa2,0x7bfc74bb,0xb6b36048,0x1716680c,0x9ef79af1,0x121b2cce,0xa01eb3d3,0xbff3c836,0x5f79077b,0x50eb1c6a,0xa004bbcf,0xa48c32d6,0x7d64f61d,0x47a59316 +.long 0x93102016,0x6068147f,0x94d12576,0x12c5f654,0xc9bc6b91,0xefb071a7,0x6e23ea95,0x7c2da0c5,0xd4a1dd5d,0xf4fd45b6,0x9122b13c,0x3e7ad9b6,0xe6f57a48,0x342ca118,0x06f8288f,0x1c2e94a7 +.long 0x5a97d231,0x99e68f07,0x4d838758,0x7c80de97,0x05872727,0xbce0f5d0,0x19c4d016,0xbe5d95c2,0x9c2492ee,0x921d5cb1,0x404d6fb3,0x42192dc1,0x32f988d3,0x4c84dcd1,0xa17b8e85,0xde26d61f +.long 0x137c7408,0xc466dcb6,0x36a266da,0x9a38d7b6,0x83bebf1b,0x7ef5cb06,0x0fd014e3,0xe5cdcbbf,0xf65965a0,0x30aa376d,0xebb3e95e,0x60fe88c2,0x66ee6f20,0x33fd0b61,0x3f41f0a0,0x8827dcdb +.long 0x0c56c690,0xbf8a9d24,0xddb7641d,0x40265dad,0x3a6b662b,0x522b05bf,0xb1478c9b,0x466d1dfe,0x1484469b,0xaa616962,0x02df8f9f,0x0db60549,0x3cb8bf51,0xc37bca02,0x21371ce8,0x5effe346 +.long 0xff112c32,0xe8f65264,0x7b971fb2,0x8a9c736d,0x7b75080d,0xa4f19470,0x8839c59b,0xfc3f2c5a,0x5aeb49c2,0x1d6c777e,0xda1addfe,0xf3db034d,0x5535affc,0xd76fee5a,0xb92251fd,0x0853ac70 +.long 0x8b2a29d5,0x37e3d594,0x4de00ddb,0x28f1f457,0xf42c328b,0x8083c1b5,0xe493c73b,0xd8ef1d8f,0x41dc61bd,0x96fb6260,0x27ee2f8a,0xf74e8a9d,0x2c946a5d,0x7c605a80,0x3839ccfd,0xeed48d65 +.long 0x3a29467a,0x9894344f,0xc51eba6d,0xde81e949,0xa5e5c2f2,0xdaea066b,0x08c8c7b3,0x3fc8a614,0x06d0de9f,0x7adff88f,0x3b75ce0a,0xbbc11cf5,0xfbbc87d5,0x9fbb7acc,0x7badfde2,0xa1458e26 +.long 0xe039c256,0x1cb43668,0x7c17fd5d,0x5f26fb8b,0x79aa062b,0xeee426af,0xd78fbf04,0x072002d0,0xe84fb7e3,0x4c9ca237,0x0c82133d,0xb401d8a1,0x6d7e4181,0xaaa52592,0x73dbb152,0xe9430833 +.long 0xbe24319a,0xf92dda31,0xe095a8e7,0x03f7d28b,0x98782185,0xa52fe840,0x29c24dbc,0x276ddafe,0x1d7a64eb,0x80cd5496,0x7f1dbe42,0xe4360889,0x8438d2d5,0x2f81a877,0x85169036,0x7e4d52a8 +.long 0x1d59715d,0x19e3d5b1,0xd788983e,0xc7eaa762,0xabf1f248,0xe5a730b0,0xfae3fd83,0xfbab8084,0x53765b2f,0x65e50d21,0xfa127f3d,0xbdd4e083,0x397b1b10,0x9cf3c074,0xb1b59fd3,0x59f8090c +.long 0x615faa8f,0x7b15fd9d,0x968554ed,0x8fa1eb40,0x7aa44882,0x7bb4447e,0x029fff32,0x2bb2d0d1,0x6caa6d2f,0x075e2a64,0x22e7351b,0x8eb879de,0x9a506c62,0xbcd5624e,0xa87e24dc,0x218eaef0 +.long 0x44ddfa35,0x37e56847,0xdab3f747,0x9ccfc5c5,0x1ee96cf4,0x9ac1df3f,0x3b480b8f,0x0c0571a1,0x4b3a7b3c,0x2fbeb3d5,0x5dcdbb99,0x35c03669,0xb2415b3a,0x52a0f5dc,0x4413ed9a,0xd57759b4 +.long 0x3d30a2c5,0x1fe647d8,0xf78a81dc,0x0857f77e,0x131a4a9b,0x11d5a334,0x29d393f5,0xc0a94af9,0xdaa6ec1a,0xbc3a5c0b,0x88d2d7ed,0xba9fe493,0xbb614797,0xbb4335b4,0x72f83533,0x991c4d68 +.long 0xd2f01cb3,0x53258c28,0xd75db0b1,0x93d6eaa3,0xe87d0db4,0x419a2b0d,0xd8fe8493,0xa1e48f03,0xc508b23a,0xf747faf6,0x35d53549,0xf137571a,0xfcf9b838,0x9f5e58e2,0xa7fd3cf5,0xc7186cee +.long 0xe978a1d3,0x77b868ce,0x7ab92d04,0xe3a68b33,0x87a5b862,0x51029794,0x3a61d41d,0x5f0606c3,0x6f9326f1,0x2814be27,0xc6fe3c2e,0x2f521c14,0xacdf7351,0x17464d7d,0x777f7e44,0x10f5f9d3 +.long 0x269fb37d,0xce8e616b,0x7de62de5,0xaaf73804,0x4fdd4153,0xaba11175,0x3770b49b,0x515759ba,0xaa423a61,0x8b09ebf8,0xcd41fb92,0x592245a1,0x9b4c8936,0x1cba8ec1,0xaf36710e,0xa87e91e3 +.long 0x3d34a2e3,0x1fd84ce4,0xb43b5d61,0xee3759ce,0x619186c7,0x895bc78c,0xcbb9725a,0xf19c3809,0xde744b1f,0xc0be21aa,0x60f8056b,0xa7d222b0,0xb23efe11,0x74be6157,0x0cd68253,0x6fab2b4f +.long 
0x4bf1d725,0xad33ea5f,0x4f6c950f,0x9c1d8ee2,0xa377af06,0x544ee78a,0x94a113e1,0x54f489bb,0x992fb7e8,0x8f11d634,0xa2a44347,0x0169a7aa,0x95020e00,0x1d49d4af,0xe08e120b,0x95945722 +.long 0xa4d32282,0xb6e33878,0x48020ae7,0xe36e029d,0x37a9b750,0xe05847fb,0xb29e3819,0xf876812c,0xd23a17f0,0x84ad138e,0xf0b3950e,0x6d7b4480,0x2fd67ae0,0xdfa8aef4,0x52333af6,0x8d3eea24 +.long 0xb15d5acc,0x0d052075,0xbd815bc4,0xc6d9c79f,0xdfa36cf2,0x8dcafd88,0x38aa9070,0x908ccbe2,0xba35afce,0x638722c4,0xfd6abf0b,0x5a3da8b0,0xc9c335c1,0x2dce252c,0x65aa799b,0x84e7f0de +.long 0xb99a72cb,0x2101a522,0x87618016,0x06de6e67,0xe6f3653e,0x5ff8c7cd,0xc7a6754a,0x0a821ab5,0x7cb0b5a2,0x7e3fa52b,0xc9048790,0xa7fb121c,0x06ce053a,0x1a725020,0x04e929b0,0xb490a31f +.long 0x62dd61ad,0xe17be47d,0x6be01371,0x781a961c,0xdae3cbba,0x1063bfd3,0x7f73c9ba,0x35647406,0x2736a129,0xf50e957b,0xed13f256,0xa6313702,0x3a19fcc5,0x9436ee65,0xe7a4c8b6,0xcf2bdb29 +.long 0xc5f95cd8,0xb06b1244,0xf4ab95f4,0xda8c8af0,0xb9e5836d,0x1bae59c2,0x3acffffc,0x07d51e7e,0xc2ccbcda,0x01e15e6a,0x8528c3e0,0x3bc1923f,0xa49fead4,0x43324577,0x2aa7a711,0x61a1b884 +.long 0x700230ef,0xf9a86e08,0xbd19adf8,0x0af585a1,0xf55ad8f2,0x7645f361,0x46c3614c,0x6e676223,0x4e774d3f,0x23cb257c,0xac102d1b,0x82a38513,0x7b126aa5,0x9bcddd88,0xeefd3ee4,0xe716998b +.long 0xfb167583,0x4239d571,0xd16c8f8a,0xdd011c78,0x69a27519,0x271c2895,0xd2d64b6a,0x9ce0a3b7,0xd5ec6738,0x8c977289,0x8840ef6b,0xa3b49f9a,0x9a453419,0x808c14c9,0x0cf0a2d5,0x5c00295b +.long 0x1d4bcc76,0x524414fb,0x459a88f1,0xb07691d2,0xf70d110f,0x77f43263,0xb7abf9f3,0x64ada5e0,0x5b544cf5,0xafd0f94e,0xfd2713fe,0xb4a13a15,0x250c74f4,0xb99b7d6e,0x20324e45,0x097f2f73 +.long 0xaffa8208,0x994b37d8,0xdc29aafc,0xc3c31b0b,0x7a3a607f,0x3da74651,0xfe6955d6,0xd8e1b8c1,0xc8418682,0x716e1815,0x7dc91d97,0x541d487f,0xc6996982,0x48a04669,0x83a6502e,0xf39cab15 +.long 0xe68db055,0x025801a0,0xba3338d5,0xf3569758,0xee2afa84,0xb0c8c0aa,0xfb6562d1,0x4f6985d3,0x132ed17a,0x351f1f15,0xc04365fe,0x510ed0b4,0xe5b1f066,0xa3f98138,0x32df03dc,0xbc9d95d6 +.long 0x19abd09e,0xa83ccf6e,0x4ff17edb,0x0b4097c1,0xd64a06ce,0x58a5c478,0x544a58fd,0x2ddcc3fd,0x9e8153b8,0xd449503d,0x7774179b,0x3324fd02,0xdbd9120c,0xaf5d47c8,0x34fa94db,0xeb860162 +.long 0x972f07f4,0x5817bdd1,0xd27bbceb,0xe5579e2e,0x5f11e5a6,0x86847a1f,0x7c3cf048,0xb39ed255,0xa2f62e55,0xe1076417,0x1bcf82a2,0x6b9ab38f,0x7aeb29f9,0x4bb7c319,0x17227a46,0xf6d17da3 +.long 0x0f968c00,0xab53ddbd,0x000c880b,0xa03da7ec,0x6a9ad24d,0x7b239624,0x01ec60d0,0x612c0401,0x109f5df1,0x70d10493,0x80af7550,0xfbda4030,0xc6b9a9b3,0x30b93f95,0x007d9418,0x0c74ec71 +.long 0x6edb951f,0x94175564,0x7f22c282,0x5f4a9d78,0xb38d1196,0xb7870895,0xa228ce7c,0xbc593df3,0x6af3641a,0xc78c5bd4,0x3d9b3dcc,0x7802200b,0x8be33304,0x0dc73f32,0x61ffb79a,0x847ed87d +.long 0x6d671192,0xf85c974e,0xde16f60f,0x1e14100a,0x95c38797,0x45cb0d5a,0x9b022da4,0x18923bba,0xbbe7e86e,0xef2be899,0x216067bf,0x4a1510ee,0x84d5ce3e,0xd98c8154,0xf92a2b90,0x1af777f0 +.long 0x4ef65724,0x9fbcb400,0x3c0ca6fe,0x3e04a4c9,0x55002994,0xfb3e2cb5,0x5363ecab,0x1f3a93c5,0x3923555b,0x1fe00efe,0x1e1751ea,0x744bedd9,0x6ab69357,0x3fb2db59,0xf5e6618b,0x8dbd7365 +.long 0xdf1ea40e,0x99d53099,0x57d61e64,0xb3f24a0b,0x596eb812,0xd088a198,0x5762940b,0x22c8361b,0xf9c0d95c,0x66f01f97,0x8e43cdae,0x88461172,0xb72b15c3,0x11599a7f,0x420d95cc,0x135a7536 +.long 0x5f7ae2f6,0x2dcdf0f7,0xd7fa6da2,0x15fc6e1d,0xd1d441b6,0x81ca829a,0x04a106b6,0x84c10cf8,0xa73fbbd0,0xa9b26c95,0x4d8f6ee8,0x7f24e0cb,0x1e25a043,0x48b45937,0x036f3dfe,0xf8a74fca +.long 
0xc9f84296,0x1ed46585,0x3bc278b0,0x7fbaa8fb,0x6c4fcbd0,0xa8e96cd4,0x73b60a5f,0x940a1202,0x55a4aec8,0x34aae120,0xdbd742f0,0x550e9a74,0x228c68ab,0x794456d7,0xa4e25ec6,0x492f8868 +.long 0xb2d8f398,0x682915ad,0x5b84c953,0xf13b51cc,0x5bb917d6,0xcda90ab8,0x4ea3dee1,0x4b615560,0x0a52c1c8,0x578b4e85,0x20b75fc4,0xeab1a695,0xaa0bb3c6,0x60c14f3c,0xb8216094,0x220f448a +.long 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,0x06d54831,0x8589fb92,0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,0x02541c4f,0xebb0696d +.long 0xb9718710,0x5a171fde,0xf374a9f5,0x38f1bed8,0xba39bdc1,0xc8c582e1,0x908cc0ce,0xfc457b0a,0x883841e2,0x9a187fd4,0x38725381,0x8ec25b39,0x96f84395,0x2553ed05,0x6f6c6897,0x095c7661 +.long 0x4bdc5610,0x917ac85c,0x179eb301,0xb2885fe4,0x8b78bdcc,0x5fc65547,0xe59e4699,0x4a9fc893,0x3ce299af,0xbb7ff0cd,0xadf38b20,0x195be9b3,0xd38ddb8f,0x6a929c87,0xb21a51b9,0x55fcc99c +.long 0x721a4593,0x2b695b4c,0x768eaac2,0xed1e9a15,0x7489f914,0xfb63d71c,0x78118910,0xf98ba31c,0x9b128eb4,0x80291373,0xd448af4a,0x7801214e,0x55418dd3,0xdbd2e22b,0xd3998242,0xeffb3c0d +.long 0xc7bf3827,0xdfa6077c,0x47f8238f,0xf2165bcb,0x8564d554,0xfe37cf68,0x0a81fb98,0xe5f825c4,0xffed4d6f,0x43cc4f67,0xb50a34b0,0xbc609578,0x5041faf1,0x8aa8fcf9,0x651773b6,0x5659f053 +.long 0x6044d63b,0xe87582c3,0x0cdb0ca0,0xa6089409,0xbfb2bcf6,0x8c993e0f,0x45985cfc,0xfc64a719,0x83dbedba,0x15c4da80,0x2be67df7,0x804ae112,0xa23defde,0xda4c9658,0x5156e0d3,0x12002ddd +.long 0x5dd21b96,0xe68eae89,0xcf44624d,0x8b99f28b,0x1ec8897a,0x0ae00808,0x6712f76e,0xdd0a9303,0x4e233de4,0x96237522,0x2b36a8a5,0x192445b1,0x023993d9,0xabf9ff74,0x2aad4a8f,0x21f37bf4 +.long 0xf8bd2bbd,0x340a4349,0x4868195d,0x1d902cd9,0xe5fdb6f1,0x3d27bbf1,0x124f9f1c,0x7a5ab088,0xf7a09e03,0xc466ab06,0x31f2c123,0x2f8a1977,0x041b6657,0xda355dc7,0x8ece2a7c,0xcb840d12 +.long 0x7db32675,0xb600ad9f,0x07a06f1b,0x78fea133,0xb31f6094,0x5d032269,0x83ec37aa,0x07753ef5,0x9c0bea78,0x03485aed,0xbc3f4524,0x41bb3989,0x697f726d,0x09403761,0xdf394820,0x6109beb3 +.long 0x3b6d1145,0x804111ea,0xa8582654,0xb6271ea9,0x24e66562,0x619615e6,0xd7b6ad9c,0xa2554945,0x99bfe35f,0xd9c4985e,0x7b51cdf6,0x9770ccc0,0x92881832,0x7c327013,0x286b26d1,0x8777d45f +.long 0xd847999d,0x9bbeda22,0xc3525d32,0x03aa33b6,0x28a959a1,0x4b7b96d4,0x31e5d234,0xbb3786e5,0x6961f247,0xaeb5d3ce,0x02f93d3f,0x20aa85af,0xd7a7ae4f,0x9cd1ad3d,0x781adaa8,0xbf6688f0 +.long 0x7469cead,0xb1b40e86,0x309fca48,0x1904c524,0x4b54bbc7,0x9b7312af,0x593affa2,0xbe24bf8f,0xbd98764b,0xbe5e0790,0xa26e299e,0xa0f45f17,0x6b8fe4c7,0x4af0d2c2,0x8ae8a3e6,0xef170db1 +.long 0x29e0ccc1,0x0e8d61a0,0x60ad36ca,0xcd53e87e,0xc8173822,0x328c6623,0xa496be55,0x7ee1767d,0x648945af,0x89f13259,0x25c8009c,0x9e45a5fd,0x1f61ab8c,0xaf2febd9,0x8a275385,0x43f6bc86 +.long 0xf2142e79,0x87792348,0xc6e6238a,0x17d89259,0x4a839d9b,0x7536d2f6,0x76a1fbdc,0x1f428fce,0x0db06dfe,0x1c109601,0x50a3a3cc,0xbfc16bc1,0x9b30f41b,0xf9cbd9ec,0x00138cce,0x5b5da0d6 +.long 0x56ef96a7,0xec1d0a48,0x982bf842,0xb47eb848,0xec3f700d,0x66deae32,0xaa1181e0,0x4e43c42c,0xd1a4aa2a,0xa1d72a31,0xc004f3ce,0x440d4668,0x45fe8a7a,0x0d6a2d3b,0xfb128365,0x820e52e2 +.long 0x25e51b09,0x29ac5fcf,0x2023d159,0x180cd2bf,0xa1ebf90e,0xa9892171,0x7c132181,0xf97c4c87,0xc03dbb7e,0x9f1dc724,0x018cbbe4,0xae043765,0x0767d153,0xfb0b2a36,0x249cbaeb,0xa8e2f4d6 +.long 0xd95ea168,0x172a5247,0x2970764a,0x1758fada,0x1d978169,0xac803a51,0xde77e01b,0x299cfe2e,0xb0a98927,0x652a1e17,0x20014495,0x2e26e1d1,0x7175b56a,0x7ae0af9f,0xd64b9f95,0xc2e22a80 +.long 
0xd90a060a,0x4d0ff9fb,0xbaf38085,0x496a27db,0xda776bcf,0x32305401,0x725f209e,0xb8cdcef6,0x436a0bba,0x61ba0f37,0x76860049,0x263fa108,0xda3542cf,0x92beb98e,0xd5849538,0xa2d4d14a +.long 0x12e9a1bc,0x989b9d68,0x5f6e3268,0x61d9075c,0x99ace638,0x352c6aa9,0x920f43ff,0xde4e4a55,0xd673c017,0xe5e4144a,0x6f6e05ea,0x667417ae,0xdcd1bd56,0x613416ae,0x86693711,0x5eb36201 +.long 0x3a1aa914,0x2d7bc504,0x76dc5975,0x175a1299,0x3fc8125c,0xe900e0f2,0x11198875,0x569ef68c,0x63a113b4,0x9012db63,0x98835766,0xe3bd3f56,0x76412dea,0xa5c94a52,0xaa735e5c,0xad9e2a09 +.long 0x508b65e9,0x405a984c,0x6df1a0d1,0xbde4a1d1,0xdfba80da,0x1a9433a1,0x9440ad2e,0xe9192ff9,0x5099fe92,0x9f649696,0x0b27a54a,0x25ddb65c,0xc590da61,0x178279dd,0xfbde681a,0x5479a999 +.long 0x013fe162,0xd0e84e05,0x632d471b,0xbe11dc92,0xfc0e089f,0xdf0b0c45,0x4c144025,0x04fb15b0,0x13c99927,0xa61d5fc2,0x3de2eb35,0xa033e9e0,0xb8dacbb4,0xf8185d5c,0x8644549d,0x9a88e265 +.long 0x54671ff6,0xf717af62,0x5fa58603,0x4bd4241b,0xe67773c0,0x06fba40b,0x6a2847e9,0xc1d933d2,0x689e2c70,0xf4f5acf3,0x46bafd31,0x92aab0e7,0x3473f6e5,0x798d76aa,0x93141934,0xcc6641db +.long 0xd31e535e,0xcae27757,0x87c2ee11,0x04cc43b6,0x2e029ffa,0x8d1f9675,0xe4cc7a2c,0xc2150672,0x8d68b013,0x3b03c1e0,0xedf298f3,0xa9d6816f,0xa2804464,0x1bfbb529,0x5db22125,0x95a52fae +.long 0x0e1cb64e,0x55b32160,0x7e7fc9fe,0x004828f6,0x1bb0fb93,0x13394b82,0x35f1a920,0xb6293a2d,0xd145d2d9,0xde35ef21,0xbb8fa603,0xbe6225b3,0x32cf252d,0x00fc8f6b,0x117cf8c2,0xa28e52e6 +.long 0x4c371e6d,0x9d1dc89b,0x36ef0f28,0xcebe0675,0xa4292f81,0x5de05d09,0x353e3083,0xa8303593,0x7e37a9bb,0xa1715b0a,0x2b8faec3,0x8c56f61e,0x33c9b102,0x52507431,0xa44431f0,0x0130cefc +.long 0xbd865cfb,0x56039fa0,0xbc5f1dd7,0x4b03e578,0xbabe7224,0x40edf2e4,0x3a1988f6,0xc752496d,0x564beb6b,0xd1572d3b,0x39a1c608,0x0db1d110,0x16f60126,0x568d1934,0xf354af33,0x05ae9668 +.long 0xc92544f2,0x19de6d37,0xa35837d5,0xcc084353,0x1a514ece,0xcbb6869c,0x2e1d1066,0xb633e728,0x936c581c,0xf15dd69f,0x7439c4f9,0x96e7b8ce,0x2e448a5b,0x5e676f48,0xfd916bbb,0xb2ca7d5b +.long 0xf5024025,0xd55a2541,0xe4c2d937,0x47bc5769,0x0362189f,0x7d31b92a,0xef7816f9,0x83f3086e,0xb587579a,0xf9f46d94,0x30e76c5f,0xec2d22d8,0xb000ffcf,0x27d57461,0x364ffc2c,0xbb7e65f9 +.long 0x6652a220,0x7c7c9477,0xd696c981,0x61618f89,0x89effff3,0x5021701d,0x7c314163,0xf2c8ff8e,0x8efb4d3e,0x2da413ad,0xce176d95,0x937b5adf,0x2a67d51c,0x22867d34,0x18eb3ac9,0x262b9b10 +.long 0xc43ff28b,0x4e314fe4,0x6a664e7a,0x76476627,0xb7a565c2,0x3e90e40b,0xc1acf831,0x8588993a,0x8f938829,0xd7b501d6,0x3edd7d4c,0x996627ee,0x90cd34c7,0x37d44a62,0xf3833e8d,0xa8327499 +.long 0x4bf50353,0x2e18917d,0x556765fb,0x85dd726b,0x93d5ab66,0x54fe65d6,0x915c25fe,0x3ddbaced,0x12f22e85,0xa799d9a4,0x6d06f6bc,0xe2a24867,0x43ca1637,0xf4f1ee56,0x61ece30a,0xfda2828b +.long 0xa2dee7a6,0x758c1a3e,0x734b2284,0xdcde2f3c,0x4eaba6ad,0xaba445d2,0x76cee0a7,0x35aaf668,0xe5aa049a,0x7e0b04a9,0x91103e84,0xe74083ad,0x40afecc3,0xbeb183ce,0xea043f7a,0x6b89de9f +.long 0xfe67ba66,0x0e299d23,0x93cf2f34,0x91450760,0x97fcf913,0xf45b5ea9,0x8bd7ddda,0x5be00843,0xd53ff04d,0x358c3e05,0x5de91ef7,0xbf7ccdc3,0xb69ec1a0,0xad684dbf,0x801fd997,0x367e7cf2 +.long 0xb0dc8595,0x0ca1f3b7,0x9f1d9f2e,0x27de4608,0xbadd82a7,0x1af3bf39,0x65862448,0x79356a79,0xf5f9a052,0xc0602345,0x139a42f9,0x1a8b0f89,0x844d40fc,0xb53eee42,0x4e5b6368,0x93b0bfe5 +.long 0xc024789c,0x5434dd02,0x41b57bfc,0x90dca9ea,0x243398df,0x8aa898e2,0x894a94bb,0xf607c834,0xc2c99b76,0xbb07be97,0x18c29302,0x6576ba67,0xe703a88c,0x3d79efcc,0xb6a0d106,0xf259ced7 +.long 
0xc8de610b,0x0f893a5d,0x67e223ce,0xe8c515fb,0x4ead6dc5,0x7774bfa6,0x925c728f,0x89d20f95,0x098583ce,0x7a1e0966,0x93f2a7d7,0xa2eedb94,0x4c304d4a,0x1b282097,0xc077282d,0x0842e3da +.long 0x3b9e2d7b,0xe4d972a3,0xc48218ff,0x7cc60b27,0x84149d91,0x8fc70838,0x2f461ecc,0x5c04346f,0x614650a9,0xebe9fdf2,0xc1f666ac,0x5e35b537,0x88babc83,0x645613d1,0xc5e1c93e,0x88cace3a +.long 0x3de92e23,0x209ca375,0x5fbbb6e3,0xccb03cc8,0xd7b1487e,0xccb90f03,0xc710941f,0xfa9c2a38,0x6724ceed,0x756c3823,0x192d0323,0x3a902258,0xea5e038e,0xb150e519,0xc7427591,0xdcba2865 +.long 0x78890732,0xe549237f,0x53fcb4d9,0xc443bef9,0xeb3480d6,0x9884d8a6,0x3048b186,0x8a35b6a1,0x65e9a90a,0xb4e44716,0x653006c0,0x45bf380d,0x4fe9ae3b,0x8f3f820d,0x979a3b71,0x244a35a0 +.long 0x74cd06ff,0xa1010e9d,0xaca3eeac,0x9c17c7df,0x8063aa2b,0x74c86cd3,0x734614ff,0x8595c4b3,0x990f62cc,0xa3de00ca,0xca0c3be5,0xd9bed213,0xdf8ce9f5,0x7886078a,0x5cd44444,0xddb27ce3 +.long 0x58926ddd,0xed374a66,0x908015b8,0x138b2d49,0xde1f7ab8,0x886c6579,0xc3020b7a,0x888b9aa0,0x3a96e355,0xd3ec034e,0xf30fbe9a,0xba65b0b8,0xff21367a,0x064c8e50,0x0b04b46e,0x1f508ea4 +.long 0x747c866c,0x98561a49,0x0518a062,0xbbb1e5fe,0xecdc3608,0x20ff4e8b,0x20184027,0x7f55cded,0xf38c85f0,0x8d73ec95,0x8bc3b8c3,0x5b589fdf,0x0f12b66f,0xbe95dd98,0x0e338e01,0xf5bd1a09 +.long 0x5e915918,0x65163ae5,0x86f8a46b,0x6158d6d9,0xeeebf99c,0x8466b538,0xbca477ef,0xca8761f6,0x9ebbc601,0xaf3449c2,0xe0c3ae2f,0xef3b0f41,0x5de63752,0xaa6c577d,0x64682a51,0xe9166601 +.long 0xfc15aa1e,0x5a3097be,0xb54b0745,0x40d12548,0x519a5f12,0x5bad4706,0xa439dee6,0xed03f717,0x4a02c499,0x0794bb6c,0xcffe71d2,0xf725083d,0x0f3adcaf,0x2cad7519,0x43729310,0x7f68ea1c +.long 0xb7ffd977,0xe747c8c7,0x80761a22,0xec104c35,0x5a3ffb83,0x8395ebaf,0xe4b63db7,0xfb3261f4,0xd883e544,0x53544960,0x8cc2eeb8,0x13520d70,0xd3d65f99,0x08f6337b,0x781cf95b,0x83997db2 +.long 0x0dbd2c01,0xce6ff106,0x1f9ce934,0x4f8eea6b,0x0e993921,0x546f7c4b,0x5e753fc7,0x6236a324,0xa16022e9,0x65a41f84,0x43d1dbb2,0x0c18d878,0x2d4cef9c,0x73c55640,0x70444c74,0xa0428108 +.long 0x9afdfb3c,0x68e4f15e,0x5bdfb6df,0x49a56143,0x5f823d97,0xa9bc1bd4,0xea111c2a,0xbceb5970,0xb269bbc4,0x366b455f,0xe9bc5d62,0x7cd85e1e,0x4f18b086,0xc743c41c,0x95294fb9,0xa4b40990 +.long 0x26ee8382,0x9c7c581d,0x359d638e,0xcf17dcc5,0xb728ae3d,0xee8273ab,0xf821f047,0x1d112926,0x50491a74,0x11498477,0xfde0dfb9,0x687fa761,0x7ea435ab,0x2c258022,0x91ce7e3f,0x6b8bdb94 +.long 0x3bf834aa,0x4c5b5dc9,0x4f6c7e4b,0x04371819,0x3736bcad,0xc284e00a,0x21ae8f8d,0x0d881118,0xf48c8e33,0xf9cf0f82,0xa1bf40db,0xa11fd075,0xdc2733e5,0xdceab0de,0x8e986bd7,0xc560a8b5 +.long 0x3929d097,0x48dd1fe2,0x92f188f1,0x3885b290,0xda6fcdac,0x0f2ae613,0xb662a46c,0x9054303e,0x0738042a,0xb6871e44,0xbdaf6449,0x98e6a977,0xd1c9df1b,0xd8bc0650,0x36e098f9,0xef3d6451 +.long 0xb6d72d28,0x03fbae82,0xf5d84080,0x77ca9db1,0xa58efc1c,0x8a112cff,0xc564cb4a,0x518d761c,0xf0d1b5ce,0x69b5740e,0xe9eb1785,0x717039cc,0x22f53382,0x3fe29f90,0x6bc7c95c,0x8e54ba56 +.long 0xf7f91d0f,0x9c806d8a,0xa82a5728,0x3b61b0f1,0x94d76754,0x4640032d,0x47d834c6,0x273eb5de,0x7b4e4d53,0x2988abf7,0xde401777,0xb7ce66bf,0x715071b3,0x9fba6b32,0xad3a1a98,0x82413c24 +.long 0xe0e8ad93,0x5b7fc8c4,0x5fab868d,0xb5679aee,0x2b3946f3,0xb1f9d2fa,0x5685b50a,0x458897dc,0x89d0caf3,0x1e98c930,0x78642e92,0x39564c5f,0x0dbdaf18,0x1b77729a,0x579e82e6,0xf9170722 +.long 0xe4515fa5,0x680c0317,0xfb0c790f,0xf85cff84,0x6d2e0765,0xc7a82aab,0x35c82b32,0x7446bca9,0x6d63184f,0x5de607aa,0x262803a6,0x7c1a46a8,0xaebe8035,0xd218313d,0xc73c51f8,0x92113ffd +.long 
0x12e7e46c,0x4b38e083,0x56126bd5,0x69d0a37a,0x73c07e04,0xfb3f324b,0x8fda7267,0xa0c22f67,0x4d2c7d8f,0x8f2c0051,0xcbe2cae5,0xbc45ced3,0xa8f0f277,0xe1c6cf07,0x1eb99a98,0xbc392312 +.long 0x3cc8ac85,0x75537b7e,0xdd02753b,0x8d725f57,0xb737df2f,0xfd05ff64,0xf6d2531d,0x55fe8712,0x6ab6b01c,0x57ce04a9,0x7cd93724,0x69a02a89,0xcf86699b,0x4f82ac35,0x9cb4b232,0x8242d3ad +.long 0xd62105e5,0x713d0f65,0x2d29be61,0xbb222bfa,0x6cfbef09,0xf2f9a79e,0xd5d6782f,0xfc24d8d3,0xd4129967,0x5db77085,0xdc3c2a43,0xdb81c3cc,0x05d8d9a3,0x9d655fc0,0x54298026,0x3f5d057a +.long 0x88c54694,0x1157f56d,0x9b09573e,0xb26baba5,0x22adffd1,0x2cab03b0,0xdd69f383,0x60a412c8,0x54b25039,0xed76e98b,0x687e714d,0xd4ee67d3,0x7b00b594,0x87739648,0xc9ef709b,0xce419775 +.long 0x1c203a40,0x40f76f85,0xeafd8f91,0x30d352d6,0x95578dd2,0xaf196d3d,0x77cc3f3d,0xea4bb3d7,0xb98e782b,0x42a5bd03,0x0624920d,0xac958c40,0xfc56fcc8,0xb838134c,0x89572e5e,0x86ec4ccf +.long 0x9be47be0,0x69c43526,0xcb28fea1,0x323b7dd8,0x3a6c67e5,0xfa5538ba,0x1d378e46,0xef921d70,0x3c4b880e,0xf92961fc,0x98940a67,0x3f6f914e,0xfef0ff39,0xa990eb0a,0xf0eeff9c,0xa6c2920f +.long 0x51b8d9a3,0xca804166,0x0ffb0db1,0x42531bc9,0xaa82e7ce,0x72ce4718,0xdf574741,0x6e199913,0xd5d36946,0xd5f1b13d,0xf68f0194,0x8255dc65,0x8710d230,0xdc9df4cd,0x138c1988,0x3453c20f +.long 0x89a6ef01,0x9af98dc0,0x9857df85,0x4dbcc3f0,0x5c1ad924,0x34805601,0xd0493046,0x40448da5,0x4ee343e2,0xf629926d,0x90e8a301,0x6343f1bd,0x40815b3f,0xefc93491,0xde8f66fb,0xf882a423 +.long 0xe7db9f57,0x3a12d5f4,0x3c384c27,0x7dfba38a,0x6fc660b1,0x7a904bfd,0x2773b21c,0xeb6c5db3,0x1cdfe049,0xc350ee66,0x44540f29,0x9baac0ce,0xa5ec6aad,0xbc57b6ab,0x0a7c1baa,0x167ce8c3 +.long 0x53fb2b56,0xb23a03a5,0x4e057f78,0x6ce141e7,0x89e490d9,0x796525c3,0xa31a7e75,0x0bc95725,0x1220fd06,0x1ec56791,0x408b0bd6,0x716e3a3c,0xe8ebeba9,0x31cd6bf7,0xbee6b670,0xa7326ca6 +.long 0xcd090c43,0x3d9f851c,0xf12c3988,0x561e8f13,0x904b7be4,0x50490b6a,0x0410737b,0x61690ce1,0x0f009052,0x299e9a37,0xf026092e,0x258758f0,0xfdfcdc0f,0x9fa255f3,0xc0e1bcd2,0xdbc9fb1f +.long 0x24651840,0x35f9dd6e,0xa5c59abc,0xdca45a84,0xecca4938,0x103d396f,0xb97b3f29,0x4532da0a,0x1999a6bf,0xc4135ea5,0x5e6bf2ee,0x3aa9505a,0x3f5be093,0xf77cef06,0xa943152e,0x97d1a0f8 +.long 0x2e1c21dd,0x2cb0ebba,0x2c6797c4,0xf41b29fc,0xb300101f,0xc6e17321,0xd0d79a89,0x4422b0e9,0x92f1bfc4,0x49e4901c,0xe1e10ed9,0x06ab1f8f,0xdb2926b8,0x84d35577,0x356e8ec2,0xca349d39 +.long 0x343bf1a9,0x70b63d32,0x37d1a6b1,0x8fd3bd28,0x316865b4,0x0454879c,0xc458efa2,0xee959ff6,0x9706dc3f,0x0461dcf8,0x164e4b2e,0x737db0e2,0x2f8843c8,0x09262680,0x7745e6f6,0x54498bbc +.long 0xa29e24af,0x359473fa,0x70aa87a1,0xfcc3c454,0x00573ace,0xfd2c4bf5,0x28dd1965,0xb65b514e,0x2193e393,0xe46ae7cf,0xf5444d97,0x60e9a4e1,0x00ff38ed,0xe7594e96,0x0a0e0f02,0x43d84d2f +.long 0xee398a21,0x8b6db141,0xe3bcc5be,0xb88a56ae,0x373460ea,0x0a1aa52f,0x160bb19b,0x20da1a56,0x65bf0384,0xfb54999d,0x5d5a180e,0x71a14d24,0x21737b04,0xbc44db7b,0x01dd8e92,0xd84fcb18 +.long 0xfa44b479,0x80de937b,0x5c98fd4f,0x53505499,0x28f08727,0x1edb12ab,0xa5f3ef53,0x4c58b582,0x8327f246,0xbfb236d8,0x4d7df320,0xc3a3bfaa,0xb96024f2,0xecd96c59,0x7f4e0433,0xfc293a53 +.long 0x5acf6e10,0x5341352b,0xafe652c3,0xc50343fd,0x18577a7f,0x4af3792d,0xaf16823d,0xe1a4c617,0x33425d0a,0x9b26d0cd,0x9b7bc47f,0x306399ed,0x706bb20b,0x2a792f33,0x98111055,0x31219614 +.long 0x87f5d28b,0x864ec064,0x962277fd,0x11392d91,0xbb6aed5f,0xb5aa7942,0x47e799d9,0x080094dc,0x208ba19b,0x4afa588c,0x8512f284,0xd3e7570f,0x02f5799a,0xcbae64e6,0x514b9492,0xdeebe7ef +.long 
0xe5c298ff,0x30300f98,0x3678361f,0x17f561be,0x98cb9a16,0xf52ff312,0x5562d490,0x6233c3bc,0x92e3a2cb,0x7bfa15a1,0xe6365119,0x961bcfd1,0x2c8c53b1,0x3bdd29bf,0x822844ba,0x739704df +.long 0x7e7b754b,0x7dacfb58,0xa806c9b9,0x23360791,0x23504452,0xe7eb88c9,0x852c1783,0x2983e996,0x958d881d,0xdd4ae529,0x262c7b3c,0x026bae03,0x960b52d1,0x3a6f9193,0x92696cfb,0xd0980f90 +.long 0xd5f30851,0x4c1f428c,0x2a4f6630,0x94dfed27,0xfc5d48a4,0x4df53772,0x933260ce,0xdd2d5a2f,0xd44cc7a5,0x574115bd,0xbd12533a,0x4ba6b20d,0x243057c9,0x30e93cb8,0x14de320e,0x794c486a +.long 0xf21496e4,0xe925d4ce,0xec696331,0xf951d198,0x3e8d812f,0x9810e2de,0x389294ab,0xd0a47259,0x0e3bab66,0x513ba2b5,0xabad306f,0x462caff5,0xaf04c49e,0xe2dc6d59,0xe0b84b0b,0x1aeb8750 +.long 0x2f7d0ca2,0xc034f12f,0xe06acf2f,0x6d2e8128,0x21facc2f,0x801f4f83,0xf40ef607,0xa1170c03,0x7805a99c,0xfe0a1d4f,0xcc26aba5,0xbde56a36,0x35531f40,0x5b1629d0,0x9afa6108,0xac212c2b +.long 0x15697be5,0x30a06bf3,0x2c63c7c1,0x6f0545dc,0x7ccdadaf,0x5d8cb842,0xac7015bb,0xd52e379b,0xf462c23e,0xc4f56147,0x46bc24b0,0xd44a4298,0xe2856d4f,0xbc73d23a,0x0832bcdf,0x61cedd8c +.long 0x99f241d7,0x60953556,0x001a349d,0xee4adbd7,0xaa89e491,0x0b35bf6a,0x136f7546,0x7f0076f4,0x9264da3d,0xd19a18ba,0x62a7a28b,0x6eb2d2cd,0x8761c971,0xcdba941f,0xa3be4a5d,0x1550518b +.long 0x57d0b70c,0xd0e8e2f0,0xcd133ba3,0xeea8612e,0x44416aec,0x814670f0,0x30775061,0x424db6c3,0x16213fd1,0xd96039d1,0x18a3478f,0xc61e7fa5,0xcb0c5021,0xa805bdcc,0x0cc616dd,0xbdd6f3a8 +.long 0x5d97f7e2,0x06009667,0xaf0bf4b6,0x31db0fc1,0x5491627a,0x23680ed4,0x7d741fb1,0xb99a3c66,0x36b1ff92,0xe9bb5f55,0x512b388d,0x29738577,0x50fcf263,0xdb8a2ce7,0x6c4f7b47,0x385346d4 +.long 0x31631f9e,0xbe86c5ef,0x03a57a29,0xbf91da21,0x7b23f821,0xc3b1f796,0x770db354,0x0f7d00d2,0xd8fe79da,0x8ffc6c3b,0xd525c996,0xcc5e8c40,0xcfff632a,0x4640991d,0x67112528,0x64d97e8c +.long 0x02f1cd1e,0xc232d973,0x1dd212a4,0xce87eacb,0xe69802f7,0x6e4c8c73,0x1fffddbd,0x12ef0290,0x1bcea6e2,0x941ec74e,0x3cb92cbb,0xd0b54024,0x7e8f9d05,0x809fb9d4,0xf2992aae,0x3bf16159 +.long 0xf8a7a838,0xad40f279,0x05615660,0x11aea631,0xa01f6fa1,0xbf52e6f1,0x3dc2aec9,0xef046995,0xd8080711,0x785dbec9,0x9fdedf76,0xe1aec60a,0xfa21c126,0xece797b5,0x05e52732,0xc66e898f +.long 0x08811fdb,0x39bb69c4,0x2fc7f082,0x8bfe1ef8,0x174f4138,0xc8e7a393,0xd58d1f98,0xfba8ad1d,0xbfd2fd5b,0xbc21d0ce,0x6ee60d61,0x0b839a82,0xafd22253,0xaacf7658,0xaae396b3,0xb526bed8 +.long 0x38564464,0xccc1bbc2,0x8c45bc73,0x9e3ff947,0x58188a78,0xcde9bca3,0xd73bf8f7,0x138b8ee0,0x4123c489,0x5c7e234c,0xfa643297,0x66e69368,0x39a15fa3,0x0629eeee,0xa9e2a927,0x95fab881 +.long 0xeafbb1e1,0xb2497007,0xe75b7a93,0xd75c9ce6,0xefb68d78,0x3558352d,0x223f6396,0xa2f26699,0xe469b17a,0xeb911ecf,0xe72d3ec2,0x62545779,0x82cb113f,0x8ea47de7,0x4e1fa98d,0xebe4b086 +.long 0x8cdfedb1,0xec2d5ed7,0xfe211a74,0xa535c077,0x11d244c5,0x9678109b,0xbe299a76,0xf17c8bfb,0xfb11fbc4,0xb651412e,0x94ab3f65,0xea0b5482,0x0cf78243,0xd8dffd95,0xce0361d4,0x2e719e57 +.long 0x304ddc5b,0x9007f085,0x4daba2ea,0x095e8c6d,0x3f9d28a9,0x5a33cdb4,0xe2283003,0x85b95cd8,0xb9744733,0xbcd6c819,0xfc7f5783,0x29c5f538,0xd59038e4,0x6c49b2fa,0x3bbe1018,0x68349cc1 +.long 0x21830ee5,0xcc490c1d,0xe9bfa297,0x36f9c4ee,0x48de1a94,0x58fd7294,0x4e8f2cdc,0xaadb13a8,0x81313dba,0x515eaaa0,0xc2152dd8,0xc76bb468,0xa653dbf8,0x357f8d75,0xb14ac143,0xe4d8c4d1 +.long 0xb055cb40,0xbdb8e675,0x977b5167,0x898f8e7b,0xb82fb863,0xecc65651,0x6d88f01f,0x56544814,0x263a75a9,0xb0928e95,0x1a22fcda,0xcfb6836f,0x3f3bd37c,0x651d14db,0xb6ad4664,0x1d3837fb +.long 
0xff4f94ab,0x7c5fb538,0x6d7fb8f2,0x7243c712,0xa85c5287,0xef13d60c,0x4bb8dd1b,0x18cfb7c7,0x72908219,0x82f9bfe6,0x9d5144ab,0x35c4592b,0x9cf4b42f,0x52734f37,0x8c60ddc4,0x6bac55e7 +.long 0x94dea0f6,0xb5cd811e,0xe18cc1a3,0x259ecae4,0x15e660f8,0x6a0e836e,0x0e02bff2,0x6c639ea6,0x7e1026fd,0x8721b8cb,0x63261942,0x9e73b50b,0x77f01da3,0xb8c70974,0x8268f57f,0x1839e6a6 +.long 0x5150b805,0x571b9415,0xf92c7097,0x1892389e,0x4a084b95,0x8d69c18e,0xbe5b495c,0x7014c512,0x1b07523c,0x4780db36,0x2c1c64fa,0x2f6219ce,0x602c105a,0xc38b81b0,0x5dc8e360,0xab4f4f20 +.long 0xcf7d62d2,0x20d3c982,0x23ba8150,0x1f36e29d,0x92763f9e,0x48ae0bf0,0x1d3a7007,0x7a527e6b,0x581a85e3,0xb4a89097,0xdc158be5,0x1f1a520f,0x167d726e,0xf98db37d,0x1113e862,0x8802786e +.long 0x36f09ab0,0xefb2149e,0x4a10bb5b,0x03f163ca,0x06e20998,0xd0297045,0x1b5a3bab,0x56f0af00,0x70880e0d,0x7af4cfec,0xbe3d913f,0x7332a66f,0x7eceb4bd,0x32e6c84a,0x9c228f55,0xedc4a79a +.long 0xc55c4496,0xc37c7dd0,0x25bbabd2,0xa6a96357,0xadd7f363,0x5b7e63f2,0x2e73f1df,0x9dce3782,0xb2b91f71,0xe1e5a16a,0x5ba0163c,0xe4489823,0xf6e515ad,0xf2759c32,0x8615eecf,0xa5e2f1f8 +.long 0xabded551,0x74519be7,0xc8b74410,0x03d358b8,0x0e10d9a9,0x4d00b10b,0x28da52b7,0x6392b0b1,0x0b75c904,0x6744a298,0xa8f7f96c,0xc305b0ae,0x182cf932,0x042e421d,0x9e4636ca,0xf6fc5d50 +.long 0xd64cc78c,0x795847c9,0x9b6cb27b,0x6c50621b,0xdf8022ab,0x07099bf8,0xc04eda1d,0x48f862eb,0xe1603c16,0xd12732ed,0x5c9a9450,0x19a80e0f,0xb429b4fc,0xe2257f54,0x45460515,0x66d3b2c6 +.long 0x822e37be,0x6ca4f87e,0x253bda4e,0x73f237b4,0x41190aeb,0xf747f3a2,0x804cf284,0xf06fa36f,0xfc621c12,0x0a6bbb6e,0x40b80ec6,0x5d624b64,0x7ba556f3,0x4b072425,0x3e2d20a8,0x7fa0c354 +.long 0xe3229d41,0xe921fa31,0x94531bd4,0xa929c652,0xa6d38209,0x84156027,0x6bdb97bd,0xf3d69f73,0x16833631,0x8906d19a,0x03d51be3,0x68a34c2e,0x0e511cd8,0xcb59583b,0xfdc132a8,0x99ce6bfd +.long 0xffcdb463,0x3facdaaa,0x34a38b08,0x658bbc1a,0xf1a9078d,0x12a801f8,0x6ab855de,0x1567bcf9,0x3572359b,0xe08498e0,0x8659e68b,0xcf0353e5,0x7d23807c,0xbb86e9c8,0x2198e8a2,0xbc08728d +.long 0x453cadd6,0x8de2b7bc,0xbc0bc1f8,0x203900a7,0xa6abd3af,0xbcd86e47,0x8502effb,0x911cac12,0xec965469,0x2d550242,0x29e0017e,0x0e9f7692,0x65979885,0x633f078f,0x4cf751ef,0xfb87d449 +.long 0xfc25419a,0xe1790e4b,0x4bff3cfd,0x36467203,0x25b6e83f,0xc8db6386,0x6cad6fd2,0x6cc69f23,0x6bc68bb9,0x0219e45a,0x297f7334,0xe43d79b6,0x465dc97c,0x7d445368,0x2a0b949a,0x4b9eea32 +.long 0x6102d021,0x1b96c6ba,0x2f4461ea,0xeaafac78,0xc49f19a8,0xd4b85c41,0xcf538875,0x275c28e4,0xdd2e54e0,0x35451a9d,0x0605618b,0x6991adb5,0x7b36cd24,0x5b8b4bcd,0x56f37216,0x372a4f8c +.long 0xa6a5da60,0xc890bd73,0xdc4c9ff0,0x6f083da0,0xf0536e57,0xf4e14d94,0xaaec8243,0xf9ee1eda,0x8bdcf8e7,0x571241ec,0x0b041e26,0xa5db8271,0xe3fff040,0x9a0b9a99,0x7c271202,0xcaaf21dd +.long 0x4f0dd2e8,0xb4e2b2e1,0x0a377ac7,0xe77e7c4f,0x0d7a2198,0x69202c3f,0x28200eb8,0xf759b7ff,0xdcfe314e,0xc87526ed,0x53d5cf99,0xeb84c524,0x515138b6,0xb1b52ace,0x23fca3f4,0x5aa7ff8c +.long 0xb9791a26,0xff0b13c3,0xcdd58b16,0x960022da,0x57aad2de,0xdbd55c92,0xf30fe619,0x3baaaaa3,0x0d881efd,0x9a4b2346,0x46325e2a,0x506416c0,0x035c18d4,0x91381e76,0xf27817b0,0xb3bb68be +.long 0x5116f937,0x15bfb8bf,0xc1268943,0x7c64a586,0x8419a2c8,0x71e25cc3,0x8335f463,0x9fd6b0c4,0xe8ee0e0e,0x4bf0ba3c,0x298c21fa,0x6f6fba60,0xae66bee0,0x57d57b39,0x22672544,0x292d5130 +.long 0xbab093b3,0xf451105d,0x02839986,0x012f59b9,0x3474a89c,0x8a915802,0x2de03e97,0x048c919c,0x91071cd5,0xc476a2b5,0x034970a5,0x791ed89a,0xe1b7994b,0x89bd9042,0xa1057ffd,0x8eaf5179 +.long 
0xd551ee10,0x6066e2a2,0x727e09a6,0x87a8f1d8,0x2c01148d,0x00d08bab,0x424f33fe,0x6da8e4f1,0xcf9a4e71,0x466d17f0,0x3bf5cb19,0xff502010,0xd062ecc0,0xdccf97d8,0x81d80ac4,0x80c0d9af +.long 0x033f2876,0xe87771d8,0x7d5cc3db,0xb0186ec6,0x3bc9bc1d,0x58e8bb80,0x6f6ef60e,0x4d1395cc,0x186244a0,0xa73c62d6,0x110a5b53,0x918e5f23,0x741b7eab,0xed4878ca,0xdbe03e51,0x3038d71a +.long 0xa93c3246,0x840204b7,0xa0b9b4cd,0x21ab6069,0xb1d64218,0xf5fa6e2b,0xf3d56191,0x1de6ad0e,0xff1929c7,0x570aaa88,0x640e87b5,0xc6df4c6b,0xc65f0ccc,0xde8a74f2,0xe6f6cc01,0x8b972fd5 +.long 0x0b846531,0x3fff36b6,0x10a5e475,0xba7e45e6,0x4145b6c5,0x84a1d10e,0x5e046d9d,0xf1f7f91a,0x44de90d7,0x0317a692,0xf199c15e,0x951a1d4a,0xc9d73deb,0x91f78046,0xfab8224f,0x74c82828 +.long 0xe7560b90,0xaa6778fc,0xa7e824ce,0xb4073e61,0xd642eba8,0xff0d693c,0x5dccef38,0x7ce2e57a,0x1df1ad46,0x89c2c789,0x098346fd,0x83a06922,0xda2fc177,0x2d715d72,0x85b6cf1d,0x7b6dd71d +.long 0x73fa9cb0,0xc60a6d0a,0x328bf5a9,0xedd3992e,0x832c8c82,0xc380ddd0,0xa2a0bf50,0xd182d410,0xd9a528db,0x7d9d7438,0xcaf53994,0xe8b1a0e9,0x0e19987c,0xddd6e5fe,0x190b059d,0xacb8df03 +.long 0x8300129f,0x53703a32,0x68c43bfd,0x1f637662,0x00e54051,0xbcbd1913,0x7bf5a8c5,0x812fcc62,0x29fb85da,0x3f969d5f,0x694759e8,0x72f4e00a,0x790726b7,0x426b6e52,0x3bdbb209,0x617bbc87 +.long 0x97aee317,0x511f8bb9,0xe81536a8,0x812a4096,0x3ac09b9b,0x137dfe59,0xba8c9a7a,0x0682238f,0xaeccb4bd,0x7072ead6,0x692ba633,0x6a34e9aa,0x6fff9d33,0xc82eaec2,0x1d4d2b62,0xfb753512 +.long 0x1d7aadab,0x1a0445ff,0xd5f6a67c,0x65d38260,0x91cfb26f,0x6e62fb08,0x5c7d91d6,0xef1e0fa5,0x33db72cd,0x47e7c7ba,0xfa7c74b2,0x017cbc09,0xf50a503c,0x3c931590,0x616baa42,0xcac54f60 +.long 0xb2369f0f,0x9b6cd380,0x23c76151,0x97d3a70d,0x9862a9c6,0x5f9dd6fc,0x12312f51,0x044c4ab2,0x834a2ddc,0x035ea0fd,0xcc7b826d,0x49e6b862,0x62fce490,0xb03d6883,0xb37e36e9,0x62f2497a +.long 0xc6458293,0x04b005b6,0xe8d10af7,0x36bb5276,0x8ee617b8,0xacf2dc13,0xb004b3d4,0x470d2d35,0xfeeb1b77,0x06790832,0x85657f9c,0x2bb75c39,0xc0f60004,0xd70bd4ed,0x219b018b,0xfe797ecc +.long 0x753aebcc,0x9b5bec2a,0xc939eca5,0xdaf9f3dc,0xd095ad09,0xd6bc6833,0xdaa4d2fc,0x98abdd51,0x8d168be5,0xd9840a31,0x2325a23c,0xcf7c10e0,0x7e6ecfaf,0xa5c02aa0,0xb5bfdf18,0x2462e7e6 +.long 0xa0cc3f12,0xab2d8a8b,0xbc672a29,0x68dd485d,0x596f2cd3,0x72039752,0xa0cf3d8d,0x5d3eea67,0xe6602671,0x810a1a81,0x14026c0c,0x8f144a40,0x76b50f85,0xbc753a6d,0x645cd4a4,0xc4dc21e8 +.long 0x521d0378,0xc5262dea,0x05011c6f,0x802b8e0e,0x0b4c19ea,0x1ba19cbb,0xebf0aaec,0x21db64b5,0x70342f9d,0x1f394ee9,0x1bc44a14,0x93a10aee,0x3efd0baa,0xa7eed31b,0x1d154e65,0x6e7c824e +.long 0x9966e7ee,0xee23fa81,0x05b7920d,0x64ec4aa8,0x2d90aad4,0x2d44462d,0xdf277ad5,0xf44dd195,0xbb46b6a1,0x8d6471f1,0xfd885090,0x1e65d313,0x13a977b4,0x33a800f5,0x0797e1ef,0xaca9d721 +.long 0xfcff6a17,0x9a5a85a0,0x1eca7cee,0x9970a3f3,0xc9504be3,0xbb9f0d6b,0xadd24ee2,0xe0c504be,0x77fcc2f4,0x7e09d956,0x65bb5fc4,0xef1a5227,0x8b9286aa,0x145d4fb1,0x6649028b,0x66fd0c5d +.long 0x1bf4581c,0x98857ceb,0xaca7b166,0xe635e186,0x659722ac,0x278ddd22,0x1db68007,0xa0903c4c,0x48f21402,0x366e4589,0xb96abda2,0x31b49c14,0xe0403190,0x329c4b09,0xd29f43fe,0x97197ca3 +.long 0x274983d8,0x8073dd1e,0x55717c8f,0xda1a3bde,0x0361f9d1,0xfd3d4da2,0x4c7de1ce,0x1332d081,0xaa6d0e10,0x9b7ef7a3,0xf54f1c4a,0x17db2e73,0x4cd35567,0xaf3dffae,0xe56f4e71,0xaaa2f406 +.long 0x7ace3fc7,0x8966759e,0x45a8d8c6,0x9594eacf,0x91834e0e,0x8de3bd8b,0x548c0421,0xafe4ca53,0xe6ee81c6,0xfdd7e856,0x6b891a3a,0x8f671beb,0xfae63829,0xf7a58f2b,0x9c11ac9f,0x9ab186fb +.long 
0x10b5be76,0x8d6eb369,0xfb040bcd,0x046b7739,0xcb73de88,0xccb4529f,0xcf26be03,0x1df0fefc,0xbcfcd027,0xad7757a6,0xbb3165ca,0xa8786c75,0x7e99a4d9,0xe9db1e34,0xb06c504b,0x99ee86df +.long 0xc15c9f0a,0x5b7c2ddd,0x4295989e,0xdf87a734,0x03d08fda,0x59ece47c,0xad5fc702,0xb074d3dd,0x51a03776,0x20407903,0x2a608007,0x2bb1f77b,0xe1153185,0x25c58f4f,0x766e6447,0xe6df62f6 +.long 0xed51275a,0xefb3d1be,0x2f0f483f,0x5de47dc7,0x97c2bedf,0x7932d98e,0x0219f8a1,0xd5c11927,0xa73a294e,0x9d751200,0x9dc20172,0x5f88434a,0xa26f506a,0xd28d9fd3,0x9d1dcd48,0xa890cd31 +.long 0x70f4d3b4,0x0aebaec1,0x0ffc8d00,0xfd1a1369,0x57d57838,0xb9d9c240,0x68bac361,0x45929d26,0x25b15ca6,0x5a2cd060,0x6e474446,0x4b3c83e1,0xee1e5134,0x1aac7578,0xc91e2f41,0xa418f5d6 +.long 0x213ed68b,0x6936fc8a,0x510a5224,0x860ae7ed,0xdef09b53,0x63660335,0xcd79c98d,0x641b2897,0x01110f35,0x29bd38e1,0x648b1937,0x79c26f42,0x9d9164f4,0x64dae519,0x0265c273,0xd85a2310 +.long 0x4b07e2b1,0x7173dd5d,0x8d9ea221,0xd144c4cb,0x1105ab14,0xe8b04ea4,0xfe80d8f1,0x92dda542,0xcf03dce6,0xe9982fa8,0x1a22cffc,0x8b5ea965,0x3fad88c4,0xf7f4ea7f,0x6a5ba95c,0x62db773e +.long 0x93f24567,0xd20f02fb,0x315257ca,0xfd46c69a,0x8bcab987,0x0ac74cc7,0x5ceca2f5,0x46f31c01,0x888b219e,0x40aedb59,0xe1fccd02,0xe50ecc37,0x911f816c,0x1bcd9dad,0x8db9b00c,0x583cc1ec +.long 0xa483bf11,0xf3cd2e66,0xb1b2c169,0xfa08a6f5,0x4be9fa28,0xf375e245,0x5b6d011f,0x99a7ffec,0xc4ae62da,0x6a3ebddb,0x374aef5d,0x6cea00ae,0x9d4d05bc,0xab5fb98d,0xd560f252,0x7cba1423 +.long 0x208490de,0x49b2cc21,0xbcfb2879,0x1ca66ec3,0x1b6fb16f,0x7f1166b7,0x65fe5db3,0xfff63e08,0x8b2610be,0xb8345abe,0x39de3df4,0xb732ed80,0x211c32b4,0x0e24ed50,0x848ff27d,0xd10d8a69 +.long 0xed4de248,0xc1074398,0x10488927,0xd7cedace,0x85673e13,0xa4aa6bf8,0x6daf30af,0xb46bae91,0xfcef7ad8,0x07088472,0xd4b35e97,0x61151608,0xdde29986,0xbcfe8f26,0xd5a34c79,0xeb84c4c7 +.long 0x164e1214,0xc1eec55c,0xa147bb03,0x891be86d,0x0ba96835,0x9fab4d10,0xa5c1ae9f,0xbf01e9b8,0xb186ebc0,0x6b4de139,0x85b91bca,0xd5c74c26,0xc2d93854,0x5086a99c,0xa7a9dfbc,0xeed62a7b +.long 0x76b7618a,0x8778ed6f,0x03b66062,0xbff750a5,0xb65186db,0x4cb7be22,0xcc3a6d13,0x369dfbf0,0x7191a321,0xc7dab26c,0x40ed718e,0x9edac3f9,0xd0cfd183,0xbc142b36,0x7c991693,0xc8af82f6 +.long 0x97ce0b2a,0xb3d1e4d8,0xc3a55cdf,0xe6d7c87f,0x68b81afe,0x35846b95,0xd3c239d8,0x018d12af,0x01206e15,0x2b2c6208,0xa3b882c6,0xe0e42453,0xa50162d5,0x854470a3,0x7017a62a,0x08157478 +.long 0x820357c7,0x18bd3fb4,0x6f1458ad,0x992039ae,0x25b44aa1,0x9a1df3c5,0xed3d5281,0x2d780357,0xc77ad4d4,0x58cf7e4d,0xf9df4fc4,0xd49a7998,0x1d71205e,0x4465a8b5,0x649254aa,0xa0ee0ea6 +.long 0xab7bd771,0x4b5eeecf,0x35c262b9,0x6c873073,0x3c9d61e7,0xdc5bd648,0x321460d2,0x233d6d54,0xfc195bcc,0xd20c5626,0x04d78b63,0x25445958,0x17ec8ef3,0xe03fcb3d,0x46b8f781,0x54b690d1 +.long 0x21230646,0x82fa2c8a,0x084f418c,0xf51aabb9,0x1a30ba43,0xff4fbec1,0x743c9df7,0x6a5acf73,0xd635b4d5,0x1da2b357,0xecd5c1da,0xc3de68dd,0xd61af0dd,0xa689080b,0xd665bf99,0xdea5938a +.long 0xfe637294,0x0231d71a,0xa5a81cd8,0x01968aa6,0x048e63b5,0x11252d50,0x6ca007e9,0xc446bc52,0x96d6134b,0xef8c50a6,0x9e09a05c,0x9361fbf5,0xdca3291a,0xf17f85a6,0xff251a21,0xb178d548 +.long 0xa4df3915,0x87f6374b,0x2fd5d608,0x566ce1bf,0x7de35102,0x425cba4d,0x58c5d5e2,0x6b745f8f,0x63122edf,0x88402af6,0x3b989a89,0x3190f9ed,0xebba3156,0x4ad3d387,0xc7c469a5,0xef385ad9 +.long 0x3f642c29,0xb08281de,0x910ffb88,0x20be0888,0xd5292546,0xf353dd4a,0x8377a262,0x3f1627de,0xeefcd638,0xa5faa013,0x74cc77c3,0x8f3bf626,0xa348f55e,0x32618f65,0x9fefeb9e,0x5787c0dc +.long 
0xd9a23e44,0xf1673aa2,0x4e10690d,0x88dfa993,0x2bf91108,0x1ced1b36,0x3af48649,0x9193ceca,0x2d738fc5,0xfb34327d,0x975fee6c,0x6697b037,0xc04079a5,0x2f485da0,0x2feaa1ac,0x2cdf5735 +.long 0xbd55659e,0x76944420,0x4376090c,0x7973e32b,0x163b591a,0x86bb4fe1,0xc196f0ca,0x10441aed,0x045ad915,0x3b431f4a,0xa4afacb1,0x6c11b437,0x71fdbbd8,0x30b0c7db,0xeda65acd,0xb642931f +.long 0x9c92b235,0x4baae6e8,0x6b3993a1,0xa73bbd0e,0x693dd031,0xd06d60ec,0x7156881c,0x03cab91b,0x1db3574b,0xd615862f,0x64bb061a,0x485b0185,0xa0181e06,0x27434988,0xc1c0c757,0x2cd61ad4 +.long 0x2ff9f403,0x3effed5a,0x62239029,0x8dc98d8b,0x1f17b70d,0x2206021e,0xbf510015,0xafbec0ca,0x80130dfa,0x9fed7164,0x8a02dcf5,0x306dc2b5,0xfeb10fc0,0x48f06620,0x5a57cf51,0x78d1e1d5 +.long 0x192ef710,0xadef8c5a,0x3b7431f9,0x88afbd4b,0x64250c9e,0x7e1f7407,0xb58bec07,0x6e31318d,0x24f89b4e,0xfd4fc4b8,0x48c36a2a,0x65a5dd88,0xf024baa7,0x4f1eccff,0xcba94650,0x22a21cf2 +.long 0x42a554f7,0x95d29dee,0x002ec4ba,0x828983a5,0x8badb73d,0x8112a1f7,0xa27c1839,0x79ea8897,0xd065fd83,0x8969a5a7,0xb262a0bc,0xf49af791,0xaf2b5127,0xfcdea8b6,0x564c2dbc,0x10e913e1 +.long 0xbc21ef51,0x51239d14,0x4ce57292,0xe51c3ceb,0x47bbcc3b,0x795ff068,0xbd7e11e6,0x86b46e1e,0x80041ef4,0x0ea6ba23,0x6262342e,0xd72fe505,0x31d294d4,0x8abc6dfd,0x1278c2c9,0xbbe017a2 +.long 0xb389328a,0xb1fcfa09,0xd01771b5,0x322fbc62,0x60b045bf,0x04c0d063,0x10e52d01,0xdb652edc,0x03ec6627,0x50ef932c,0xc1ee50e3,0xde1b3b2d,0xdc37a90d,0x5ab7bdc5,0x31e33a96,0xfea67213 +.long 0x4f2999aa,0x6482b5cb,0xb8cbf0dd,0x38476cc6,0x173405bb,0x93ebfacb,0xe52369ec,0x15cdafe7,0xd935b7db,0xd42d5ba4,0x1c99a4cd,0x648b6004,0xa3b5545b,0x785101bd,0x9dd67faf,0x4bf2c38a +.long 0x4442449c,0xb1aadc63,0x33ad4fb8,0xe0e9921a,0xaa686d82,0x5c552313,0x465d866c,0xdee635fa,0x18ee6e8a,0xbc3c224a,0xed42e02f,0xeed748a6,0xd474cd08,0xe70f930a,0xfff24adf,0x774ea6ec +.long 0xf3480d4a,0x03e2de1c,0xbc8acf1a,0xf0d8edc7,0x68295a9c,0xf23e3303,0xc546a97d,0xfadd5f68,0x96f8acb1,0x895597ad,0x671bdae2,0xbddd49d5,0x21dd43f4,0x16fcd528,0x6619141a,0xa5a45412 +.long 0xc360e25a,0x8ce9b6bf,0x075a1a78,0xe6425195,0x481732f4,0x9dc756a8,0x5432b57a,0x83c0440f,0xd720281f,0xc670b3f1,0xd135e051,0x2205910e,0xdb052be7,0xded14b0e,0xc568ea39,0x697b3d27 +.long 0xfb3ff9ed,0x2e599b9a,0x17f6515c,0x28c2e0ab,0x474da449,0x1cbee4fd,0x4f364452,0x071279a4,0x01fbe855,0x97abff66,0x5fda51c4,0x3ee394e8,0x67597c0b,0x190385f6,0xa27ee34b,0x6e9fccc6 +.long 0x14092ebb,0x0b89de93,0x428e240c,0xf17256bd,0x93d2f064,0xcf89a7f3,0xe1ed3b14,0x4f57841e,0xe708d855,0x4ee14405,0x03f1c3d0,0x856aae72,0xbdd7eed5,0xc8e5424f,0x73ab4270,0x3333e4ef +.long 0xdda492f8,0x3bc77ade,0x78297205,0xc11a3aea,0x34931b4c,0x5e89a3e7,0x9f5694bb,0x17512e2e,0x177bf8b6,0x5dc349f3,0x08c7ff3e,0x232ea4ba,0xf511145d,0x9c4f9d16,0x33b379c3,0xccf109a3 +.long 0xa1f25897,0xe75e7a88,0xa1b5d4d8,0x7ac6961f,0x08f3ed5c,0xe3e10773,0x0a892dfb,0x208a54ec,0x78660710,0xbe826e19,0x237df2c8,0x0cf70a97,0xed704da5,0x418a7340,0x08ca33fd,0xa3eeb9a9 +.long 0x169bca96,0x49d96233,0x2da6aafb,0x04d286d4,0xa0c2fa94,0xc09606ec,0x23ff0fb3,0x8869d0d5,0xd0150d65,0xa99937e5,0x240c14c9,0xa92e2503,0x108e2d49,0x656bf945,0xa2f59e2b,0x152a733a +.long 0x8434a920,0xb4323d58,0x622103c5,0xc0af8e93,0x938dbf9a,0x667518ef,0x83a9cdf2,0xa1843073,0x5447ab80,0x350a94aa,0xc75a3d61,0xe5e5a325,0x68411a9e,0x74ba507f,0x594f70c5,0x10581fc1 +.long 0x80eb24a9,0x60e28570,0x488e0cfd,0x7bedfb4d,0xc259cdb8,0x721ebbd7,0xbc6390a9,0x0b0da855,0xde314c70,0x2b4d04db,0x6c32e846,0xcdbf1fbc,0xb162fc9e,0x33833eab,0xb0dd3ab7,0x9939b48b +.long 
0xcb0c9c8c,0x5aaa98a7,0x81c4375c,0x75105f30,0x5ef1c90f,0xceee5057,0xc23a17bf,0xb31e065f,0xd4b6d45a,0x5364d275,0x62ec8996,0xd363f3ad,0x4391c65b,0xb5d21239,0xebb41b47,0x84564765 +.long 0x37107c78,0x20d18ecc,0x570c2a66,0xacff3b6b,0x9bd0d845,0x22f975d9,0xba178fa0,0xef0a0c46,0x76b6028e,0x1a419651,0x248612d4,0xc49ec674,0x7338af55,0x5b6ac4f2,0x7bee5a36,0x06145e62 +.long 0xe75746b5,0x33e95d07,0xc40c78be,0x1c1e1f6d,0x222ff8e2,0x967833ef,0xb49180ad,0x4bedcf6a,0x3d7a4c8a,0x6b37e9c1,0x6ddfe760,0x2748887c,0xaa3a5bbc,0xf7055123,0x7bbb8e74,0x954ff225 +.long 0x97c3dfb9,0xc42b8ab1,0xcf168154,0x55a549b0,0xc1b50692,0xad6748e7,0x6fc5cbcb,0x2775780f,0xe1c9d7c8,0x4eab80b8,0x3fdbcd56,0x8c69dae1,0x9969eace,0x47e6b4fb,0xa705cb5a,0x002f1085 +.long 0x6d3fea55,0x4e23ca44,0xf4810568,0xb4ae9c86,0x2a62f27d,0x47bfb91b,0xd9bac28c,0x60deb4c9,0x7de6c34c,0xa892d894,0x4494587d,0x4ee68259,0x1a3f8a5b,0x914ee14e,0x28700385,0xbb113eaa +.long 0x2115b4c9,0x81ca03b9,0x8908cad1,0x7c163d38,0xaa18179a,0xc912a118,0x886e3081,0xe09ed750,0x26f516ca,0xa676e3fa,0x8e732f91,0x753cacf7,0x833da8b4,0x51592aea,0x4cbea8aa,0xc626f42f +.long 0xa7b56eaf,0xef9dc899,0x34ef7316,0x00c0e52c,0xfe818a86,0x5b1e4e24,0xc538be47,0x9d31e20d,0x3ed68974,0x22eb932d,0x7c4e87c4,0xe44bbc08,0x0dde9aef,0x4121086e,0x134f4345,0x8e6b9cff +.long 0x711b0eb9,0x96892c1f,0x780ab954,0xb905f2c8,0xa20792db,0xace26309,0x0684e126,0xec8ac9b3,0xb40a2447,0x486ad8b6,0x9fe3fb24,0x60121fc1,0x1a8e3b3f,0x5626fccf,0x6ad1f394,0x4e568622 +.long 0x196aa5a1,0xda7aae0d,0x1041b5fb,0xe0df8c77,0x26b318b7,0x451465d9,0x7ab136e9,0xc29b6e55,0x71148463,0x2c2ab48b,0x64454a76,0xb5738de3,0x5a03abe4,0x54ccf9a0,0x0427d58e,0x377c0296 +.long 0x2bb39c1f,0x73f5f0b9,0xe608d8c5,0x14373f2c,0x00fbb805,0xdcbfd314,0x83afdcfb,0xdf18fb20,0x42b3523f,0x81a57f42,0x87f650fb,0xe958532d,0x8b0a7d7c,0xaa8dc8b6,0x150166be,0x1b75dfb7 +.long 0x2d7d1413,0x90e4f7c9,0x9834f597,0x67e2d6b5,0xa808c3e8,0x4fd4f4f9,0xd5281ec1,0xaf8237e0,0x84687cee,0x25ab5fdc,0xa5b26c09,0xc5ded6b1,0xc8ea7650,0x8e4a5aec,0x14cc417f,0x23b73e5c +.long 0x3037bf52,0x2bfb4318,0x78c725d7,0xb61e6db5,0xbbb3e5d7,0x8efd4060,0xdbac488e,0x2e014701,0x360aa449,0xac75cf9a,0x79634d08,0xb70cfd05,0xfffb15ef,0xa591536d,0xd07c106c,0xb2c37582 +.long 0xf50225f9,0xb4293fdc,0xb0e12b03,0xc52e175c,0xd0a8bf64,0xf649c3ba,0xeb8ae3c6,0x745a8fef,0x58321bc3,0x30d7e5a3,0x0bc4df48,0xb1732be7,0xe9ea5058,0x1f217993,0x3e4fd745,0xf7a71cde +.long 0x894c5bbb,0x86cc533e,0x69d83082,0x6915c7d9,0x5815c244,0xa6aa2d05,0x49b22ce5,0xaeeee592,0x78135486,0x89e39d13,0x16b76f2f,0x3a275c1f,0xe036e8f5,0xdb6bcc1b,0x5e4709f5,0x4df69b21 +.long 0x2d0f39aa,0xa188b250,0x15a85947,0x622118bb,0xfde0f4fa,0x2ebf520f,0x4860e539,0xa40e9f29,0x22b57f0f,0x7b6a51eb,0x7e80644a,0x849a33b9,0x1cf095fe,0x50e5d16f,0xec55f002,0xd754b54e +.long 0x236f4a98,0x5cfbbb22,0x066800bb,0x0b0c59e9,0x5a9a7774,0x4ac69a8f,0xd6bec948,0x2b33f804,0x32e6c466,0xb3729295,0x4e599c73,0x68956d0f,0x155c31cc,0xa47a249f,0xe1ce284e,0x24d80f0d +.long 0x988baf01,0xcd821dfb,0xdbb16647,0xe6331a7d,0x094cb960,0x1eb8ad33,0xc91bbca5,0x593cca38,0x26567456,0x384aac8d,0xc04b6490,0x40fa0309,0xdab6c8f6,0x97834cd6,0x3f91e55f,0x68a7318d +.long 0xfc4d3157,0xa00fd04e,0x2bf3bdea,0xb56f8ab2,0x4fa57172,0x014f5648,0x450abdb3,0x948c5860,0x0ebd4f08,0x342b5df0,0x0e82938e,0x3e5168cd,0xb0df5dd0,0x7aedc1ce,0xe5732516,0x6bbbc6d9 +.long 0x605daaa6,0xc7bfd486,0xbb9a6c9e,0x46fd72b7,0xa124fb89,0xe4847fb1,0xa2d8ffbc,0x75959cbd,0xc8a588ee,0x42579f65,0xb80b499d,0x368c92e6,0x999a5df1,0xea4ef6cd,0x936fe604,0xaa73bb7f +.long 
0x6457d188,0xf347a70d,0x8b7a388b,0x86eda86b,0x0ccd6013,0xb7cdff06,0xd0053fb2,0xbeb1b6c7,0x99240a9f,0x0b022387,0x776189b2,0x1bbb384f,0x9066193a,0x8695e71e,0x06ffac7e,0x2eb50097 +.long 0x4a7d2caa,0x0654a9c0,0xa5aaa290,0x6f3fb3d1,0xff476e8f,0x835db041,0xc42295e4,0x540b8b0b,0x05e214f5,0xa5c73ac9,0x56a0b638,0x9a74075a,0xce9e680b,0x2e4b1090,0x6b8d9afa,0x57a5b479 +.long 0x26bfe65c,0x0dca48e7,0x7290c307,0x097e391c,0x6669e72e,0x683c462e,0x062559ac,0xf505be1e,0xe3a3035a,0x5fbe3ea1,0x9cd50da8,0x6431ebf6,0x1f6407f2,0xfd169d5c,0x60fce6b8,0x8d838a95 +.long 0x650006f0,0x2a2bfa7f,0x50c0fbb2,0xdfd7dad3,0xccf9ad96,0x92452495,0xd95635f9,0x183bf494,0x4a7bd989,0x02d5df43,0xa5431095,0x505385cc,0xfd43f53e,0xdd98e67d,0x500c34a9,0xd61e1a6c +.long 0x4a8a3d62,0x5a4b46c6,0x247743d2,0x8469c4d0,0x88f7e433,0x2bb3a13d,0x01be5849,0x62b23a10,0xa63d1a4c,0xe83596b4,0x7d183f3e,0x454e7fea,0x17afb01c,0x643fce61,0x1c4c3638,0x4e65e5e6 +.long 0xef74c45b,0x41d85ea1,0xae328506,0x2cfbfa66,0x3ada7da9,0x98b078f5,0xec752fbb,0xd985fe37,0x5a0148b4,0xeece68fe,0x2d78136d,0x6f9a55c7,0xd2b729ce,0x232dccc4,0x90aafbc4,0xa27e0dfd +.long 0x12b4603e,0x96474452,0x6b706d14,0xa876c551,0x69a9d412,0xdf145fcf,0x2d479c34,0xe2ab75b7,0x1a23ff97,0x12df9a76,0x5d359d10,0xc6138992,0xfa835f22,0x6e51c7ae,0xc0fcc4d9,0x69a79cb1 +.long 0x594cc7e1,0xf57f350d,0x3350ab79,0x3079ca63,0x9aff594a,0x226fb614,0x6d59a62b,0x35afec02,0x06ed2c6e,0x9bee46f4,0x7d939a57,0x58da1735,0x8fd1797e,0x44c50402,0x5ccea6ca,0xd8853e7c +.long 0xa35fcd5f,0x4065508d,0x495ccaeb,0x8965df8c,0x12e1a962,0x0f2da850,0xc1cf1cc4,0xee471b94,0x0a08fb75,0xcef19bc8,0x81de3591,0x704958f5,0x3aef4f88,0x2867f8b2,0xea9f9a5f,0x8d749384 +.long 0x8c9049f4,0x1b385537,0x7b92d8b6,0x5be948f3,0xb6e2bd6b,0xd96f725d,0x958c454d,0x37a222bc,0x8809bf61,0xe7c61abb,0x1346f18d,0x46f07fbc,0xe87c0d1c,0xfb567a7a,0x7ef3d07a,0x84a461c8 +.long 0xd9278d98,0x0a5adce6,0x9dfc73e1,0x24d94813,0x054321c3,0x4f3528b6,0x692ea706,0x2e03fdde,0x47b533c0,0x10e60619,0x2ca3c055,0x1a8bc73f,0x1bb62b8f,0xae58d4b2,0x584a24e3,0xb2045a73 +.long 0xbd76e195,0x3ab3d5af,0x6938a810,0x478dd1ad,0x6ee3d5cb,0x6ffab393,0x22b361e4,0xdfb693db,0x51dbf1a7,0xf9694496,0x08a2e762,0xcab4b4ef,0xd39bba9a,0xe8c92f25,0xf1464d96,0x850e61bc +.long 0xdc09508b,0xb7e830e3,0x74317655,0xfaf6d2cf,0xdf690355,0x72606ceb,0xd0c3ded6,0x48bb92b3,0x5c7cf892,0x65b75484,0xd5d5f01f,0xf6cd7ac9,0x96401d69,0xc2c30a59,0xed921878,0x91268650 +.long 0xb78c558f,0x380bf913,0xc8afdaa9,0x43c0baeb,0x54f169d3,0x377f61d5,0xae5ff20b,0xf8da07e3,0xa8a90ea8,0xb676c49d,0x83a29b21,0x81c1ff2b,0x2ad8d276,0x383297ac,0xba89f982,0x3001122f +.long 0x6718e448,0xe1d794be,0x7c3e6e13,0x246c1482,0x5d26b5ef,0x56646ef8,0x88069cdd,0x80f5091e,0x724bdd38,0xc5992e2f,0x8471e8c7,0x02e915b4,0x0d0ff2a9,0x96ff320a,0x4384d1a0,0xbf886487 +.long 0xc93f72d6,0xbbe1e6a6,0xcad800ea,0xd5f75d12,0xe7acf117,0xfa40a09f,0x7581a355,0x32c8cdd5,0x7023c499,0x74221992,0x38ec3901,0xa8afe5d7,0xa90e83f0,0x5691afcb,0x0b8f8eac,0x41bcaa03 +.long 0x8d2668d5,0xe38b5ff9,0x7ad81965,0x0715281a,0x03c6ce11,0x1bc8fc7c,0x8b650436,0xcbbee6e2,0x0cdb9808,0x06b00fe8,0xfe3ed315,0x17d6e066,0x4d0b5018,0x2e9d38c6,0x844dcaef,0xab8bfd56 +.long 0x513aed8b,0x42894a59,0x314bd07a,0xf77f3b6d,0x8e42b582,0xbbdecb8f,0xd2390fe6,0xf10e2fa8,0x62a2f201,0xefb95022,0x50ee32b0,0x4d59ea50,0x6da789a8,0xd87f7728,0xf79492c4,0xcf98a2cf +.long 0x720943c2,0xf9577239,0x3990b9d0,0xba044cf5,0x95f2884a,0x5aa8e823,0x0278a0af,0x834de6ed,0x5f25bd12,0xc8e1ee9a,0x6f7ab271,0x9259ceaa,0x77d00b76,0x7e6d97a2,0xa437832a,0x5c0c6eea +.long 
0x5606b81d,0x5232c20f,0x0d991ee5,0xabd7b375,0x8632d951,0x4d2bfe35,0x98ed9364,0x78f85146,0xf30c3282,0x951873f0,0xa789230b,0x0da8ac80,0x5398967f,0x3ac7789c,0xbdda0fb5,0xa69b8f7f +.long 0x6add8545,0xe5db7717,0x72c49b66,0x1b71cb66,0x68421d77,0xd8560739,0x83e3afea,0x03840fe8,0x1ec69977,0xb391dad5,0x307f6726,0xae243fb9,0xe8ca160c,0xc88ac87b,0x4ce355f4,0x5174cced +.long 0xe58ba37d,0x98a35966,0x7817335d,0xfdcc8da2,0x83fbc7bf,0x5b752830,0xd9c96984,0x68e419d4,0x02a40380,0x409a39f4,0x1fe977bc,0x88940faf,0x8f8edea6,0xc640a94b,0xed11547d,0x1e22cd17 +.long 0x59ffc3e2,0xe28568ce,0xc1dee4e7,0x60aa1b55,0x837cb363,0xc67497c8,0x105a2bf2,0x06fb438a,0x500d8e20,0x30357ec4,0x0670db10,0x1ad9095d,0xc73b7cfd,0x7f589a05,0x880d6d28,0xf544607d +.long 0xa20ef103,0x17ba93b1,0x6ba6577b,0xad859130,0x6fa214a0,0x65c91cf6,0x27990da5,0xd7d49c6c,0x20bb569d,0xecd9ec8d,0xeeffbc33,0xbd4b2502,0x6bed0467,0x2056ca5a,0x5b63728c,0x7916a1f7 +.long 0x53a4f566,0xd4f9497d,0x97b56810,0x89734664,0x0494a621,0xf8e1da74,0x8d011c68,0x82546a93,0xc61ac162,0x1f3acb19,0xabad0d3e,0x52f8fa9c,0xb4b7ea43,0x15356523,0xae608125,0x5a16ad61 +.long 0x4faed184,0xb0bcb87f,0x5029f45f,0x5f236b1d,0x0bc6b1fc,0xd42c7607,0x68aefce3,0xc644324e,0x5c5d8446,0x8e191d59,0x13ae1979,0xc0208077,0x3ba59cc7,0xadcaee55,0xa2cb81ba,0x20ed6d6b +.long 0xb6efcffc,0x0952ba19,0x97c0b87c,0x60f12d68,0x9caa30bc,0x4ee2c7c4,0x97fbff4e,0x767238b7,0x501b5d92,0xebc73921,0xc2a37737,0x3279e3df,0x6d197543,0x9fc12bc8,0x0a40db4e,0xfa94dc6f +.long 0x530ccbbd,0x7392b41a,0xea823525,0x87c82146,0x05d98d0c,0xa52f984c,0x5ef6974c,0x2ae57d73,0x3042a6dd,0x9377f7bf,0x19647a64,0xb1a007c0,0x0cca9767,0xfaa9079a,0xf68f72d5,0x3d81a25b +.long 0xff81578e,0x752067f8,0x9045447d,0x78622150,0x0505aa6f,0xc0c22fcf,0x6bed1c77,0x1030f0a6,0x1f0bd739,0x31f29f15,0xe6debe85,0x2d7989c7,0x8e677e98,0x5c070e72,0x06e81fd5,0x0a817bd3 +.long 0xb0f2ac95,0xc110d830,0xab20e64e,0x48d0995a,0x7729cd9a,0x0f3e00e1,0xdd556946,0x2a570c20,0x4e86214d,0x912dbcfd,0xcf615498,0x2d014ee2,0x3530d76e,0x55e2b1e6,0xfd0fd6d1,0xc5135ae4 +.long 0xd4f3049f,0x0066273a,0xe7087477,0xbb8e9893,0x14c6e5fd,0x2dba1ddb,0x51f57e6c,0xdba37886,0x5a72f2cf,0x5aaee0a6,0x7bea5642,0x1208bfbf,0x67872c37,0xf5c6aa3b,0x43f93224,0xd726e083 +.long 0x061f1658,0x1854daa5,0xdf0cd2b3,0xc0016df1,0x833d50de,0xc2a3f23e,0xbbbd3017,0x73b681d2,0x3ac343c0,0x2f046dc4,0x85716421,0x9c847e7d,0x0917eed4,0xe1e13c91,0x63a1b9c6,0x3fc9eebd +.long 0x7fe02299,0x0f816a72,0x294f3319,0x6335ccc2,0x4745c5be,0x3820179f,0x922f066e,0xe647b782,0x02cafb8a,0xc22e49de,0xfcc2eccc,0x299bc2ff,0x6e0e8282,0x9a8feea2,0xfe893205,0xa627278b +.long 0x7933e47b,0xa7e19733,0x2e766402,0xf4ff6b13,0x98440d9f,0xa4d8be0a,0x38938808,0x658f5c2f,0xc95b3b3e,0x90b75677,0x3137b6ff,0xfa044269,0x43c47c29,0x077b039b,0x8a6445b2,0xcca95dd3 +.long 0x2333fc4c,0x0b498ba4,0xf736a1b1,0x274f8e68,0x5f1d4b2e,0x6ca348fd,0xa8f10199,0x24d3be78,0xca14f530,0x8535f858,0x5b982e51,0xa6e7f163,0x36e1bf62,0x847c8512,0x03448418,0xf6a7c58e +.long 0xf9374ab6,0x583f3703,0x6e564145,0x864f9195,0x22526d50,0x33bc3f48,0x1262a496,0x9f323c80,0x3f046a9a,0xaa97a7ae,0xdf8a039a,0x70da183e,0x52aa0ba6,0x5b68f71c,0x21459c2d,0x9be0fe51 +.long 0xcbc613e5,0xc1e17eb6,0x497ea61c,0x33131d55,0xaf7eded5,0x2f69d39e,0xde6af11b,0x73c2f434,0xa4a375fa,0x4ca52493,0xb833c5c2,0x5f06787c,0x3e6e71cf,0x814e091f,0x8b746666,0x76451f57 +.long 0x694db7e0,0x80f9bdef,0xb9fcddc6,0xedca8787,0x03b8dce1,0x51981c34,0x70e10ba1,0x4274dcf1,0x6def6d1a,0xf72743b8,0xebdb1866,0xd25b1670,0x050c6f58,0xc4491e8c,0x87fbd7f5,0x2be2b2ab +.long 
0xd111f8ec,0x3e0e5c9d,0xb7c4e760,0xbcc33f8d,0xbd392a51,0x702f9a91,0xc132e92d,0x7da4a795,0x0bb1151b,0x1a0b0ae3,0x02e32251,0x54febac8,0x694e9e78,0xea3a5082,0xe4fe40b8,0xe58ffec1 +.long 0xd1e0cf9e,0xf85592fc,0xc0e7b2e8,0xdea75f0d,0xc135584e,0xc04215cf,0x2f57092a,0x174fc727,0xeb930bea,0xe7277877,0x5eb02a5a,0x504caccb,0xf5241b9b,0xf9fe08f7,0x8d5ca954,0xe7fb62f4 +.long 0x29c4120b,0xfbb8349d,0xc0d0d915,0x9f94391f,0x5410ba51,0xc4074fa7,0x150a5911,0xa66adbf6,0x34bfca38,0xc164543c,0xb9e1ccfc,0xe0f27560,0xe820219c,0x99da0f53,0xc6b4997a,0xe8234498 +.long 0x9d4c5423,0xcfb88b76,0xb0521c49,0x9e56eb10,0xbe8700a1,0x418e0b5e,0xf93cb58a,0x00cbaad6,0xd92a5e67,0xe923fbde,0x1f347f11,0xca4979ac,0x6bc0585b,0x89162d85,0xac3c70e3,0xdd6254af +.long 0x516e19e4,0x7b23c513,0xc5c4d593,0x56e2e847,0x5ce71ef6,0x9f727d73,0xf79a44c5,0x5b6304a6,0x3ab7e433,0x6638a736,0xfe742f83,0x1adea470,0x5b7fc19f,0xe054b854,0xba1d0698,0xf935381a +.long 0x799e9a74,0x546eab2d,0xa949f729,0x96239e0e,0x7090055a,0xca274c6b,0x9020c9b0,0x835142c3,0xa2e8807f,0xa405667a,0x1aa3d39e,0x29f2c085,0x42fc72f5,0xcc555d64,0xfbeacb3c,0xe856e0e7 +.long 0x918e4936,0xb5504f9d,0xb2513982,0x65035ef6,0x6f4d9cb9,0x0553a0c2,0xbea85509,0x6cb10d56,0xa242da11,0x48d957b7,0x672b7268,0x16a4d3dd,0x8502a96b,0x3d7e637c,0x730d463b,0x27c7032b +.long 0xe4136a14,0xbdc02b18,0x678e32bf,0xbacf969d,0xdd9c3c03,0xc98d89a3,0x23becc4f,0x7b92420a,0xc64d565c,0xd4b41f78,0x10f28295,0x9f969d00,0xb13d051a,0xec7f7f76,0xa92da585,0x08945e1e +.long 0x5846426f,0x55366b7d,0x247d441d,0xe7d09e89,0x736fbf48,0x510b404d,0xe784bd7d,0x7fa003d0,0x17fd9596,0x25f7614f,0x35cb98db,0x49e0e0a1,0x2e83a76a,0x2c65957b,0xcddbe0f8,0x5d40da8d +.long 0x050bad24,0xf2b8c405,0xc2aa4823,0x8918426d,0xa38365a7,0x2aeab3dd,0x7c91b690,0x72031717,0x60a94120,0x8b00d699,0xe99eaeec,0x478a255d,0x6f60aafd,0xbf656a5f,0x5dee77b3,0xdfd7cb75 +.long 0xa595939d,0x37f68bb4,0x28740217,0x03556479,0x84ad7612,0x8e740e7c,0x9044695f,0xd89bc843,0x85a9184d,0xf7f3da5d,0x9fc0b074,0x562563bb,0xf88a888e,0x06d2e6aa,0x161fbe7c,0x612d8643 +.long 0xf64085e7,0x465edba7,0x29aa8511,0xb230f304,0xcda2d188,0x53388426,0x4b666649,0x90885735,0x652f54f6,0x6f02ff9a,0x5fae2bf0,0x65c82294,0x62f5eee3,0x7816ade0,0xfcc56d70,0xdcdbdf43 +.long 0x54530bb2,0x9fb3bba3,0xcb0869ea,0xbde3ef77,0x0b431163,0x89bc9046,0xe4819a35,0x4d03d7d2,0x43b6a782,0x33ae4f9e,0x9c88a686,0x216db307,0x00ffedd9,0x91dd88e0,0x12bd4840,0xb280da9f +.long 0x1635e741,0x32a7cb8a,0x78be02a7,0xfe14008a,0x1b7ae030,0x3fafb334,0x5add0ce9,0x7fd508e7,0xd607ad51,0x72c83219,0x8d40964a,0x0f229c0a,0x1c878da2,0x1be2c336,0xeab2ab86,0xe0c96742 +.long 0x3e538cd7,0x458f8691,0x8e08ad53,0xa7001f6c,0xbf5d15ff,0x52b8c6e6,0x011215dd,0x548234a4,0x3d5b4045,0xff5a9d2d,0x4a904190,0xb0ffeeb6,0x48607f8b,0x55a3aca4,0x30a0672a,0x8cbd665c +.long 0x42583068,0x87f834e0,0xf3f6e683,0x02da2aeb,0x05c12248,0x6b763e5d,0x65a8aefc,0x7230378f,0x71e8e5ca,0x93bd80b5,0xb3b62524,0x53ab041c,0x6c9c552e,0x1b860513,0xd5524e66,0xe84d402c +.long 0xf37f5937,0xa37f3573,0xd1e4fca5,0xeb0f6c7d,0xac8ab0fc,0x2965a554,0x274676ac,0x17fbf56c,0xacf7d720,0x2e2f6bd9,0x10224766,0x41fc8f88,0x85d53bef,0x517a14b3,0x7d76a7d1,0xdae327a5 +.long 0xc4818267,0x6ad0a065,0x37c1bbc1,0x33aa189b,0x27392a92,0x64970b52,0x2d1535ea,0x21699a1c,0xc2d7a7fd,0xcd20779c,0x99c83cf2,0xe3186059,0x72c0b8c7,0x9b69440b,0x7b9e0e4d,0xa81497d7 +.long 0x1f5f82dc,0x515d5c89,0x6361079e,0x9a7f67d7,0x11a35330,0xa8da81e3,0x4b18be1b,0xe44990c4,0xaf103e59,0xc7d5ed95,0x8dac9261,0xece8aba7,0x9394b8d3,0xbe82b099,0x16adfe83,0x6830f09a +.long 
0x88172d01,0x250a29b4,0xcaff9e02,0x8b20bd65,0xe8a6329a,0xb8a7661e,0xd3fce920,0x4520304d,0x2b47f7ef,0xae45da1f,0x5bffc540,0xe07f5288,0x3464f874,0xf7997009,0xa6fa1f38,0x2244c2cd +.long 0x94d7d9b1,0x43c41ac1,0xc82e7f17,0x5bafdd82,0x5fda0fca,0xdf0614c1,0xa8ae37ad,0x74b043a7,0x9e71734c,0x3ba6afa1,0x9c450f2e,0x15d5437e,0x67e242b1,0x4a5883fe,0x2c1953c2,0x5143bdc2 +.long 0xfc5e8920,0x542b8b53,0x9a9cee08,0x363bf9a8,0xc3486e08,0x02375f10,0x8c5e70d2,0x2037543b,0x625640b4,0x7109bccc,0x8bc62c3b,0xcbc1051e,0x803f26ea,0xf8455fed,0xeb372424,0x6badceab +.long 0x6b53f5f9,0xa2a9ce7c,0x1b176d99,0x64246595,0xb95c081b,0xb1298d36,0x1d9a9ee6,0x53505bb8,0xf2ba70b0,0x3f6f9e61,0x8afad453,0xd07e16c9,0xe7eb4a6a,0x9f1694bb,0x3cb0bc8e,0xdfebced9 +.long 0x53868c8b,0x92d3dcdc,0x386107a6,0x174311a2,0x689b4e64,0x4109e07c,0x2df3dcb6,0x30e4587f,0x0811b3b2,0x841aea31,0x0cce43ea,0x6144d41d,0x2a9a7803,0x464c4581,0x3e158930,0xd03d371f +.long 0xb1f3390b,0xc676d7f2,0xa5b61272,0x9f7a1b8c,0xc2e127a9,0x4ebebfc9,0x5dd997bf,0x4602500c,0x4711230f,0x7f09771c,0x020f09c1,0x058eb37c,0xfee5e38b,0xab693d4b,0x4653cbc0,0x9289eb1f +.long 0xd51b9cf5,0xbecf46ab,0x9f0121af,0xd2aa9c02,0xe90dc274,0x36aaf7d2,0x48b95a3c,0x909e4ea0,0x6f32dbdb,0xe6b70496,0x8b030b3e,0x672188a0,0xcfb617e2,0xeeffe5b3,0x7c82709e,0x87e947de +.long 0x1770f5a7,0xa44d2b39,0x0e44eb82,0xe4d4d791,0x3f69712a,0x42e69d1e,0xac6a820e,0xbf11c4d6,0x42c4224c,0xb5e7f3e5,0x449d941c,0xd6b4e81c,0x5450e878,0x5d72bd16,0xee25ac54,0x6a61e28a +.long 0xe6f1cd95,0x33272094,0x0d18673f,0x7512f30d,0x5afc1464,0x32f7a4ca,0x6bbb977b,0x2f095656,0xa8226200,0x586f47ca,0x1ac07369,0x02c868ad,0xc613acbe,0x4ef2b845,0x0386054c,0x43d7563e +.long 0xab952578,0x54da9dc7,0x26e84d0b,0xb5423df2,0x9b872042,0xa8b64eeb,0x5990f6df,0xac205782,0x21f4c77a,0x4ff696eb,0xaab273af,0x1a79c3e4,0x9436b3f1,0x29bc922e,0xd6d9a27a,0xff807ef8 +.long 0x778f22a0,0x82acea3d,0x5b5e7469,0xfb10b2e8,0x2818ee7d,0xc0b16980,0xc91c1a2f,0x011afff4,0xad124418,0x95a6d126,0xe72e295f,0x31c081a5,0xf2f4db75,0x36bb283a,0x7acef462,0xd115540f +.long 0x33f6746c,0xc7f3a8f8,0xfea990ca,0x21e46f65,0xcaddb0a9,0x915fd5c5,0x78614555,0xbd41f016,0x426ffb58,0x346f4434,0x14dbc204,0x80559436,0x5a969b7f,0xf3dd20fe,0xe899a39a,0x9d59e956 +.long 0x8ad4cf4b,0xf1b0971c,0x2ffb8fb8,0x03448860,0x65340ba4,0xf071ac3c,0xb27fd758,0x408d0596,0x98c364b0,0xe7c78ea4,0x051e8ab5,0xa4aac4a5,0x485d9002,0xb9e1d560,0x88844455,0x9acd518a +.long 0xd06f56c0,0xe4ca688f,0xdf027972,0xa48af70d,0x5e9a609d,0x691f0f04,0xee61270e,0xa9dd82cd,0xa0ef18d3,0x8903ca63,0x3d6ca3bd,0x9fb7ee35,0xabf47d03,0xa7b4a09c,0x1c67de8e,0x4cdada01 +.long 0x9355a244,0x52003749,0x4f2151a9,0xe77fd2b6,0x66b4efcb,0x695d6cf6,0xda2cfe25,0xc5a0cacf,0xef811865,0x104efe5c,0x9ea5cc3d,0xf52813e8,0x40b58dbc,0x855683dc,0x175fcb11,0x0338ecde +.long 0x74921592,0xf9a05637,0xb9bb9d31,0xb4f1261d,0x4e9c5459,0x551429b7,0x6ea71f53,0xbe182e6f,0xdfc50573,0xd3a3b07c,0x62be8d44,0x9ba1afda,0x52ab65d3,0x9bcfd2cb,0xa9571802,0xdf11d547 +.long 0x02a2404a,0x099403ee,0x21088a71,0x497406f4,0x5004ae71,0x99479409,0xa812c362,0xbdb42078,0xd8828442,0x2b72a30f,0xfcb5ed1c,0x283add27,0x66a40015,0xf7c0e200,0x08b295ef,0x3e3be641 +.long 0xe038a675,0xac127dc1,0x8c5c6320,0x729deff3,0xa90d2c53,0xb7df8fd4,0x681e7cd3,0x9b74b0ec,0xdab407e5,0x5cb5a623,0x76b340c6,0xcdbd3615,0x7d28392c,0xa184415a,0xe96f7830,0xc184c1d8 +.long 0x81d3a80f,0xc3204f19,0xc8e02432,0xfde0c841,0x8149e0c1,0x78203b3e,0x08053a73,0x5904bdbb,0x101b6805,0x30fc1dd1,0x49aa6d49,0x43c223bc,0x7a174087,0x9ed67141,0xd5997008,0x311469a0 +.long 
0x5e43fc61,0xb189b684,0xe0d3ab57,0xf3282375,0xb1181da8,0x4fa34b67,0x99ee52b8,0x621ed0b2,0xad990676,0x9b178de1,0x56d54065,0xd51de67b,0x7538c201,0x2a2c27c4,0x38a40f5c,0x33856ec8 +.long 0xbe6cdcde,0x2522fc15,0x9f0c6f89,0x1e603f33,0x103e30a6,0x7994edc3,0x220c853e,0x033a00db,0xf7bb7fd7,0xd3cfa409,0x462d18f6,0x70f8781e,0x687fe295,0xbbd82980,0x595669f3,0x6eef4c32 +.long 0x2f7e85c3,0x86a9303b,0x71988f9b,0x5fce4621,0xc138acb5,0x5b935bf6,0x25661212,0x30ea7d67,0xe51ab9a2,0xef1eb5f4,0xae067c78,0x0587c98a,0x77ca9ca6,0xb3ce1b3c,0x54b5f057,0x2a553d4d +.long 0x4da29ec2,0xc7898236,0xb9c57316,0xdbdd5d13,0x2cd80d47,0xc57d6e6b,0xfe9e7391,0x80b460cf,0xf963c31e,0x98648cab,0xcc4d32fd,0x67f9f633,0xfdf7c687,0x0af42a9d,0x0b015ea7,0x55f292a3 +.long 0xcd21ab3d,0x89e468b2,0xc393d392,0xe504f022,0xa5013af9,0xab21e1d4,0xc2c28acb,0xe3283f78,0x226bf99f,0xf38b35f6,0x0e291e69,0xe8354274,0xb20c162d,0x61673a15,0xb04fbdbe,0xc101dc75 +.long 0x255bd617,0x8323b4c2,0x6c2a9154,0x6c969693,0x62679387,0xc6e65860,0xb8c88e23,0x8e01db0c,0x893a5559,0x33c42873,0x47a3e149,0x7630f04b,0xddcf35f8,0xb5d80805,0x77dfe732,0x582ca080 +.long 0x0b1894a0,0x2c7156e1,0xd81c68c0,0x92034001,0xc8b115b5,0xed225d00,0x83b907f2,0x237f9c22,0x4470e2c0,0x0ea2f32f,0x58be4e95,0xb725f7c1,0xb1ae5463,0x0f1dcafa,0x1ba2fc04,0x59ed5187 +.long 0xd0115d4d,0xf6e0f316,0xd3691599,0x5180b12f,0x527f0a41,0x157e32c9,0xa8e0ecc0,0x7b0b081d,0xbf4f0dd0,0x6dbaaa8a,0x4d252696,0x99b289c7,0xdbf864fe,0x79b7755e,0x76cad3ab,0x6974e2b1 +.long 0x06ddd657,0x35dbbee2,0x2ff3a96d,0xe7cbdd11,0x076be758,0x88381968,0x08c91f5d,0x2d737e72,0x86ec3776,0x5f83ab62,0x945fa7a1,0x98aa649d,0x72ef0933,0xf477ec37,0x098c17b1,0x66f52b1e +.long 0xd803738b,0x9eec58fb,0xe4e86aa4,0x91aaade7,0xa5b51492,0x6b1ae617,0xbbc45974,0x63272121,0x862c5129,0x7e0e28f0,0x3321a4a0,0x0a8f79a9,0x5041c88f,0xe26d1664,0x53233e3a,0x0571b805 +.long 0xc9520711,0xd1b0ccde,0x3c8b84bf,0x55a9e4ed,0xa1fef314,0x9426bd39,0x6eb93f2b,0x4f5f638e,0x2bf9341b,0xba2a1ed3,0x4d42d5a9,0xd63c1321,0x316dc7c5,0xd2964a89,0xca511851,0xd1759606 +.long 0xf9e6ed35,0xd8a9201f,0x6736925a,0xb7b5ee45,0x99581af7,0x0a83fbbc,0x64eeb051,0x3076bc40,0x02dec312,0x5511c98c,0x238dcb78,0x270de898,0x539c08c9,0x2cf4cf9c,0x38d3b06e,0xa70cb65e +.long 0xcfe57bbd,0xb12ec10e,0x35a0c2b5,0x82c7b656,0x161c67bd,0xddc7d5cd,0xae3a32cc,0xe32e8985,0xd11a5529,0x7aba9444,0x2427fa1a,0xe964ed02,0x24a1770a,0x1528392d,0x12c72fcd,0xa152ce2c +.long 0x8ec07649,0x714553a4,0x459dd453,0x18b4c290,0x7b64b110,0xea32b714,0x2e6f07a2,0xb871bfa5,0x9e2e3c9b,0xb67112e5,0x44aa90f6,0xfbf250e5,0xbd539006,0xf77aedb8,0xd172a66f,0x3b0cdf9a +.long 0xf8c51187,0xedf69fea,0x741e4da7,0x05bb67ec,0x08114345,0x47df0f32,0xbb9792b1,0x56facb07,0x8f6229e4,0xf3e007e9,0x526fba0f,0x62d103f4,0xb0339d79,0x4f33bef7,0xb59bfec1,0x9841357b +.long 0xc34e6705,0xfa8dbb59,0x7fdaa84c,0xc3c7180b,0xa4108537,0xf95872fc,0x932a3e5a,0x8750cc3b,0xb7275d7d,0xb61cc69d,0x2e59b2e9,0xffa0168b,0x6ecbb493,0xca032abc,0x2c9082d8,0x1d86dbd3 +.long 0xe28ef5ba,0xae1e0b67,0xcb18e169,0x2c9a4699,0x1e6bbd20,0x0ecd0e33,0xaf5e81d2,0x571b360e,0x101c1d45,0xcd9fea58,0x18880452,0x6651788e,0x1f8dd446,0xa9972635,0xe37281d0,0x44bed022 +.long 0x33da525d,0x094b2b2d,0x13144fd8,0xf193678e,0xf4c1061d,0xb8ab5ba4,0xdccbe0f4,0x4343b5fa,0x63812713,0xa8702371,0xf7611d93,0x47bf6d2d,0xbd21e1d7,0x46729b8c,0xd629e77d,0x7484d4e0 +.long 0x60dbac1f,0x830e6eea,0xda06a2f7,0x23d8c484,0x50ca535b,0x896714b0,0xebd97a9b,0xdc8d3644,0xb12177b4,0x106ef9fa,0x534d5d9c,0xf79bf464,0xa6ab360b,0x2537a349,0xa00c744f,0xc7c54253 +.long 
0xe5911a76,0xb3c7a047,0x647f1ee7,0x61ffa5c8,0x8f56ab42,0x15aed36f,0xa3ff9ac9,0x6a0d41b0,0xcc30d357,0x68f469f5,0x6b72be96,0xbe9adf81,0x903ad461,0x1cd926fe,0xcaca441b,0x7e89e38f +.long 0xfacf69d4,0xf0f82de5,0x4775344c,0x363b7e76,0xb2e36d04,0x6894f312,0x11d1c9a5,0x3c6cb4fe,0x4008e1f2,0x85d9c339,0x249f326c,0x5e9a85ea,0x678c5e06,0xdc35c60a,0x9f86fba9,0xc08b944f +.long 0x89f71f0f,0xde40c02c,0xff3da3c0,0xad8f3e31,0x42125ded,0x3ea5096b,0xa7379183,0x13879cbf,0x6b306a0b,0x6f4714a5,0x67646c5e,0x359c2ea6,0x07726368,0xfacf8943,0x65ff431e,0x07a58935 +.long 0x68754ab0,0x24d661d1,0x6f429a76,0x801fce1d,0xa58ce769,0xc068a85f,0x5d5eca2b,0xedc35c54,0xa3f660d1,0xea31276f,0xb8fc7167,0xa0184ebe,0x1d8db0ae,0x0f20f21a,0x56c35e12,0xd96d095f +.long 0xf8c2a25b,0xedf402b5,0x059204b6,0x1bb772b9,0x19b4e34c,0x50cbeae2,0x3fa0845a,0x93109d80,0x8ef59fb5,0x54f7ccf7,0x88070963,0x3b438fe2,0x31f3ba9b,0x9e28c659,0xead9da92,0x9cc31b46 +.long 0xb733aa5f,0x3c2f0ba9,0xf05af235,0xdece47cb,0xa2ac82a5,0xf8e3f715,0x2203f18a,0xc97ba641,0x09c11060,0xc3af5504,0x46af512d,0x56ea2c05,0xf3f28146,0xfac28daf,0x959ef494,0x87fab43a +.long 0xd4c5105f,0x09891641,0x6d7fbd65,0x1ae80f8e,0xbee6bdb0,0x9d67225f,0x7fc4d860,0x3b433b59,0x93e85638,0x44e66db6,0xe3e9862f,0xf7b59252,0x665c32ec,0xdb785157,0xae362f50,0x702fefd7 +.long 0x0fefb0c3,0x3754475d,0x46d7c35d,0xd48fb56b,0x363798a4,0xa070b633,0x8fdb98e6,0xae89f3d2,0x6363d14c,0x970b89c8,0x67abd27d,0x89817521,0x44d5a021,0x9bf7d474,0xcac72aee,0xb3083baf +.long 0xbe949a44,0x389741de,0x546a4fa5,0x638e9388,0xa0047bdc,0x3fe6419c,0xaaea57ca,0x7047f648,0x41fbab17,0x54e48a90,0x576bdba2,0xda8e0b28,0xc72afddc,0xe807eebc,0xf42577bf,0x07d3336d +.long 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,0x21d324f6,0x61d587d4,0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,0x4621efbe,0xfa11fe12 +.long 0x81685d7b,0x047b772e,0xbf34a976,0x23f27d81,0x915f48ef,0xc27608e2,0xa521d5c3,0x3b0b43fa,0x63ca7284,0x7613fb26,0x1d4db837,0x7f5729b4,0x583b526b,0x87b14898,0xbbadd3d1,0x00b732a6 +.long 0x2048e396,0x8e02f426,0x383d9de4,0x436b50b6,0x471e85ad,0xf78d3481,0xd005c8d6,0x8b01ea6a,0x97015c07,0xd3c7afee,0x4e3ba2ae,0x46cdf1a9,0x83d3a1d2,0x7a42e501,0xb541dff4,0xd54b5268 +.long 0x4e23e9bc,0x3f24cf30,0x126e3624,0x4387f816,0x3b0b6d61,0x26a46a03,0x8b2d777c,0xaf1bc845,0x527de79c,0x25c401ba,0x4261bbb6,0x0e1346d4,0x287b4bc7,0x4b96c44b,0x5254562f,0x658493c7 +.long 0xb8a24a20,0x23f949fe,0xf52ca53f,0x17ebfed1,0xbcfb4853,0x9b691bbe,0x6278a05d,0x5617ff6b,0xe3c99ebd,0x241b34c5,0x1784156a,0xfc64242e,0x695d67df,0x4206482f,0xee27c011,0xb967ce0e +.long 0x21c80b5d,0x65db3751,0xa31ecca0,0x2e7a563c,0x5238a07e,0xe56ffc4e,0x32ced854,0x3d6c2966,0xaf70b885,0xe99d7d1a,0x2d686459,0xafc3bad9,0x0cc8ba5b,0x9c78bf46,0x18955aa3,0x5a439519 +.long 0x5fe4e314,0xf8b517a8,0xfcb8906f,0xe60234d0,0xf2061b23,0xffe542ac,0x6b4cb59c,0x287e191f,0x09d877d8,0x21857ddc,0x14678941,0x1c23478c,0xb6e05ea4,0xbbf0c056,0xb01594fe,0x82da4b53 +.long 0xfadb8608,0xf7526791,0x7b74cdf6,0x049e832d,0xc2b90a34,0xa43581cc,0x9360b10c,0x73639eb8,0xe1e4a71b,0x4fba331f,0x8072f919,0x6ffd6b93,0x65679032,0x6e53271c,0xf14272ce,0x67206444 +.long 0xb2335834,0xc0f734a3,0x90ef6860,0x9526205a,0x04e2bb0d,0xcb8be717,0x02f383fa,0x2418871e,0x4082c157,0xd7177681,0x29c20073,0xcc914ad0,0xe587e728,0xf186c1eb,0x61bcd5fd,0x6fdb3c22 +.long 0xf2f9f8e9,0x30d014a6,0x4fec49d2,0x963ece23,0x9605a8d9,0x862025c5,0x19f8929a,0x39874445,0x12bf476a,0x01b6ff65,0x09cf7d91,0x598a64d8,0x93be56ca,0xd7ec7749,0xcbb33615,0x10899785 +.long 
0x02eee3ad,0xb8a092fd,0x30145270,0xa86b3d35,0x8512b675,0x323d98c6,0x62ebb40f,0x4b8bc785,0x413f9cde,0x7d301f54,0x2bab5664,0xa5e4fb4f,0x1cbfec23,0x1d2b252d,0xe177120d,0xfcd576bb +.long 0x83731a34,0x04427d3e,0xed836e8e,0x2bb9028e,0xb612ca7c,0xb36acff8,0xd3d9c73a,0xb88fe5ef,0xedea4eb3,0xbe2a6bc6,0x488eec77,0x43b93133,0xb17106e1,0xf41ff566,0x654efa32,0x469e9172 +.long 0x41c23fa3,0xb4480f04,0xc1989a2e,0xb4712eb0,0x93a29ca7,0x3ccbba0f,0xd619428c,0x6e205c14,0xb3641686,0x90db7957,0x45ac8b4e,0x0432691d,0xf64e0350,0x07a759ac,0x9c972517,0x0514d89c +.long 0xa8e67fc3,0x1701147f,0xab2085be,0x9e2e0b8b,0xac284e57,0xd5651824,0x74893664,0x890d4325,0xc55e68a3,0x8a7c5e6e,0x4339c85a,0xbf12e90b,0xf922b655,0x31846b85,0x0bf4d700,0x9a54ce4d +.long 0xf1a14295,0xd7f4e83a,0xb285d4f9,0x916f955c,0x99ffdaba,0xe57bb0e0,0xeab0d152,0x28a43034,0xb8a9cef8,0x0a36ffa2,0xb9ec051a,0x5517407e,0xea68e672,0x9c796096,0xfb3c77fb,0x853db5fb +.long 0xe864a51a,0x21474ba9,0x6e8a1b8b,0x6c267699,0x94120a28,0x7c823626,0x8383a5db,0xe61e9a48,0x9f84216d,0x7dd75003,0xad43cd85,0xab020d07,0xda12c659,0x9437ae48,0xe65452ad,0x6449c2eb +.long 0x2cf9d7c1,0xcc7c4c1c,0xee95e5ab,0x1320886a,0xbeae170c,0xbb7b9056,0xdbc0d662,0xc8a5b250,0xc11d2303,0x4ed81432,0x1f03769f,0x7da66912,0x84539828,0x3ac7a5fd,0x3bccdd02,0x14dada94 +.long 0x7ef6b0d1,0x8b84c321,0x7c933f22,0x52a9477a,0xfd440b82,0x5ef6728a,0x6ce4bd5e,0x5c3bd859,0xf22c2d3e,0x918b80f5,0xb7bb6cc5,0x368d5040,0x2695a11c,0xb66142a1,0xeb19ea70,0x60ac583a +.long 0x0eab2437,0x317cbb98,0x5e2654c8,0x8cc08c55,0xe6d8307f,0xfe2d6520,0x57428993,0xe9f147f3,0xd2fd6cf1,0x5f9c7d14,0x2d4fcbb0,0xa3ecd064,0x8e7341f7,0xad83fef0,0x3a63115c,0x643f23a0 +.long 0xe65ab743,0xd38a78ab,0x35edc89c,0xbf7c75b1,0x530df568,0x3dd8752e,0xe308c682,0xf85c4a76,0xe68acf37,0x4c9955b2,0xab32af85,0xa544df3d,0xa25cf493,0x4b8ec3f5,0x1a622feb,0x4d8f2764 +.long 0xf0dcbc49,0x7bb4f7aa,0x70bbb45b,0x7de551f9,0x9f2ca2e5,0xcfd0f3e4,0x1f5c76ef,0xece58709,0x167d79ae,0x32920edd,0xfa7d7ec1,0x039df8a2,0xbb30af91,0xf46206c0,0x22676b59,0x1ff5e2f5 +.long 0x6ea51d66,0x11f4a039,0x807d7a26,0x506c1445,0x755a9b24,0x60da5705,0x1f1a319e,0x8fc8cc32,0x9433d67d,0x83642d4d,0x6a7dd296,0x7fa5cb8f,0x9b7bde07,0x576591db,0x419716fb,0x13173d25 +.long 0xd5b340ff,0xea30599d,0xb0fe76c5,0xfc6b5297,0xab8f5adc,0x1c6968c8,0x901c928d,0xf723c7f5,0x9773d402,0x4203c321,0x1b51dd47,0xdf7c6aa3,0x552be23c,0x3d49e37a,0x0b5a6e87,0x57febee8 +.long 0x7bd8e739,0xc5ecbee4,0xae63bf75,0x79d44994,0x38fb8923,0x168bd00f,0xd0533130,0x75d48ee4,0xdb5cdf33,0x554f77aa,0x3c696769,0x3396e896,0xd3fd674e,0x2fdddbf2,0x99d0e3e5,0xbbb8f6ee +.long 0xcbae2f70,0x51b90651,0x93aaa8eb,0xefc4bc05,0xdd1df499,0x8ecd8689,0x22f367a5,0x1aee99a8,0xae8274c5,0x95d485b9,0x7d30b39c,0x6c14d445,0xbcc1ef81,0xbafea90b,0xa459a2ed,0x7c5f317a +.long 0x4ef44227,0x01211075,0xdc20f496,0xa17bed6e,0x819853cd,0x0cdfe424,0xf71e2ce7,0x13793298,0xdbbe307b,0x3c1f3078,0x76ee9936,0x6dd1c20e,0x423caa20,0x23ee4b57,0x8efb840e,0x4ac3793b +.long 0xed1f8ca0,0x934438eb,0x4ebb25a2,0x3e546658,0xc069896f,0xc415af0e,0x9a5aa43d,0xc13eddb0,0xd49eb8f6,0x7a04204f,0xd74f1670,0xd0d5bdfc,0x56fc0558,0x3697e286,0x01cebade,0x10207371 +.long 0x0647a82b,0x5f87e690,0x8f40054f,0x908e0ed4,0x79853803,0xa9f633d4,0x4a28b252,0x8ed13c9a,0x1f460f64,0x3e2ef676,0x36d06336,0x53930b9b,0x8fc4979b,0x347073ac,0x5ecd5597,0x84380e0e +.long 0xc4fe3c39,0xe3b22c6b,0x6c7bebdf,0xba4a8153,0x25693459,0xf23ab6b7,0x14922b11,0x53bc3770,0x5afc60db,0x4645c8ab,0x20b9f2a3,0xaa022355,0xce0fc507,0x52a2954c,0x7ce1c2e7,0x8c2731bb +.long 
0x18a0339d,0xf39608ab,0x3735436c,0xac7a658d,0xcd992b4f,0xb22c2b07,0xf40dcfd4,0x4e83daec,0x2f39ea3e,0x8a34c7be,0xb0a56d2e,0xef0c005f,0x6edd8038,0x62731f6a,0x4e3cb075,0x5721d740 +.long 0xfbeeee1b,0x1ea41511,0xef1d0c05,0xd1ef5e73,0x73c07d35,0x42feefd1,0x8a329493,0xe530a00a,0xf15ebfb0,0x5d55b7fe,0xd322491a,0x549de03c,0x745b3237,0xf7b5f602,0x1ab6e2b6,0x3632a3a2 +.long 0x0ef59f78,0x0d3bba89,0xc9e52b9a,0x0dfc6443,0x72631447,0x1dc79699,0xb3be20b1,0xef033917,0xb1383948,0x0c92735d,0xc0dd7d7d,0xc1fc29a2,0x403ed068,0x6485b697,0xaac93bdc,0x13bfaab3 +.long 0x0deeaf52,0x410dc6a9,0x4c641c15,0xb003fb02,0x5bc504c4,0x1384978c,0x864a6a77,0x37640487,0x222a77da,0x05991bc6,0x5e47eb11,0x62260a57,0xf21b432c,0xc7af6613,0xab4953e9,0x22f3acc9 +.long 0x8e41d155,0x52934922,0x3ac059ef,0x4d024568,0x4d884411,0xb0201755,0xa59a178f,0xce8055cf,0xf6204549,0xcd77d1af,0xc7066759,0xa0a00a3e,0x0272c229,0x471071ef,0xd3c4b6b0,0x009bcf6b +.long 0x22305177,0x2a2638a8,0x41645bbf,0xd51d59df,0xc0a7a3c0,0xa81142fd,0x4c7063ee,0xa17eca6d,0x60d9dcec,0x0bb887ed,0x20ad2455,0xd6d28e51,0xa67102ba,0xebed6308,0x8bffa408,0x042c3114 +.long 0x8aa68e30,0xfd099ac5,0x1483513e,0x7a6a3d7c,0xba2d8f0c,0xffcc6b75,0x1e78b954,0x54dacf96,0xa4a9af89,0xf645696f,0x06ac98ec,0x3a411940,0x22a67a20,0x41b8b3f6,0x99dec626,0x2d0b1e0f +.long 0x40be34e8,0x27c89192,0x91907f35,0xc7162b37,0xa956702b,0x90188ec1,0xdf93769c,0xca132f7d,0x0e2025b4,0x3ece44f9,0x0c62f14c,0x67aaec69,0x22e3cc11,0xad741418,0x7ff9a50e,0xcf9b75c3 +.long 0x4d348272,0x02fa2b16,0x9959d56d,0xbd99d61a,0x18762916,0xbc4f19db,0x49c1ac80,0xcc7cce50,0xd846bd83,0x4d59ebaa,0xa9202849,0x8775a9dc,0x6e1f4ca9,0x07ec4ae1,0xba893f11,0x27eb5875 +.long 0x662cc565,0x00284d51,0x0db4138d,0x82353a6b,0xaa32a594,0xd9c7aaaa,0xa5669c47,0xf5528b5e,0x2f23c5ff,0xf3220231,0x6affa3a1,0xe3e8147a,0x202ddda0,0xfb423d5c,0x6b871bd4,0x3d6414ac +.long 0xa51a168a,0x586f82e1,0x48ae5448,0xb712c671,0x76233eb8,0x9a2e4bd1,0x78811ca9,0x0188223a,0xf7c18de1,0x553c5e21,0xb27bb286,0x7682e451,0x0e51e929,0x3ed036b3,0xec9cb34f,0xf487211b +.long 0x0c24efc8,0x0d094277,0xbef737a4,0x0349fd04,0x514cdd28,0x6d1c9dd2,0x30da9521,0x29c135ff,0xf78b0b6f,0xea6e4508,0x678c143c,0x176f5dd2,0x4be21e65,0x08148418,0xe7df38c4,0x27f7525c +.long 0x748ab1a4,0x1fb70e09,0x5efe4433,0x9cba50a0,0x15f75af2,0x7846c7a6,0x5ee73ea8,0x2a7c2c57,0x3f0a449a,0x42e566a4,0xad90fc3d,0x45474c3b,0x8b61d057,0x7447be3d,0x3a4ec092,0x3e9d1cf1 +.long 0xf380a6e6,0x1603e453,0x9b1437c2,0x0b86e431,0xef29610a,0x7a4173f2,0xf03d57f7,0x8fa729a7,0x6c9c217e,0x3e186f6e,0x91919524,0xbe1d3079,0x153d4fb1,0x92a62a70,0xd68c2f71,0x32ed3e34 +.long 0x9eb1a8b7,0xd785027f,0xc5b22fe8,0xbc37eb77,0xb9d6a191,0x466b34f0,0x9a05f816,0x008a89af,0x7d42c10a,0x19b028fb,0x49b3f6b8,0x7fe8c92f,0xa5a0ade3,0x58907cc0,0x559d1a7c,0xb3154f51 +.long 0xd9790ed6,0x5066efb6,0xa6aa793b,0xa77a0cbc,0x223e042e,0x1a915f3c,0x69c5874b,0x1c5def04,0x73b6c1da,0x0e830078,0xfcd8557a,0x55cf85d2,0x0460f3b1,0x0f7c7c76,0x46e58063,0x87052acb +.long 0x907eae66,0x09212b80,0x4d721c89,0x3cb068e0,0xdd45ac1c,0xa87941ae,0x0daa0dbb,0xde8d5c0d,0xe3502e6e,0xda421fdc,0x4d89a084,0xc8944201,0xf0c24bfb,0x7307ba5e,0x20bde0ef,0xda212beb +.long 0xf82ce682,0xea2da24b,0x07f71fe4,0x058d3816,0x5ffad8de,0x35a02462,0xaadcefab,0xcd7b05dc,0x1d9f54ec,0xd442f8ed,0xb2d3b5ca,0x8be3d618,0xe06b2ce2,0xe2220ed0,0x1b0da4c0,0x82699a5f +.long 0x71c0c3a7,0x3ff106f5,0x0d34180c,0x8f580f5a,0x22d7d375,0x4ebb120e,0xe9513675,0x5e5782cc,0x99c82a70,0x2275580c,0x15ea8c4c,0xe8359fbf,0x7b415e70,0x53b48db8,0x100c6014,0xaacf2240 +.long 
0xe4652f1d,0x9faaccf5,0xd56157b2,0xbd6fdd2a,0x6261ec50,0xa4f4fb1f,0x476bcd52,0x244e55ad,0x047d320b,0x881c9305,0x6181263f,0x1ca983d5,0x278fb8ee,0x354e9a44,0x396e4964,0xad2dbc0f +.long 0x9268b3de,0x723f3aa2,0xe6e0609a,0x0d1ca29a,0x6cf44252,0x794866aa,0x01af87ed,0x0b59f3e3,0x7f4a6c51,0xe234e5ff,0x61dc2f7e,0xa8768fd2,0x0a94d81f,0xdafc7332,0x06938ce1,0xd7f84282 +.long 0x0546063e,0xae0b3c0e,0x5d61abc6,0x7fbadcb2,0x369ac400,0xd5d7a2c9,0xae67d10c,0xa5978d09,0x4f85eaac,0x290f211e,0xfacac681,0xe61e2ad1,0x388384cd,0xae125225,0xccfde30f,0xa7fb68e9 +.long 0x3daed4c2,0x7a59b936,0x2606f789,0x80a9aa40,0xf6a6d90a,0xb40c1ea5,0x514d5885,0x948364d3,0x70985182,0x062ebc60,0x33310895,0xa6db5b0e,0xe329c2f5,0x64a12175,0x90ea237e,0xc5f25bd2 +.long 0x2d0a4c23,0x7915c524,0x6bb3cc52,0xeb5d26e4,0xc09e2c92,0x369a9116,0xcf182cf8,0x0c527f92,0x2aede0ac,0x9e591938,0x6cc34939,0xb2922208,0x99a34361,0x3c9d8962,0xc1905fe6,0x3c81836d +.long 0xa001ec5a,0x4bfeb57f,0xa0dc5dba,0xe993f5bb,0x724a1380,0x47884109,0x32fe9a04,0x8a0369ab,0x8c927db8,0xea068d60,0x94655741,0xbf5f37cf,0x04b6c7ea,0x47d402a2,0x6af259cb,0x4551c295 +.long 0xed77ee8b,0x698b71e7,0xf309d5c7,0xbddf7bd0,0x34e780ca,0x6201c22c,0x4c295ef4,0xab04f7d8,0x4313a8ce,0x1c947294,0x92ca4cfe,0xe532e4ac,0xd0a7a97a,0x89738f80,0xa580fd5b,0xec088c88 +.long 0x42ce9e51,0x612b1ecc,0xb25fdd2a,0x8f9840fd,0x01e7f839,0x3cda78c0,0xece05480,0x546b3d3a,0x80d30916,0x271719a9,0x584c20c4,0x45497107,0x5bc78608,0xaf8f9478,0x277e2a4c,0x28c7d484 +.long 0x88a2ffe4,0xfce01767,0x28e169a5,0xdc506a35,0x7af9c93a,0x0ea10861,0x03fa0e08,0x1ed24361,0xa3d694e7,0x96eaaa92,0xef50bc74,0xc0f43b4d,0x64114db4,0xce6aa58c,0x7c000fd4,0x8218e8ea +.long 0x185f8844,0xac815dfb,0x1557abfb,0xcd7e90cb,0xafbfecdf,0x23d16655,0x085cac4a,0x80f3271f,0xd0e62f47,0x7fc39aa7,0x460a48e5,0x88d519d1,0xd28f101e,0x59559ac4,0xca9ae816,0x7981d9e9 +.long 0x9ac38203,0x5c38652c,0x57657fe5,0x86eaf87f,0xe21f5416,0x568fc472,0xe7e597b5,0x2afff39c,0x256d4eab,0x3adbbb07,0x8285ab89,0x22598692,0x041caefe,0x35f8112a,0xa5064c8b,0x95df02e3 +.long 0xc7004bf3,0x4d63356e,0xdb83c7de,0x230a08f4,0x8709a7b7,0xca27b270,0xcb9abd2d,0x0d1c4cc4,0x7550fee8,0x8a0bc66e,0x9cf7247e,0x369cd4c7,0x92b5b7e7,0x75562e84,0x5802af7b,0x8fed0da0 +.long 0xe48fb889,0x6a7091c2,0x7b8a9d06,0x26882c13,0x1b82a0e2,0xa2498663,0x3518152d,0x844ed736,0xd86e27c7,0x282f476f,0x04afefdc,0xa04edaca,0x6119e34d,0x8b256ebc,0x0787d78b,0x56a413e9 +.long 0x5a74be50,0x82ee061d,0xdea16ff5,0xe41781c4,0x99bfc8a2,0xe0b0c81e,0x0b547e2d,0x624f4d69,0xbdcc9ae4,0x3a83545d,0x409b1e8e,0x2573dbb6,0xa6c93539,0x482960c4,0x5ae18798,0xf01059ad +.long 0x3112795f,0x715c9f97,0x984e6ee1,0xe8244437,0xecb66bcd,0x55cb4858,0xabaffbee,0x7c136735,0x5dbec38e,0x54661595,0x388ad153,0x51c0782c,0xc6e0952f,0x9ba4c53a,0x1b21dfa8,0x27e6782a +.long 0x4ed2dbc2,0x682f903d,0x7c3b2d83,0x0eba59c8,0x9c7e9335,0x8e9dc84d,0x0eb226d7,0x5f9b21b0,0xaf267bae,0xe33bd394,0xbe2e15ae,0xaa86cc25,0x6a8ec500,0x4f0bf67d,0xf9630658,0x5846aa44 +.long 0xe2c2bf15,0xfeb09740,0xa9e99704,0x627a2205,0xc2fbc565,0xec8d73d0,0xc20c8de8,0x223eed8f,0xa8363b49,0x1ee32583,0xc9c2b0a6,0x1a0b6cb9,0x90dbc85c,0x49f7c3d2,0x1ef4c1ac,0xa8dfbb97 +.long 0x65c7c2ab,0xafb34d4c,0xe2c5ea84,0x1d4610e7,0x973c4ab5,0x893f6d1b,0x945ba5c4,0xa3cdd7e9,0x064417ee,0x60514983,0xad6bdf2b,0x1459b23c,0x5cf726c3,0x23b2c341,0x32d6354a,0x3a829635 +.long 0xab192c18,0x294f901f,0x7030164f,0xec5fcbfe,0xe2246ba6,0xe2e2fcb7,0x221a1a0c,0x1e7c88b3,0xc92d88c5,0x72c7dd93,0x1106fb59,0x41c2148e,0xa0f60f14,0x547dd4f5,0x63960f31,0xed9b52b2 +.long 
0xb0a5b358,0x6c8349eb,0x9e7e2ed6,0xb154c5c2,0xeda462db,0xcad5eccf,0x2de66b69,0xf2d6dbe4,0x8665e5b2,0x426aedf3,0x7b7f5723,0x488a8513,0x8bcbb386,0x15cc43b3,0xd791d879,0x27ad0af3 +.long 0x846e364f,0xc16c236e,0xdea50ca0,0x7f33527c,0x0926b86d,0xc4810775,0x0598e70c,0x6c2a3609,0xf024e924,0xa6755e52,0x9db4afca,0xe0fa07a4,0x66831790,0x15c3ce7d,0xa6cbb0d6,0x5b4ef350 +.long 0xb6205969,0x2c4aafc4,0xf6c7854f,0x42563f02,0x1d983b48,0x016aced5,0x99949755,0xfeb356d8,0xd1a39bd7,0x8c2a2c81,0xe6934ae9,0x8f44340f,0x447904da,0x148cf91c,0x0f51a926,0x7340185f +.long 0x7409ab46,0x2f8f00fb,0x80e289b2,0x057e78e6,0xa888e5d1,0x03e5022c,0x9dede4e2,0x3c87111a,0x7809460b,0x5b9b0e1c,0x71c9abc7,0xe751c852,0xc7cc1dc9,0x8b944e28,0x1d3cfa08,0x4f201ffa +.long 0x3e6721ce,0x02fc905c,0xd0b3674c,0xd52d70da,0x18810da4,0x5dc2e5ca,0x5c69dd99,0xa984b273,0x84de5ca4,0x63b92527,0xc852dec4,0x2f1c9872,0xc2e3de09,0x18b03593,0x9813dc2f,0x19d70b01 +.long 0xa6dc1d29,0x42806b2d,0xf871e144,0xd3030009,0xaaf49276,0xa1feb333,0xc70bc04b,0xb5583b9e,0x95695f20,0x1db0be78,0x89d012b5,0xfc841811,0x05f61643,0x6409f272,0xd5883128,0x40d34174 +.long 0x67419833,0xd79196f5,0x863b7b08,0x6059e252,0x1c56700c,0x84da1817,0xb28d3ec4,0x5758ee56,0x013b0ea6,0x7da2771d,0x54c5e9b9,0xfddf524b,0x24305d80,0x7df4faf8,0x3a97763f,0x58f5c1bf +.long 0x7c696042,0xa5af37f1,0x4a2538de,0xd4cba22c,0x9ea42600,0x211cb995,0x7b069889,0xcd105f41,0xddb81e74,0xb1e1cf19,0x5157b8ca,0x472f2d89,0xee9db885,0x086fb008,0x0f26d131,0x365cd570 +.long 0xa2be7053,0x284b02bb,0x7ab9a6d6,0xdcbbf7c6,0x20f7a530,0x4425559c,0x188767c8,0x961f2dfa,0x70dc80c4,0xe2fd9435,0xf0784120,0x104d6b63,0x53567122,0x7f592bc1,0xf688ad77,0xf6bc1246 +.long 0x0f15dde9,0x05214c05,0x0d5f2b82,0xa47a76a8,0x62e82b62,0xbb254d30,0x3ec955ee,0x11a05fe0,0x9d529b36,0x7eaff46e,0x8f9e3df6,0x55ab1301,0x99317698,0xc463e371,0xccda47ad,0xfd251438 +.long 0x23d695ea,0xca9c3547,0x16e589b5,0x48ce626e,0xb187d086,0x6b5b64c7,0xb2207948,0xd02e1794,0x7198111d,0x8b58e98f,0xdcf9c3cc,0x90ca6305,0xf34089b0,0x5691fe72,0xfc7c80ff,0x60941af1 +.long 0x22eb51e5,0xa09bc0a2,0xaa9cf09a,0xc0bb7244,0x80159f06,0x36a8077f,0xdddc560e,0x8b5c989e,0x512e1f43,0x19d2f316,0xad08ff62,0x02eac554,0x07d20b4e,0x012ab84c,0xd6d4e4e1,0x37d1e115 +.long 0xab7b19a8,0xb6443e1a,0xdef8cd45,0xf08d067e,0x685e03da,0x63adf3e9,0x4792b916,0xcf15a10e,0xb738a425,0xf44bcce5,0x9636b2fd,0xebe131d5,0x7850d605,0x94068841,0xb40d749d,0x09684eaa +.long 0x72ba075b,0x8c3c669c,0xba469015,0x89f78b55,0x3e9f8ba8,0x5706aade,0xb32d7ed7,0x6d8bd565,0x805f08d6,0x25f4e63b,0xc3bcc1b5,0x7f48200d,0xb025d847,0x4e801968,0x87cbe0a8,0x74afac04 +.long 0x7e63d690,0x43ed2c2b,0x0223cdb8,0xefb6bbf0,0x2884d3fe,0x4fec3cae,0xd75e25a4,0x065ecce6,0x69f79071,0x6c2294ce,0x044b8666,0x0d9a8e5f,0x17b69d8f,0x5009f238,0xc5dfdaf7,0x3c29f8fe +.long 0xebae68c4,0x9067528f,0x30c5ba21,0x5b385632,0x1fdd1aec,0x540df119,0xcfba4c78,0xcf37825b,0xbeb11454,0x77eff980,0x60c1b066,0x40a1a991,0xf889a1c7,0xe8018980,0x76c24be0,0xb9c52ae9 +.long 0x45650ef4,0x05fbbcce,0x8aa29ac7,0xae000f10,0x4f04c470,0x884b7172,0x19bb5c25,0x7cd4fde2,0xe8840869,0x6477b22a,0x5fbd0686,0xa8868859,0x1116dfba,0xf23cc02e,0xd87d7776,0x76cd563f +.long 0xa9d82abf,0xe2a37598,0xe6c170f5,0x5f188ccb,0x5066b087,0x81682200,0xc7155ada,0xda22c212,0xfbddb479,0x151e5d3a,0x6d715b99,0x4b606b84,0xf997cb2e,0x4a73b54b,0x3ecd8b66,0x9a1bfe43 +.long 0x2a67d48a,0x1c312809,0x031fa9e2,0xcd6a671e,0x0e43a34a,0xbec3312a,0x55ef47d3,0x1d935639,0x8fea73ea,0x5ea02489,0xa035afb2,0x8247b364,0x5265b54c,0xb58300a6,0x722c7148,0x3286662f +.long 
0xb4ec4c20,0xb77fd76b,0x0f3fe3fd,0xf0a12fa7,0x41d8c7e8,0xf845bbf5,0x5ec10aa8,0xe4d969ca,0x43e232a3,0x4c0053b7,0x37f8a45a,0xdc7a3fac,0x20d81c8f,0x3c4261c5,0xb00eab00,0xfd4b3453 +.long 0xd36e3062,0x76d48f86,0xa143ff02,0x626c5277,0xaf76f42e,0x538174de,0x6407ceac,0x2267aa86,0x72e572d5,0xfad76351,0xba7330eb,0xab861af7,0x418d8657,0xa0a1c8c7,0x20289a52,0x988821cb +.long 0xcccc18ad,0x79732522,0xf1a6e027,0xaadf3f8d,0x17c2354d,0xf7382c93,0xd818b689,0x5ce1680c,0xd9ecbee9,0x359ebbfc,0x1cae62ac,0x4330689c,0xc51ac38a,0xb55ce5b4,0xfe238ee8,0x7921dfea +.long 0x271d1ca5,0x3972bef8,0xe8aabd18,0x3e423bc7,0x44a3e5e3,0x57b09f3f,0x7b444d66,0x5da886ae,0xa9964375,0x68206634,0x699cd0ff,0x356a2fa3,0xdba515e9,0xaf0faa24,0xb321d79a,0x536e1f5c +.long 0x5c04e4ea,0xd3b9913a,0xd6f11513,0xd549dcfe,0x79fd1d94,0xee227bf5,0xb43f2c67,0x9f35afee,0xf1314f53,0xd2638d24,0xcabcd822,0x62baf948,0x4ef48db0,0x5542de29,0xfc5f6bb2,0xb3eb6a04 +.long 0x1208e16a,0x23c110ae,0xf8363e24,0x1a4d15b5,0x164be00b,0x30716844,0xf6f4690d,0xa8e24824,0x90b170cf,0x548773a2,0x42f191f4,0xa1bef331,0x9247aa97,0x70f418d0,0x48be9147,0xea06028e +.long 0xdbfb894e,0xe13122f3,0xce274b18,0xbe9b79f6,0xca58aadf,0x85a49de5,0x11487351,0x24957758,0xbb939099,0x111def61,0x26d13694,0x1d6a974a,0xd3fc253b,0x4474b4ce,0x4c5db15e,0x3a1485e6 +.long 0x147c15b4,0xe79667b4,0x7bc61301,0xe34f553b,0x17094381,0x032b80f8,0x723eaa21,0x55d8bafd,0xf1c0e74e,0x5a987995,0xebba289c,0x5a9b292e,0xeb4c8251,0x413cd4b2,0xd162db0a,0x98b5d243 +.long 0x68342520,0xbb47bf66,0xbaa862d1,0x08d68949,0xe906abcd,0x11f349c7,0xed7bf00e,0x454ce985,0xb55b803b,0xacab5c9e,0x31e3c16d,0xb03468ea,0xd273bf12,0x5c24213d,0x71587887,0x211538eb +.long 0x731dea2d,0x198e4a2f,0x74ed7b2a,0xd5856cf2,0x13a664fe,0x86a632eb,0xbda41291,0x932cd909,0xc0c4ddc0,0x850e95d4,0x347fc2c9,0xc0f422f8,0x86076bcb,0xe68cbec4,0xcd6cd286,0xf9e7c0c0 +.long 0x0f5f27ca,0x65994ddb,0xa80d59ff,0xe85461fb,0x66601023,0xff05481a,0xfc9ebbfb,0xc665427a,0x7587fd52,0xb0571a69,0x8d49efce,0x935289f8,0xea420688,0x61becc60,0x13a786af,0xb22639d9 +.long 0x361ecf90,0x1a8e6220,0x25506463,0x001f23e0,0x0a5c2b79,0xe4ae9b5d,0xd8149db5,0xebc9cdad,0x934aa728,0xb33164a1,0xae9b60f3,0x750eb00e,0x9b9cfbfd,0x5a91615b,0xef45f7f6,0x97015cbf +.long 0xbf5151df,0xb462c4a5,0xb07118f2,0x21adcc41,0x043fa42c,0xd60c545b,0xe96be1ab,0xfc21aa54,0x4e51ea80,0xe84bc32f,0x259b5d8d,0x3dae45f0,0xc38f1b5e,0xbb73c7eb,0xe8ae617d,0xe405a74a +.long 0x9f1c56bd,0xbb1ae9c6,0x49f196a4,0x8c176b98,0x6875092b,0xc448f311,0x9f976033,0xb5afe3de,0x145813e5,0xa8dafd49,0xe2b34226,0x687fc4d9,0x4c7ff57f,0xf2dfc92d,0x401f1b46,0x004e3fc1 +.long 0x1430c9ab,0x5afddab6,0x2238e997,0x0bdd41d3,0x418042ae,0xf0947430,0xcdddc4cb,0x71f9adda,0xc52dd907,0x7090c016,0x29e2047f,0xd9bdf44d,0x1b1011a6,0xe6f1fe80,0xd9acdc78,0xb63accbc +.long 0x1272a95b,0xcfc7e235,0xa6276ac8,0x0c667717,0xe2d7eef7,0x3c0d3709,0x9a685b3e,0x5add2b06,0x14ea5d65,0x363ad32d,0x8d7dd506,0xf8e01f06,0x75b4aac6,0xc9ea2213,0x0d353466,0xed2a2bf9 +.long 0xe9d3a7c3,0x439d79b5,0x81b7f34b,0x8e0ee5a6,0x1dc4ba75,0xcf3dacf5,0xeb3310c7,0x1d3d1773,0x7747ae83,0xa8e67112,0x197d6b40,0x31f43160,0xcd961400,0x0521ccee,0xf6535768,0x67246f11 +.long 0xef0c3133,0x702fcc5a,0x7e16693b,0x247cc45d,0xc729b749,0xfd484e49,0xb218320f,0x522cef7d,0x59ab93b3,0xe56ef405,0x9f181071,0x225fba11,0x15330ed0,0x33bd6595,0x1ddb32f7,0xc4be69d5 +.long 0x0448087c,0x264c7668,0x71432dae,0xac30903f,0x00f9bf47,0x3851b266,0x6cdd6d03,0x400ed311,0xf8fd2424,0x045e79fe,0xfa6da98b,0xfdfd974a,0x0c1e673a,0x45c9f641,0x5b2c5168,0x76f2e733 +.long 
0x2a601753,0x1adaebb5,0xc57c2d49,0xb286514c,0x1e0bfd24,0xd8769670,0x04478922,0x950c547e,0xe5d32bfe,0xd1d41969,0x750d6c3e,0x30bc1472,0xe0e27f3a,0x8f3679fe,0xa4a6ee0c,0x8f64a7dc +.long 0x633dfb1f,0x2fe59937,0x977f2547,0xea82c395,0x661ea646,0xcbdfdf1a,0xb9085451,0xc7ccc591,0x81761e13,0x82177962,0x9196885c,0xda57596f,0x28ffbd70,0xbc17e849,0x2671d36f,0x1e6e0a41 +.long 0x4152fcf5,0x61ae872c,0x9e77e754,0x441c87b0,0xa34dff09,0xd0799dd5,0x88a6b171,0x766b4e44,0x11f1c792,0xdc06a512,0x4be35c3e,0xea02ae93,0xe90c469e,0xe5ca4d6d,0x56e4ff5c,0x4df4368e +.long 0x4baef62e,0x7817acab,0xa85b91e8,0x9f5a2202,0x6ce57610,0x9666ebe6,0xf73bfe03,0x32ad31f3,0x25bcf4d6,0x628330a4,0x515056e6,0xea950593,0xe1332156,0x59811c89,0x8c11b2d7,0xc89cf1fe +.long 0x04e60cc0,0x75b63913,0x4625d375,0xce811e8d,0x2d26e562,0x030e43fc,0x608d36a0,0xfbb30b4b,0x48528118,0x634ff82c,0xcd285911,0x7c6fe085,0x99358f28,0x7f2830c0,0x665e6c09,0x2e60a95e +.long 0x9b785dbf,0x08407d3d,0xa759bce7,0x530889ab,0x52f61239,0xf228e0e6,0x6879be3c,0x2b6d1461,0x51a7bbf7,0xe6902c04,0x76f24a64,0x30ad99f0,0x98bc6da0,0x66d9317a,0xcb596ac0,0xf4f877f3 +.long 0x4c44f119,0xb05ff62d,0xe9b77416,0x4555f536,0x8caed63b,0xc7c0d059,0xc358b2a9,0x0cd2b7ce,0x46945fa3,0x3f33287b,0xd67c8791,0xf8785b20,0x9637bd08,0xc54a7a61,0x18be79d7,0x54d4598c +.long 0xc46d7ce1,0x889e5acb,0x8b085877,0x9a515bb7,0x0b7a5050,0xfac1a03d,0xf2926035,0x7d3e738a,0x2a6cb0eb,0x861cc2ce,0x8f7adc79,0x6f2e2955,0x33016376,0x61c4d451,0x5ad59090,0xd9fd2c80 +.long 0xb2b836a1,0xe5a83738,0x7c0d6622,0x855b41a0,0x7cc19af1,0x186fe317,0xfdd99acb,0x6465c1ff,0x6974b99e,0x46e5c23f,0xa2717cbe,0x75a7cf8b,0x062be658,0x4d2ebc3f,0x5f209c98,0x094b4447 +.long 0xb940cb5a,0x4af285ed,0x7cc82f10,0x6706d792,0x030526fa,0xc8c8776c,0xa0da9140,0xfa8e6f76,0x591ee4f0,0x77ea9d34,0x40274166,0x5f46e337,0xea671457,0x1bdf98bb,0x862a1fe2,0xd7c08b46 +.long 0x1c08ad63,0x46cc303c,0x4c845e7b,0x99543440,0x48f36bf7,0x1b8fbdb5,0x8c8273a7,0x5b82c392,0x928435d5,0x08f712c4,0x79330380,0x071cf0f1,0xa8da054a,0xc74c2d24,0x43c46b5c,0xcb0e7201 +.long 0xc0b7eff3,0x0ad7337a,0xc5e48b3c,0x8552225e,0x73f13a5f,0xe6f78b0c,0x82349cbe,0x5e70062e,0xe7073969,0x6b8d5048,0xc33cb3d2,0x392d2a29,0x4ecaa20f,0xee4f727c,0x2ccde707,0xa068c99e +.long 0xb87a2913,0xfcd5651f,0x3cc252f0,0xea3e3c15,0x3b6cd3e4,0x777d92df,0xc5a732e7,0x7a414143,0xa71ff493,0xa895951a,0xbbd37cf6,0xfe980c92,0xdecfeeff,0x45bd5e64,0xa44c43e9,0x910dc2a9 +.long 0xcca9f54d,0xcb403f26,0x9303f6db,0x928bbdfb,0xa9eee67c,0x3c37951e,0xf79961c3,0x3bd61a52,0x395c9a79,0x09a238e6,0x61eb352d,0x6940ca2d,0xc1875631,0x7d1e5c5e,0x1e1b20d1,0x1e19742c +.long 0x23fc2e6e,0x4633d908,0x08959149,0xa76e29a9,0x84ed7da5,0x61069d9c,0x5dbcad51,0x0baa11cf,0x961849da,0xd01eec64,0xaf3d8c28,0x93b75f1f,0x1ca2ee44,0x57bc4f9f,0x00e00558,0x5a26322d +.long 0x61a023ef,0x1888d658,0xb9e5246e,0x1d72aab4,0xe5563ec0,0xa9a26348,0xc3439a43,0xa0971963,0xadb9b5b7,0x567dd54b,0xc45a524b,0x73fac1a1,0xfe38e608,0x8fe97ef7,0x3f384f48,0x608748d2 +.long 0xc486094f,0xb0571794,0x8bf3a8d6,0x869254a3,0x310b0e25,0x148a8dd1,0x9aa3f7d8,0x99ab9f3f,0x6706c02e,0x0927c68a,0x69790e6c,0x22b5e76c,0x6c71376c,0x6c325260,0x09ef6657,0x53a57690 +.long 0xedffcf3a,0x8d63f852,0x3c0a6f55,0xb4d2ed04,0x12519b9e,0xdb3aa8de,0x1e0a569a,0x5d38e9c4,0x303747e2,0x871528bf,0xf5b5c18d,0xa208e77c,0xca6bf923,0x9d129c88,0xbf02839f,0xbcbf197f +.long 0x27323194,0x9b9bf030,0x339ca59d,0x3b055a8b,0x0f669520,0xb46b2312,0x497e5f24,0x19789f1f,0xaaf01801,0x9c499468,0x8b69d59c,0x72ee1190,0xacf4c079,0x8bd39595,0x8e0cd048,0x3ee11ece +.long 
0x1ed66f18,0xebde86ec,0xd61fce43,0x225d906b,0xe8bed74d,0x5cab07d6,0x27855ab7,0x16e4617f,0xb2fbc3dd,0x6568aadd,0x8aeddf5b,0xedb5484f,0x6dcf2fad,0x878f20e8,0x615f5699,0x3516497c +.long 0xfa181e69,0xef0a3fec,0x30d69a98,0x9ea02f81,0x66eab95d,0xb2e9cf8e,0x24720021,0x520f2beb,0x1df84361,0x621c540a,0x71fa6d5d,0x12037721,0x0ff5f6ff,0x6e3c7b51,0xabb2bef3,0x817a069b +.long 0xb294cda6,0x83572fb6,0xb9039f34,0x6ce9bf75,0x095cbb21,0x20e012f0,0xd063f0da,0xa0aecc1b,0xf02909e5,0x57c21c3a,0x48ce9cdc,0xc7d59ecf,0x8ae336f8,0x2732b844,0x3f4f85f4,0x056e3723 +.long 0x89e800ca,0x8a10b531,0x145208fd,0x50fe0c17,0xb714ba37,0x9e43c0d3,0x34189acc,0x427d200e,0xe616e2c0,0x05dee24f,0xee1854c1,0x9c25f4c8,0x8f342a73,0x4d3222a5,0xa027c952,0x0807804f +.long 0x4f0d56f3,0xc222653a,0xca28b805,0x961e4047,0x4a73434b,0x2c03f8b0,0xab712a19,0x4c966787,0x864fee42,0xcc196c42,0x5b0ece5c,0xc1be93da,0xc131c159,0xa87d9f22,0xdce45655,0x2bb6d593 +.long 0xb809b7ce,0x22c49ec9,0xe2c72c2c,0x8a41486b,0xfea0bf36,0x813b9420,0xa66dac69,0xb3d36ee9,0x328cc987,0x6fddc08a,0x3a326461,0x0a3bcd2c,0xd810dbba,0x7103c49d,0x4b78a4c4,0xf9d81a28 +.long 0xe4d55941,0x3de865ad,0x30384087,0xdedafa5e,0x4ef18b9b,0x6f414abb,0xfaee5268,0x9ee9ea42,0x37a55a4a,0x260faa16,0x015f93b9,0xeb19a514,0x9e9c3598,0x51d7ebd2,0x1932178e,0x523fc56d +.long 0xb98fe684,0x501d070c,0x124a1458,0xd60fbe9a,0x92bc6b3f,0xa45761c8,0xfe6f27cb,0xf5384858,0xb59e763b,0x4b0271f7,0x5b5a8e5e,0x3d4606a9,0x05a48292,0x1eda5d9b,0xe6fec446,0xda7731d0 +.long 0x90d45871,0xa3e33693,0x06166d8d,0xe9764040,0x89a90403,0xb5c33682,0x72f1d637,0x4bd17983,0xd5d2c53a,0xa616679e,0xfdcf3b87,0x5ec4bcd8,0xb66a694e,0xae6d7613,0xe3fc27e5,0x7460fc76 +.long 0x95caabee,0x70469b82,0x889501e3,0xde024ca5,0x076ed265,0x6bdadc06,0x5a0ef8b2,0x0cb1236b,0x0972ebf9,0x4065ddbf,0x22aca432,0xf1dd3875,0x744aff76,0xa88b97cf,0xfe8e3d24,0xd1359afd +.long 0x91502cf3,0x52a3ba2b,0x084db75d,0x2c3832a8,0xde30b1c9,0x04a12ddd,0xe31fd60c,0x7802eabc,0xa37fddab,0x33707327,0xfaafa973,0x65d6f2ab,0x11e6f91a,0x3525c5b8,0x5f46530b,0x76aeb0c9 +.long 0x2f93a675,0xe8815ff6,0x05f48679,0xa6ec9684,0x358ae884,0x6dcbb556,0xe19e3873,0x0af61472,0xa5f696be,0x72334372,0x6f22fb70,0xc65e57ea,0x946cea90,0x268da30c,0x65681b2a,0x136a8a87 +.long 0x0f9f44d4,0xad5e81dc,0x2c46585a,0xf09a6960,0xc447d1b1,0xd1649164,0x879dc8b1,0x3b4b36c8,0x3b6b234c,0x20d4177b,0x1730d9d0,0x096a2505,0xef80531d,0x0611b9b8,0x64bb495d,0xba904b3b +.long 0x93a3147a,0x1192d9d4,0x9a565545,0x9f30a5dc,0x6ef07212,0x90b1f9cb,0x0d87fc13,0x29958546,0xc17db9ba,0xd3323eff,0xcb1644a8,0xcb18548c,0x4f49ffbc,0x18a306d4,0x4c2e8684,0x28d658f1 +.long 0xa99f8c71,0x44ba60cd,0x4bf742ff,0x67b7abdb,0x914b3f99,0x66310f9c,0xf412c161,0xae430a32,0x88ace52f,0x1e6776d3,0x52d7067d,0x4bc0fa24,0x8f07cd1b,0x03c286aa,0xa985b2c1,0x4cb8f38c +.long 0x8c3bff36,0x83ccbe80,0x5263e575,0x005a0bd2,0x259bdcd1,0x460d7dda,0xfa5cab6b,0x4a1c5642,0x9fe4fc88,0x2b7bdbb9,0xcc97bbb5,0x09418e28,0xa12321ae,0xd8274fb4,0x5c87b64e,0xb137007d +.long 0xc63c4962,0x80531fe1,0x981fdb25,0x50541e89,0xfd4c2b6b,0xdc1291a1,0xa6df4fca,0xc0693a17,0x0117f203,0xb2c4604e,0x0a99b8d0,0x245f1963,0xc6212c44,0xaedc20aa,0x520f52a8,0xb1ed4e56 +.long 0xf8547be3,0xfe48f575,0xa9e45f98,0x0a7033cd,0x18c50100,0x4b45d3a9,0xa61d41da,0xb2a6cd6a,0x57933c6b,0x60bbb4f5,0x2b0d7ffc,0xa7538ebd,0x8cd626b6,0x9ea3ab8d,0x3601625a,0x8273a484 +.long 0x0168e508,0x88859845,0x99a94abd,0x8cbc9bb2,0xfab0a671,0x713ac792,0x6c9ebffc,0xa3995b19,0x1239e152,0xe711668e,0xbbb8dff4,0x56892558,0xdbf17963,0x8bfc7dab,0xb3de1253,0x5b59fe5a +.long 
+[auto-generated `.long` constant table data (precomputed lookup tables) added in the vendored OpenSSL assembly sources; contents omitted]
0x1ee682e0,0x60d963a9,0xfe85c727,0xdf48abc0,0x2e707c2d,0x0cadba13,0xa645aeff,0xde608d3a,0xedafd883,0x05f1c28b,0xbd94de1f,0x3c362ede,0x13593e41,0x8dd0629d,0x766d6eaf,0x0a5e736f +.long 0xf68cf9d1,0xbfa92311,0xc1797556,0xa4f9ef87,0x5601c209,0x10d75a1f,0x09b07361,0x651c374c,0x88b5cead,0x49950b58,0x6fa9dbaa,0x0ef00058,0x4e15f33a,0xf51ddc26,0x2ef46140,0x1f8b5ca6 +.long 0xee9523f0,0x343ac0a3,0x975ea978,0xbb75eab2,0x107387f4,0x1bccf332,0x9ab0062e,0x790f9259,0x1e4f6a5f,0xf1a363ad,0x62519a50,0x06e08b84,0x7265f1ee,0x60915187,0x93ae985e,0x6a80ca34 +.long 0xaaba4864,0x81b29768,0x8d52a7d6,0xb13cabf2,0x8ead03f1,0xb5c36348,0x81c7c1c0,0xc932ad95,0xcae1e27b,0x5452708e,0x1b0df648,0x9dac4269,0xdfcdb8bc,0x233e3f0c,0xec540174,0xe6ceccdf +.long 0x95081181,0xbd0d845e,0x699355d5,0xcc8a7920,0xc3b375a8,0x111c0f6d,0xfd51e0dc,0xfd95bc6b,0x6888523a,0x4a106a26,0xcb01a06d,0x4d142bd6,0xadb9b397,0x79bfd289,0xe9863914,0x0bdbfb94 +.long 0x1660f6a6,0x29d8a229,0x551c042d,0x7f6abcd6,0x0ac3ffe8,0x13039deb,0xec8523fb,0xa01be628,0x0ca1c328,0x6ea34103,0xb903928e,0xc74114bd,0x9e9144b0,0x8aa4ff4e,0x7f9a4b17,0x7064091f +.long 0xe447f2c4,0xa3f4f521,0x604291f0,0x81b8da7a,0x7d5926de,0xd680bc46,0x34a1202f,0x84f21fd5,0x4e9df3d8,0x1d1e3181,0x39ab8d34,0x1ca4861a,0x5b19aa4a,0x809ddeec,0x4d329366,0x59f72f7e +.long 0x386d5087,0xa2f93f41,0xdd67d64f,0x40bf739c,0x66702158,0xb4494205,0x73b1e178,0xc33c65be,0x38ca6153,0xcdcd657c,0xdc791976,0x97f4519a,0xcd6e1f39,0xcc7c7f29,0x7e3c3932,0x38de9cfb +.long 0x7b793f85,0xe448eba3,0xf067e914,0xe9f8dbf9,0xf114ae87,0xc0390266,0xcd6a8e2a,0x39ed75a7,0x7ffba390,0xadb14848,0x6af9bc09,0x67f8cb8b,0x9c7476db,0x322c3848,0x52a538d6,0xa320fecf +.long 0xb2aced2b,0xe0493002,0x616bd430,0xdfba1809,0xc331be70,0x531c4644,0x90d2e450,0xbc04d32e,0x0f9f142d,0x1805a0d1,0x47ee5a23,0x2c44a0c5,0x3989b4e3,0x31875a43,0x0c063481,0x6b1949fd +.long 0xbe0f4492,0x2dfb9e08,0xe9d5e517,0x3ff0da03,0xf79466a8,0x03dbe9a1,0x15ea9932,0x0b87bcd0,0xab1f58ab,0xeb64fc83,0x817edc8a,0x6d9598da,0x1d3b67e5,0x699cff66,0x92635853,0x645c0f29 +.long 0xeabaf21c,0x253cdd82,0x2241659e,0x82b9602a,0x2d9f7091,0x2cae07ec,0x8b48cd9b,0xbe4c720c,0x6f08d6c9,0x6ce5bc03,0xaf10bf40,0x36e8a997,0x3e10ff12,0x83422d21,0xbcc12494,0x7b26d3eb +.long 0xc9469ad6,0xb240d2d0,0x30afa05b,0xc4a11b4d,0xdd6ba286,0x4b604ace,0x3ee2864c,0x18486600,0x8d9ce5be,0x5869d6ba,0xff4bfb0d,0x0d8f68c5,0x5700cf73,0xb69f210b,0x6d37c135,0x61f6653a +.long 0x5aff5a48,0xff3d432b,0x72ba3a69,0x0d81c4b9,0xfa1899ef,0xee879ae9,0x2d6acafd,0xbac7e2a0,0x1c664399,0xd6d93f6c,0x5bcb135d,0x4c288de1,0x9dab7cbf,0x83031dab,0x3abbf5f0,0xfe23feb0 +.long 0xcdedca85,0x9f1b2466,0x1a09538c,0x140bb710,0x5e11115d,0xac8ae851,0x6f03f59e,0x0d63ff67,0x7d234afb,0x755e5551,0x7e208fc1,0x61c2db4e,0xf28a4b5d,0xaa9859ce,0x34af030f,0xbdd6d4fc +.long 0x3be01cb1,0xd1c4a26d,0x243aa07c,0x9ba14ffc,0xb2503502,0xf95cd3a9,0x7d2a93ab,0xe379bc06,0xd4ca8d68,0x3efc18e9,0x80bb412a,0x083558ec,0x9645a968,0xd903b940,0x9ba6054f,0xa499f0b6 +.long 0xb8349abe,0x208b573c,0x30b4fc1c,0x3baab3e5,0xcb524990,0x87e978ba,0xccdf0e80,0x3524194e,0x7d4bcc42,0x62711725,0xb90109ba,0xe90a3d9b,0x1323e1e0,0x3b1bdd57,0x5eae1599,0xb78e9bd5 +.long 0x9e03d278,0x0794b746,0xd70e6297,0x80178605,0x99c97855,0x171792f8,0xf5a86b5c,0x11b393ee,0xd8884f27,0x48ef6582,0xbf19ba5f,0xbd44737a,0xa42062c6,0x8698de4c,0x61ce9c54,0x8975eb80 +.long 0xd7fe71f3,0xd50e57c7,0xbc97ce38,0x15342190,0x4df07b63,0x51bda2de,0x200eb87d,0xba12aeae,0xa9b4f8f6,0xabe135d2,0xfad6d99c,0x04619d65,0x7994937c,0x4a6683a7,0x6f94f09a,0x7a778c8b +.long 
0x20a71b89,0x8c508623,0x1c229165,0x241a2aed,0xaaf83a99,0x352be595,0x1562bac8,0x9fbfee7f,0x5c4017e3,0xeaf658b9,0x15120b86,0x1dc7f9e0,0x4c034d6f,0xd84f13dd,0xeaea3038,0x283dd737 +.long 0xcd85d6a2,0x197f2609,0xfae60177,0x6ebbc345,0x4e12fede,0xb80f031b,0x07a2186b,0xde55d0c2,0x24dcdd5a,0x1fb3e37f,0x7ed191fb,0x8d602da5,0x76023e0d,0x108fb056,0x459c20c0,0x70178c71 +.long 0x3fe54cf0,0xfad5a386,0x02bbb475,0xa4a3ec4f,0x919d94d7,0x1aa5ec20,0xa81e4ab3,0x5d3b63b5,0x5ad3d2af,0x7fa733d8,0xd1ac7a37,0xfbc586dd,0x40779614,0x282925de,0xe74a242a,0xfe0ffffb +.long 0x906151e5,0x3f39e67f,0x55e10649,0xcea27f5f,0xc17cf7b7,0xdca1d4e1,0x2fe2362d,0x0c326d12,0x7dd35df3,0x05f7ac33,0xc396dbdf,0x0c3b7639,0x03b7db1c,0x0912f5ac,0x5c9ed4a9,0x9dea4b70 +.long 0xaae3f639,0x475e6e53,0xfc278bac,0xfaba0e7c,0x9490375f,0x16f9e221,0xa5a7ed0a,0xaebf9746,0xf41ad5d6,0x45f9af3f,0xb2e99224,0x03c4623c,0xb3cf56aa,0x82c5bb5c,0x34567ed3,0x64311819 +.long 0x8be489ac,0xec57f211,0xb9a1104b,0x2821895d,0x6064e007,0x610dc875,0x5b20d0fe,0x8e526f3f,0x5b645aee,0x6e71ca77,0x800e10ff,0x3d1dcb9f,0x189cf6de,0x36b51162,0x6bb17353,0x2c5a3e30 +.long 0x2a6c6fbf,0xc186cd3e,0x4bf97906,0xa74516fa,0x279d6901,0x5b4b8f4b,0x2b573743,0x0c4e57b4,0xb6e386b6,0x75fdb229,0x99deac27,0xb46793fd,0xcf712629,0xeeec47ea,0xcbc3b2dd,0xe965f3c4 +.long 0x425c6559,0x8dd1fb83,0x0af06fda,0x7fc00ee6,0x33d956df,0xe98c9225,0x4fbdc8a2,0x0f1ef335,0xb79b8ea2,0x2abb5145,0xbdbff288,0x40fd2945,0xd7185db7,0x6a814ac4,0xc084609a,0xc4329d6f +.long 0xed1be45d,0xc9ba7b52,0xe4cd2c74,0x891dd20d,0x824139b1,0x5a4d4a7f,0xb873c710,0x66c17716,0x2843c4e0,0x5e5bc141,0xb97eb5bf,0xd5ac4817,0x450c95c7,0xc0f8af54,0x318406c5,0xc91b3fa0 +.long 0xab9d97f8,0x360c340a,0x90a2d611,0xfb57bd07,0xa6a6f7e5,0x4339ae3c,0x2feb8a10,0x9c1fcd2a,0xc7ea7432,0x972bcca9,0x308076f6,0x1b0b924c,0x2a5b4ca5,0x80b2814a,0x61ef3b29,0x2f78f55b +.long 0xc18a414f,0xf838744a,0x903d0a86,0xc611eaae,0x2a453f55,0x94dabc16,0x14efb279,0xe6f2e3da,0x9320dc3c,0x5b7a6017,0x8df6b5a4,0x692e382f,0x2d40fa90,0x3f5e15e0,0x643dd318,0xc87883ae +.long 0x53544774,0x511053e4,0x3adba2bc,0x834d0ecc,0xbae371f5,0x4215d7f7,0x6c8663bc,0xfcfd57bf,0xd6901b1d,0xded2383d,0xb5587dc3,0x3b49fbb4,0x07625f62,0xfd44a08d,0x9de9b762,0x3ee4d65b +.long 0x0d63d1fa,0x64e5137d,0x02a9d89f,0x658fc052,0x50436309,0x48894874,0xd598da61,0xe9ae30f8,0x818baf91,0x2ed710d1,0x8b6a0c20,0xe27e9e06,0x1c1a6b44,0x1e28dcfb,0xd6ac57dc,0x883acb64 +.long 0xc2c6ff70,0x8735728d,0xc5dc2235,0x79d6122f,0x19e277f9,0x23f5d003,0xdded8cc7,0x7ee84e25,0x63cd880a,0x91a8afb0,0x3574af60,0x3f3ea7c6,0x02de7f42,0x0cfcdc84,0xb31aa152,0x62d0792f +.long 0x8a5807ce,0x8e1b4e43,0xe4109a7e,0xad283893,0xafd59dda,0xc30cc9cb,0x3d8d8093,0xf65f36c6,0xa60d32b2,0xdf31469e,0x3e8191c8,0xee93df4b,0x355bdeb5,0x9c1017c5,0x8616aa28,0xd2623185 +.long 0xdec31a21,0xb02c83f9,0x6ad9d573,0x988c8b23,0xa57be365,0x53e983ae,0x646f834e,0xe968734d,0x5da6309b,0x9137ea8f,0xc1f1ce16,0x10f3a624,0xca440921,0x782a9ea2,0x5b46f1b5,0xdf94739e +.long 0xcce85c9b,0x9f9be006,0xa4c7c2d3,0x360e70d6,0xaefa1e60,0x2cd5beea,0x8c3d2b6d,0x64cf63c0,0xe1cf6f90,0xfb107fa3,0xd5e044e6,0xb7e937c6,0xce34db9f,0x74e8ca78,0x3e210bd0,0x4f8b36c1 +.long 0x34a35ea8,0x1df165a4,0x4d4412f6,0x3418e0f7,0x518836c3,0x5af1f8af,0x130e1965,0x42ceef4d,0x543a1957,0x5560ca0b,0x886cb123,0xc33761e5,0xfe98ed30,0x66624b1f,0x1090997d,0xf772f4bf +.long 0x4885d410,0xf4e540bb,0x9ba5f8d7,0x7287f810,0xde98dfb1,0x22d0d865,0xbcfbb8a3,0x49ff51a1,0x6bc3012e,0xb6b6fa53,0x170d541d,0x3d31fd72,0x4b0f4966,0x8018724f,0x87dbde07,0x79e7399f +.long 
0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,0xcd42ab1b,0x803f3e02,0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,0x5067adc1,0xc097440e +.long 0x3524ff16,0x730eafb6,0x823fc6ce,0xd7f9b51e,0x443e4ac0,0x27bd0d32,0x4d66f217,0x40c59ad9,0x17c387a4,0x6c33136f,0xeb86804d,0x5043b8d5,0x675a73c9,0x74970312,0xf16669b6,0x838fdb31 +.long 0x418e7ddd,0xc507b6dd,0x472f19d6,0x39888d93,0x0c27eb4d,0x7eae26be,0xfbabb884,0x17b53ed3,0x2b01ae4f,0xfc27021b,0xcf488682,0x88462e87,0x215e2d87,0xbee096ec,0xd242e29b,0xeb2fea9a +.long 0xb821fc28,0x5d985b5f,0xdc1e2ad2,0x89d2e197,0x9030ba62,0x55b566b8,0x4f41b1c6,0xe3fd41b5,0xb9a96d61,0xb738ac2e,0x369443f4,0x7f8567ca,0xf803a440,0x8698622d,0x8fe2f4dc,0x2b586236 +.long 0x56b95bce,0xbbcc00c7,0x616da680,0x5ec03906,0x72214252,0x79162ee6,0x86a892d2,0x43132b63,0x2f3263bf,0x4bdd3ff2,0x9cd0a142,0xd5b3733c,0x44415ccb,0x592eaa82,0x8d5474ea,0x663e8924 +.long 0x5236344e,0x8058a25e,0xbda76ee6,0x82e8df9d,0x11cc3d22,0xdcf6efd8,0x3b4ab529,0x00089cda,0xbd38a3db,0x91d3a071,0xef72b925,0x4ea97fc0,0xea3edf75,0x0c9fc15b,0xa4348ed3,0x5a6297cd +.long 0xce7c42d4,0x0d38ab35,0x82feab10,0x9fd493ef,0x82111b45,0x46056b6d,0x73efc5c3,0xda11dae1,0x5545a7fb,0xdc740278,0x40d507e6,0xbdb2601c,0x7066fa58,0x121dfeeb,0x39ae8c2a,0x214369a8 +.long 0x06e0956c,0x195709cb,0x010cd34b,0x4c9d254f,0x0471a532,0xf51e13f7,0x1e73054d,0xe19d6791,0xdb5c7be3,0xf702a628,0xb24dde05,0xc7141218,0xf29b2e2e,0xdc18233c,0x85342dba,0x3a6bd1e8 +.long 0xb311898c,0x3f747fa0,0xcd0eac65,0xe2a272e4,0xf914d0bc,0x4bba5851,0xc4a43ee3,0x7a1a9660,0xa1c8cde9,0xe5a367ce,0x7271abe3,0x9d958ba9,0x3d1615cd,0xf3ff7eb6,0xf5ae20b0,0xa2280dce +.long 0xcf640147,0x56dba5c1,0x5e83d118,0xea5a2e3d,0xda24c511,0x04cd6b6d,0xe854d214,0x1c0f4671,0x69565381,0x91a6b7a9,0xdecf1f5b,0xdc966240,0xfcf5d009,0x1b22d21c,0x9021dbd5,0x2a05f641 +.long 0xd4312483,0x8c0ed566,0x643e216f,0x5179a95d,0x17044493,0xcc185fec,0x54991a21,0xb3063339,0x0081a726,0xd801ecdb,0x4fa89bbb,0x0149b0c6,0x4391b6b9,0xafe9065a,0xd633f3a3,0xedc92786 +.long 0xae6a8e13,0xe408c24a,0x9f3897ab,0x85833fde,0xd81a0715,0x43800e7e,0xb44ffc5f,0xde08e346,0xcdeff2e0,0x7094184c,0x165eaed1,0x49f9387b,0x777c468a,0x635d6129,0x538c2dd8,0x8c0dcfd1 +.long 0x7a6a308b,0xd6d9d9e3,0x4c2767d3,0x62375830,0xf38cbeb6,0x874a8bc6,0xccb6fd9e,0xd94d3f1a,0xba21f248,0x92a9735b,0x6cd1efb0,0x272ad0e5,0x05b03284,0x7437b69c,0x6948c225,0xe7f04702 +.long 0xcba2ecec,0x8a56c04a,0xe3a73e41,0x0c181270,0x03e93725,0x6cb34e9d,0x496521a9,0xf77c8713,0xfa7f9f90,0x94569183,0x8c9707ad,0xf2e7aa4c,0x26c1c9a3,0xced2c9ba,0x40197507,0x9109fe96 +.long 0xe9adfe1c,0x9ae868a9,0x314e39bb,0x3984403d,0xf2fe378f,0xb5875720,0xba44a628,0x33f901e0,0x3652438c,0xea1125fe,0x9dd1f20b,0xae9ec4e6,0xbebf7fbd,0x1e740d9e,0x42dbe79c,0x6dbd3ddc +.long 0xedd36776,0x62082aec,0xe9859039,0xf612c478,0x032f7065,0xa493b201,0x4ff9b211,0xebd4d8f2,0xaac4cb32,0x3f23a0aa,0x15ed4005,0xea3aadb7,0xafa27e63,0xacf17ea4,0xc11fd66c,0x56125c1a +.long 0x3794f8dc,0x266344a4,0x483c5c36,0xdcca923a,0x3f9d10a0,0x2d6b6bbf,0x81d9bdf3,0xb320c5ca,0x47b50a95,0x620e28ff,0xcef03371,0x933e3b01,0x99100153,0xf081bf85,0xc3a8c8d6,0x183be9a0 +.long 0xd6bbe24d,0x4e3ddc5a,0x53843795,0xc6c74630,0x65ec2d4c,0x78193dd7,0xcd3c89b2,0xb8df26cc,0x5a483f8d,0x98dbe399,0x7dd3313a,0x72d8a957,0xab0bd375,0x65087294,0x7c259d16,0xfcd89248 +.long 0x7613aa81,0x8a9443d7,0x85fe6584,0x80100800,0x7fb10288,0x70fc4dbc,0xe86beee8,0xf58280d3,0x7c978c38,0x14fdd82f,0x0de44d7b,0xdf1204c1,0x4160252f,0xa08a1c84,0xc17646a5,0x591554ca +.long 
0xa05bd525,0x214a37d6,0x07957b3c,0x48d5f09b,0xd7109bc9,0x0247cdcb,0x30599ce7,0x40f9e4bb,0xf46ad2ec,0xc325fa03,0xc3e3f9ee,0x00f766cf,0xd43a4577,0xab556668,0x3ee03b93,0x68d30a61 +.long 0x77b46a08,0x7ddc81ea,0xc7480699,0xcf5a6477,0x6633f683,0x43a8cb34,0x92363c60,0x1b867e6b,0x1f60558e,0x43921114,0x2f41450e,0xcdbcdd63,0xcc630e8b,0x7fc04601,0x97038b43,0xea7c66d5 +.long 0x04e99fd8,0x7259b8a5,0x4785549a,0x98a8dd12,0x840552e1,0x0e459a7c,0x4bb0909e,0xcdfcf4d0,0x53758da7,0x34a86db2,0xeac997e1,0xe643bb83,0x530c5b7e,0x96400bd7,0xb41c8b52,0x9f97af87 +.long 0xfbeee3f9,0x34fc8820,0x49091afd,0x93e53490,0x9a31f35c,0x764b9be5,0x57e3d924,0x71f37864,0x943aa75e,0x02fb34e0,0xab8ff6e4,0xa18c9c58,0x33cf0d19,0x080f31b1,0x083518a7,0x5c9682db +.long 0xb709c3de,0x873d4ca6,0x3575b8f0,0x64a84262,0x020154bb,0x6275da1f,0xd17cf1ab,0x97678caa,0x951a95c3,0x8779795f,0x50fccc08,0xdd35b163,0x33d8f031,0x32709627,0x498dd85c,0x3c5ab10a +.long 0x41dca566,0xb6c185c3,0xd8622aa3,0x7de7feda,0x901b6dfb,0x99e84d92,0x7c4ad288,0x30a02b0e,0x2fd3cf36,0xc7c81daa,0xdf89e59f,0xd1319547,0xcd496733,0xb2be8184,0x93d3412b,0xd5f449eb +.long 0x25fe531d,0x7ea41b1b,0x6a1d5646,0xf9797432,0x2bde501a,0x86067f72,0x0c85e89c,0xf91481c0,0xf8b05bc6,0xca8ee465,0x02e83cda,0x1844e1cf,0xb4dbe33b,0xca82114a,0x4eabfde2,0x0f9f8769 +.long 0x38b27fe2,0x4936b1c0,0xaba402df,0x63b6359b,0x656bdbab,0x40c0ea2f,0x6580c39c,0x9c992a89,0x2a60aed1,0x600e8f15,0xe0bf49df,0xeb089ca4,0x2d42d99a,0x9c233d7d,0x4c6bc2fa,0x648d3f95 +.long 0xe1add3f3,0xdcc383a8,0x4f64a348,0xf42c0c6a,0x0030dbdb,0x2abd176f,0x7d6c215e,0x4de501a3,0x4b9a64bc,0x4a107c1f,0x2496cd59,0xa77f0ad3,0x7688dffb,0xfb78ac62,0x67937d8e,0x7025a2ca +.long 0xd1a8f4e7,0xfde8b2d1,0x7354927c,0xf5b3da47,0xd9205735,0xe48606a3,0xe177b917,0xac477cc6,0xa883239a,0xfb1f73d2,0xcc8b8357,0xe12572f6,0xfb1f4f86,0x9d355e9c,0xd9f3ec6e,0x89b795f8 +.long 0xb54398dc,0x27be56f1,0x3fedeed5,0x1890efd7,0x9c6d0140,0x62f77f1f,0x596f0ee4,0x7ef0e314,0xcc61dab3,0x50ca6631,0xf4866e4f,0x4a39801d,0xae363b39,0x66c8d032,0x2ead66aa,0x22c591e5 +.long 0xde02a53e,0x954ba308,0xd389f357,0x2a6c060f,0xfbf40b66,0xe6cfcde8,0xc6340ce1,0x8e02fc56,0x73adb4ba,0xe4957795,0xa7b03805,0x7b86122c,0x0c8e6fa6,0x63f83512,0x057d7804,0x83660ea0 +.long 0x21ba473c,0xbad79105,0xded5389d,0xb6c50bee,0xaa7c9bc0,0xee2caf4d,0x8c4e98a7,0xd97b8de4,0xab3bbddb,0xa9f63e70,0x2597815a,0x3898aabf,0xac15b3d9,0x7659af89,0x703ce784,0xedf7725b +.long 0xe085116b,0x25470fab,0x87285310,0x04a43375,0xe2bfd52f,0x4e39187e,0x7d9ebc74,0x36166b44,0xfd4b322c,0x92ad433c,0xba79ab51,0x726aa817,0xc1db15eb,0xf96eacd8,0x0476be63,0xfaf71e91 +.long 0x641fad98,0xdd69a640,0x29622559,0xb7995918,0xde4199dc,0x03c6daa5,0xad545eb4,0x92cadc97,0x256534e4,0x1028238b,0x8595409a,0x73e80ce6,0xd05dc59b,0x690d4c66,0x981dee80,0xc95f7b8f +.long 0xd856ac25,0xf4337014,0xac524dca,0x441bd9dd,0x5f0499f5,0x640b3d85,0xd5fda182,0x39cf84a9,0xb2aa95a0,0x04e7b055,0x0ddf1860,0x29e33f0a,0x423f6b43,0x082e74b5,0x0aaa2b0f,0x217edeb9 +.long 0x83cbea55,0x58b83f35,0xbc185d70,0xc485ee4d,0x1e5f6992,0x833ff03b,0xcf0c0dd5,0xb5b9b9cc,0x4e9e8a50,0x7caaee8e,0x6269dafd,0x462e907b,0xfbe791c6,0x6ed5cee9,0xed430790,0x68ca3259 +.long 0x13b5ba88,0x2b72bdf2,0x35ef0ac4,0x60294c8a,0x19b99b08,0x9c3230ed,0x6c2589aa,0x560fff17,0xd6770374,0x552b8487,0x9a56f685,0xa373202d,0x45f175d9,0xd3e7f907,0xd080d810,0x3c2f315f +.long 0x7b9520e8,0x1130e9dd,0x0af037b5,0xc078f9e2,0x1e9c104c,0x38cd2ec7,0xc472fe92,0x0f684368,0x6247e7ef,0xd3f1b5ed,0x396dfe21,0xb32d33a9,0x4a9aa2c2,0x46f59cf4,0xff0f7e41,0x69cd5168 +.long 
0x4b3234da,0x3f59da0f,0xb4579ebe,0xcf0b0235,0x6d2476c7,0x6d1cbb25,0x9dc30f08,0x4f0837e6,0x906f6e98,0x9a4075bb,0xc761e7d1,0x253bb434,0x6e73af10,0xde2e645f,0x0c5f131c,0xb89a4060 +.long 0xb8cc037f,0xd12840c5,0x7405bb47,0x3d093a5b,0x206348b8,0x6202c253,0xc55a3ca7,0xbf5d57fc,0x8c3bef48,0x89f6c90c,0x5a0a960a,0x23ac7623,0x552b42ab,0xdfbd3d6b,0x132061f6,0x3ef22458 +.long 0xc97e6516,0xd74e9bda,0xc230f49e,0x88779360,0x1e74ea49,0xa6ec1de3,0x3fb645a2,0x581dcee5,0x8f483f14,0xbaef2391,0xd137d13b,0x6d2dddfc,0xd2743a42,0x54cde50e,0xe4d97e67,0x89a34fc5 +.long 0x12e08ce5,0x13f1f5b3,0xa7f0b2ca,0xa80540b8,0x01982805,0x854bcf77,0x233bea04,0xb8653ffd,0x02b0b4c9,0x8e7b8787,0x9acb170a,0x2675261f,0x930c14e5,0x061a9d90,0xdef0abea,0xb59b30e0 +.long 0x0200ec7d,0x1dc19ea6,0x0bce132b,0xb6f4a3f9,0xf13e27e0,0xb8d5de90,0x1fade16f,0xbaee5ef0,0xe4c6cf38,0x6f406aaa,0xd1369815,0xab4cfe06,0xefd550c6,0x0dcffe87,0x75ff7d39,0x9d4f59c7 +.long 0x51deb6ad,0xb02553b1,0xb1877749,0x812399a4,0xca6006e1,0xce90f71f,0xb02b6e77,0xc32363a6,0xdc36c64d,0x02284fbe,0xa7e1ae61,0x86c81e31,0xb909d94a,0x2576c7e5,0x818b2bb0,0x8b6f7d02 +.long 0x56faa38a,0xeca3ed07,0x9305bb54,0xa3790e6c,0x7bc73061,0xd784eeda,0x6dd50614,0xbd56d369,0x229a8aa9,0xd6575949,0x4595ec28,0xdcca8f47,0x06ab4fe6,0x814305c1,0x24f43f16,0xc8c39768 +.long 0x523f2b36,0xe2a45f36,0x920d93bb,0x995c6493,0x90f1632b,0xf8afdab7,0x1c295954,0x79ebbecd,0x79592f48,0xc7bb3ddb,0x5f88e998,0x67216a7b,0xbc01193e,0xd91f098b,0xb1db83fc,0xf7d928a5 +.long 0xe991f600,0x55e38417,0x2981a934,0x2a91113e,0x06b13bde,0xcbc9d648,0x0755ff44,0xb011b6ac,0x045ec613,0x6f4cb518,0xc2f5930a,0x522d2d31,0x382e65de,0x5acae1af,0x27bc966f,0x57643067 +.long 0x1c7193f0,0x5e12705d,0x3be8858e,0xf0f32f47,0x96c6dfc7,0x785c3d7d,0xbf31795d,0xd75b4a20,0x342659d4,0x91acf17b,0x44f0378f,0xe596ea34,0xce52129d,0x4515708f,0x79f2f585,0x17387e1e +.long 0x49dee168,0x72cfd2e9,0x3e2af239,0x1ae05223,0x1d94066a,0x009e75be,0x38abf413,0x6cca31c7,0x9bc49908,0xb50bd61d,0xf5e2bc1e,0x4a9b4a8c,0x946f83ac,0xeb6cc5f7,0xebffab28,0x27da93fc +.long 0x4821c8c5,0xea314c96,0xa83c15f4,0x8de49ded,0x7af33004,0x7a64cf20,0xc9627e10,0x45f1bfeb,0x54b9df60,0x878b0626,0xa95c0b33,0x5e4fdc3c,0xc2035d8e,0xe54a37ca,0x80f20b8c,0x9087cda9 +.long 0x8319ade4,0x36f61c23,0xde8cfdf8,0x766f287a,0x346f3705,0x48821948,0x16e4f4a2,0x49a7b853,0x5cedadfd,0xb9b3f8a7,0x8db2a815,0x8f562815,0x01f68f95,0xc0b7d554,0x688a208e,0x12971e27 +.long 0xd0ff34fc,0xc9f8b696,0x1222718c,0x20824de2,0x0c95284d,0x7213cf9f,0xdc158240,0xe2ad741b,0x54043ccf,0x0ee3a6df,0xd84412b3,0x16ff479b,0xdfc98af0,0xf6c74ee0,0x52fcd2fb,0xa78a169f +.long 0x99c930e9,0xd8ae8746,0x49e117a5,0x1d33e858,0x6624759f,0x7581fcb4,0x5bedc01d,0xde50644f,0xcaf3155e,0xbeec5d00,0xbc73e75f,0x672d66ac,0x270b01db,0x86b9d8c6,0x50f55b79,0xd249ef83 +.long 0x73978fe3,0x6131d6d4,0x754b00a1,0xcc4e4542,0x57dfcfe9,0x4e05df05,0x51ef6bf0,0x94b29cdd,0x9bc7edf2,0xe4530cff,0xd3da65f3,0x8ac236fd,0xc8eb0b48,0x0faf7d5f,0x660eb039,0x4d2de14c +.long 0x60430e54,0xc006bba7,0xda3289ab,0x10a2d0d6,0xd7979c59,0x9c037a5d,0xa116d944,0x04d1f3d3,0x8a0983cd,0x9ff22473,0xc883cabb,0x28e25b38,0x47a58995,0xe968dba5,0x774eebdf,0x2c80b505 +.long 0x4a953beb,0xee763b71,0x1642e7f6,0x502e223f,0x61d5e722,0x6fe4b641,0xdbef5316,0x9d37c5b0,0xf8330bc7,0x0115ed70,0x75a72789,0x139850e6,0xffceccc2,0x27d7faec,0x4fd9f7f6,0x3016a860 +.long 0x4cd8f64c,0xc492ec64,0x279d7b51,0x58a2d790,0x1fc75256,0x0ced1fc5,0x8f433017,0x3e658aed,0x05da59eb,0x0b61942e,0x0ddc3722,0xba3d60a3,0x742e7f87,0x7c311cd1,0xf6b01b6e,0x6473ffee +.long 
0x692ac542,0x8303604f,0x227b91d3,0xf079ffe1,0x15aaf9bd,0x19f63e63,0xf1f344fb,0xf99ee565,0xd6219199,0x8a1d661f,0xd48ce41c,0x8c883bc6,0x3c74d904,0x1065118f,0x0faf8b1b,0x713889ee +.long 0x81a1b3be,0x972b3f8f,0xce2764a0,0x4f3ce145,0x28c4f5f7,0xe2d0f1cc,0xc7f3985b,0xdeee0c0d,0xd39e25c3,0x7df4adc0,0xc467a080,0x40619820,0x61cf5a58,0x440ebc93,0x422ad600,0x527729a6 +.long 0xb1b76ba6,0xca6c0937,0x4d2026dc,0x1a2eab85,0x19d9ae0a,0xb1715e15,0xbac4a026,0xf1ad9199,0x07ea7b0e,0x35b3dfb8,0x3ed9eb89,0xedf5496f,0x2d6d08ab,0x8932e5ff,0x25bd2731,0xf314874e +.long 0x3f73f449,0xefb26a75,0x8d44fc79,0x1d1c94f8,0x3bc0dc4d,0x49f0fbc5,0x3698a0d0,0xb747ea0b,0x228d291e,0x5218c3fe,0x43c129d6,0x35b804b5,0xd1acc516,0xfac859b8,0x95d6e668,0x6c10697d +.long 0x0876fd4e,0xc38e438f,0x83d2f383,0x45f0c307,0xb10934cb,0x203cc2ec,0x2c9d46ee,0x6a8f2439,0x65ccde7b,0xf16b431b,0x27e76a6f,0x41e2cd18,0x4e3484d7,0xb9c8cf8f,0x8315244a,0x64426efd +.long 0xfc94dea3,0x1c0a8e44,0xdad6a0b0,0x34c8cdbf,0x04113cef,0x919c3840,0x15490ffa,0xfd32fba4,0x795dcfb7,0x58d190f6,0x83588baf,0xfef01b03,0xca1fc1c0,0x9e6d1d63,0xf0a41ac9,0x53173f96 +.long 0xba16f73b,0x2b1d402a,0x8cf9b9fc,0x2fb31014,0x446ef7bf,0x2d51e60e,0xb91e1745,0xc731021b,0x4fee99d4,0x9d3b4724,0xfac5c1ea,0x4bca48b6,0xbbea9af7,0x70f5f514,0x974c283a,0x751f55a5 +.long 0xcb452fdb,0x6e30251a,0x50f30650,0x31ee6965,0x933548d9,0xb0b3e508,0xf4b0ef5b,0xb8949a4f,0x3c88f3bd,0x208b8326,0xdb1d9989,0xab147c30,0x44d4df03,0xed6515fd,0xe72eb0c5,0x17a12f75 +.long 0x36cf69db,0x3b59796d,0x56670c18,0x1219eee9,0x7a070d8e,0xfe3341f7,0xa327f90c,0x9b70130b,0x0ae18e0e,0x36a32462,0x46c0a638,0x2021a623,0xc62eb0d4,0x251b5817,0x4c762293,0x87bfbcdf +.long 0xcdd61d64,0xf78ab505,0xc8c18857,0x8c7a53fc,0x16147515,0xa653ce6f,0xea7d52d5,0x9c923aa5,0x5c18871f,0xc24709cb,0x73b3cc74,0x7d53bec8,0xfdd1d4c4,0x59264aff,0x240da582,0x5555917e +.long 0x548f5a0e,0xcae8bbda,0x3bbfbbe1,0x1910eaba,0x7677afc3,0xae579685,0x73ff0b5c,0x49ea61f1,0x4f7c3922,0x78655478,0x20c68eef,0x95d337cd,0xdf779ab9,0x68f1e1e5,0xb5cf69a8,0x14b491b0 +.long 0x28e3fe89,0x7a6cbbe0,0xc5aac0eb,0xe7e1fee4,0x697e5140,0x7f47eda5,0xb454921f,0x4f450137,0x95cd8185,0xdb625f84,0xcdb2e583,0x74be0ba1,0xdd5e6de4,0xaee4fd7c,0xe8101739,0x4251437d +.long 0xac620366,0x686d72a0,0xb6d59344,0x4be3fb9c,0xa1eb75b9,0x6e8b44e7,0x91a5c10c,0x84e39da3,0xb38f0409,0x37cc1490,0x2c2ade82,0x02951943,0x1190a2d8,0x9b688783,0x231182ba,0x25627d14 +.long 0x658a6d87,0x6eb550aa,0xcf9c7325,0x1405aaa7,0x5c8748c9,0xd147142e,0x53ede0e0,0x7f637e4f,0x14ffad2c,0xf8ca2776,0xbafb6791,0xe58fb1bd,0xbf8f93fc,0x17158c23,0x0a4a4655,0x7f15b373 +.long 0xd842ca72,0x39d4add2,0x3ed96305,0xa71e4391,0x6700be14,0x5bb09cbe,0xd8befcf6,0x68d69d54,0x37183bcf,0xa45f5367,0x3370dff7,0x7152b7bb,0xbf12525b,0xcf887baa,0xd6d1e3cd,0xe7ac7bdd +.long 0x81fdad90,0x25914f78,0x0d2cf6ab,0xcf638f56,0xcc054de5,0xb90bc03f,0x18b06350,0x932811a7,0x9bbd11ff,0x2f00b330,0xb4044974,0x76108a6f,0xa851d266,0x801bb9e0,0xbf8990c1,0x0dd099be +.long 0xabe32986,0x58c5aaaa,0x50d59c27,0x0fe9dd2a,0x8d307305,0x84951ff4,0x86529b78,0x6c23f829,0x0b136a79,0x50bb2218,0x77a20996,0x7e2174de,0xc0bb4da6,0x6f00a4b9,0xefdde8da,0x89a25a17 +.long 0xc11ee01d,0xf728a27e,0xe5f10dfb,0xf900553a,0x02ec893c,0x189a83c8,0x23f66d77,0x3ca5bdc1,0x97eada9f,0x98781537,0x10256230,0x59c50ab3,0x323c69b3,0x346042d9,0x2c460449,0x1b715a6d +.long 0x6ae06e0b,0xa41dd476,0x9d42e25f,0xcdd7888e,0x56b25a20,0x0f395f74,0x8700e27e,0xeadfe0ae,0x69950093,0xb09d52a9,0x327f8d40,0x3525d9cb,0x67df886a,0xb8235a94,0x035faec2,0x77e4b0dd +.long 
0x517d7061,0x115eb20a,0x6c2df683,0x77fe3433,0xcdc6fc67,0x6870ddc7,0x0b87de83,0xb1610588,0xd9c4ddbe,0x343584ca,0x3d754be2,0xb3164f1c,0xc1e6c894,0x0731ed3a,0x4f6b904c,0x26327dec +.long 0x97b5cd32,0x9d49c6de,0xb5eceecd,0x40835dae,0xd9ded7fe,0xc66350ed,0x7a678804,0x8aeebb5c,0x5b8ee9ec,0x51d42fb7,0x8e3ca118,0xd7a17bdd,0x2ef4400e,0x40d7511a,0x875a66f4,0xc48990ac +.long 0x2199e347,0x8de07d2a,0x2a39e051,0xbee75556,0x916e51dc,0x56918786,0x4a2d89ec,0xeb191313,0x37d341ed,0x6679610d,0x56d51c2b,0x434fbb41,0xd7492dba,0xe54b7ee7,0x59021493,0xaa33a79a +.long 0xe4bd6d3d,0x49fc5054,0x5ab551d0,0x09540f04,0x4942d3a6,0x8acc9085,0x2d28323b,0x231af02f,0x0992c163,0x93458cac,0x888e3bb4,0x1fef8e71,0xbe8c268c,0x27578da5,0xe805ec00,0xcc8be792 +.long 0xc61c3855,0x29267bae,0x58c1fd3b,0xebff429d,0x8c0b93b8,0x22d886c0,0x2ddb8953,0xca5e00b2,0xc3fed8b7,0xcf330117,0x819c01f6,0xd49ac6fa,0x3c0fbd54,0x6ddaa6bd,0x8049a2cf,0x91743068 +.long 0xaff2ef81,0xd67f981e,0x2818ae80,0xc3654d35,0x1b2aa892,0x81d05044,0x3d099328,0x2db067bf,0x703dcc97,0xe7c79e86,0xe133e215,0xe66f9b37,0xe39a7a5c,0xcdf119a6,0x876f1b61,0x47c60de3 +.long 0xd860f1b2,0x6e405939,0xf5ed4d4a,0x3e9a1dbc,0xc9b6bcbd,0x3f23619e,0x734e4497,0x5ee790cf,0x5bdaf9bb,0xf0a834b1,0x4ca295f0,0x02cedda7,0xcb8e378c,0x4619aa2b,0xcc987ea4,0xe5613244 +.long 0x76b23a50,0x0bc022cc,0x0a6c21ce,0x4a2793ad,0x89cac3f5,0x38328780,0xcba26d56,0x29176f1b,0x4f6f59eb,0x06296187,0x8bdc658e,0x86e9bca9,0x57e30402,0x2ca9c4d3,0x516a09bb,0x5438b216 +.long 0x7672765a,0x0a6a063c,0x0547b9bf,0x37a3ce64,0x98b1a633,0x42c099c8,0x05ee6961,0xb5ab800d,0x11a5acd6,0xf1963f59,0x46201063,0xbaee6157,0xa596210a,0x36d9a649,0x1ba7138c,0xaed04363 +.long 0xa4a82b76,0xcf817d1c,0xf3806be9,0x5586960e,0x09dc6bb5,0x7ab67c89,0x114fe7eb,0x52ace7a0,0xcbbc9b70,0xcd987618,0x604ca5e1,0x4f06fd5a,0x6dbde133,0x90af14ca,0x948a3264,0x1afe4322 +.long 0xc44b2c6c,0xa70d2ca6,0x0ef87dfe,0xab726799,0x2e696377,0x310f64dc,0x4c8126a0,0x49b42e68,0xcea0b176,0x0ea444c3,0xcb269182,0x53a8ddf7,0xbbba9dcb,0xf3e674eb,0xd8669d33,0x0d2878a8 +.long 0xd019b6a3,0x04b935d5,0x406f1e46,0xbb5cf88e,0x5b57c111,0xa1912d16,0x19ebfd78,0x9803fc21,0xc07764a9,0x4f231c9e,0xb75bd055,0xd93286ee,0x8ee6c9de,0x83a9457d,0x6087ec90,0x04695915 +.long 0x58d6cd46,0x14c6dd8a,0x8e6634d2,0x9cb633b5,0xf81bc328,0xc1305047,0x26a177e5,0x12ede0e2,0x065a6f4f,0x332cca62,0x67be487b,0xc3a47ecd,0x0f47ed1c,0x741eb187,0xe7598b14,0x99e66e58 +.long 0x63d0ff12,0x6f0544ca,0xb610a05f,0xe5efc784,0x7cad7b47,0xf72917b1,0xf2cac0c0,0x3ff6ea20,0xf21db8b7,0xcc23791b,0xd7d93565,0x7dac70b1,0x694bdaad,0x682cda1d,0x1023516d,0xeb88bb8c +.long 0xdfdbeb1b,0xc4c634b4,0xb4ee4dea,0x22f5ca72,0xe6524821,0x1045a368,0x052b18b2,0xed9e8a3f,0xb961f49a,0x9b7f2cb1,0x7b009670,0x7fee2ec1,0x22507a6d,0x350d8754,0x4db55f1d,0x561bd711 +.long 0x320bbcaf,0x4c189ccc,0xdf1de48c,0x568434cf,0x0fa8f128,0x6af1b00e,0x8907583c,0xf0ba9d02,0x32ff9f60,0x735a4004,0xc25dcf33,0x3dd8e4b6,0x42c74cef,0xf2230f16,0x013fa8ad,0xd8117623 +.long 0xf51fe76e,0x36822876,0x11d62589,0x8a6811cc,0x46225718,0xc3fc7e65,0xc82fdbcd,0xb7df2c9f,0xdd7b205b,0x3b1d4e52,0x47a2e414,0xb6959478,0xefa91148,0x05e4d793,0xfd2e9675,0xb47ed446 +.long 0x04c9d9bf,0x1a7098b9,0x1b793048,0x661e2881,0xb01ee461,0xb1a16966,0x2954746f,0xbc521308,0x2477de50,0xc909a0fc,0x7dbd51ef,0xd80bb41c,0x53294905,0xa85be7ec,0x83958f97,0x6d465b18 +.long 0xfb6840fd,0x16f6f330,0x3401e6c8,0xfaaeb214,0xccb5b4f8,0xaf83d30f,0x266dec4b,0x22885739,0x7bc467df,0x51b4367c,0xd842d27a,0x926562e3,0x0fea14a6,0xdfcb6614,0xf2734cd9,0xeb394dae +.long 
0x11c0be98,0x3eeae5d2,0x814e8165,0xb1e6ed11,0xe52bce1c,0x191086bc,0xa75a04da,0x14b74cc6,0x8c060985,0x63cf1186,0x2dbd7f7c,0x071047de,0xce0942ca,0x4e433b8b,0xd8fec61d,0xecbac447 +.long 0xebf3232f,0x8f0ed0e2,0xc52a2edd,0xfff80f9e,0x75b55fdb,0xad9ab433,0xe42e0c11,0x73ca7820,0xe6251b46,0x6dace0a0,0x4c0d932d,0x89bc6b5c,0x095da19a,0x3438cd77,0x8d48bdfb,0x2f24a939 +.long 0x766561b7,0x99b47e46,0x0ed0322a,0x736600e6,0x638e1865,0x06a47cb1,0xcb136000,0x927c1c2d,0x0cc5df69,0x29542337,0x09d649a9,0x99b37c02,0x6aefdb27,0xc5f0043c,0x1be95c27,0x6cdd9987 +.long 0x390420d2,0x69850931,0x0983efa4,0x299c40ac,0xaf39aead,0x3a05e778,0x43a45193,0x84274408,0x91a711a0,0x6bcd0fb9,0x9f52ab17,0x461592c8,0xda3c6ed6,0xb49302b4,0x330d7067,0xc51fddc7 +.long 0xda50d531,0x94babeb6,0xa6a7b9da,0x521b840d,0x404bdc89,0x5305151e,0xd0d07449,0x1bcde201,0x3b76a59a,0xf427a78b,0x07791a1b,0xf84841ce,0xbf91ed1c,0xebd314be,0xbf172943,0x8e61d34c +.long 0x5541b892,0x1d5dc451,0xfc9d9e54,0xb186ee41,0xd5bf610d,0x9d9f345e,0xf6acca9f,0x3e7ba65d,0xa8369486,0x9dda787a,0x8eb5ba53,0x09f9dab7,0xd6481bc3,0x5afb2033,0xafa62104,0x76f4ce30 +.long 0xf4f066b5,0xa8fa00cf,0x461dafc2,0x89ab5143,0xa3389998,0x44339ed7,0xbc214903,0x2ff862f1,0xb05556e3,0x2c88f985,0x3467081e,0xcd96058e,0xedc637ea,0x7d6a4176,0x36a5acdc,0xe1743d09 +.long 0x7eb37726,0x66fd72e2,0x1481a037,0xf7fa264e,0x45f4aa79,0x9fbd3bde,0x767c3e22,0xed1e0147,0x82e7abe2,0x7621f979,0x45f633f8,0x19eedc72,0x6137bf3a,0xe69b155e,0x414ee94e,0xa0ad13ce +.long 0x1c0e651a,0x93e3d524,0x02ce227e,0xab1a6e2a,0x4ab27eca,0xe7af1797,0xbd444f39,0x245446de,0x56c07613,0x59e22a21,0xf4275498,0x43deafce,0x67fd0946,0x10834ccb,0x47406edf,0xa75841e5 +.long 0x7b0ac93d,0xebd6a677,0x78f5e0d7,0xa6e37b0d,0x76f5492b,0x2516c096,0x9ac05f3a,0x1e4bf888,0x4df0ba2b,0xcdb42ce0,0x5062341b,0x935d5cfd,0x82acac20,0x8a303333,0x5198b00e,0x429438c4 +.long 0x049d33fa,0x1d083bc9,0x946f67ff,0x58b82dda,0x67a1d6a3,0xac3e2db8,0x1798aac8,0x62e6bead,0xde46c58c,0xfc85980f,0x69c8d7be,0xa7f69379,0x837b35ec,0x23557927,0xe0790c0c,0x06a933d8 +.long 0x077ff55d,0x827c0e9b,0xbb26e680,0x53977798,0x1d9cb54f,0x59530874,0x4aac53ef,0xcca3f449,0xa07eda0f,0x11dc5c87,0xfd6400c8,0xc138bccf,0x13e5da72,0x549680d3,0x4540617e,0xc93eed82 +.long 0x4d0b75c0,0xfd3db157,0x6386075b,0x9716eb42,0x817b2c16,0x0639605c,0xf1e4f201,0x09915109,0x5cca6c3b,0x35c9a928,0x3505c900,0xb25f7d1a,0x630480c4,0xeb9f7d20,0x2a1a501c,0xc3c7b8c6 +.long 0x5a1f8e24,0x3f99183c,0x9dd255f0,0xfdb118fa,0xc27f62a6,0xb9b18b90,0x396ec191,0xe8f732f7,0x0be786ab,0x524a2d91,0x0ac5a0f5,0x5d32adef,0x9725f694,0x9b53d4d6,0x0510ba89,0x032a76c6 +.long 0xebeb1544,0x840391a3,0x3ed73ac3,0x44b7b88c,0x256cb8b3,0xd24bae7a,0xe394cb12,0x7ceb151a,0x5bc1e6a8,0xbd6b66d0,0x090f07bf,0xec70cecb,0x7d937589,0x270644ed,0x5f1dccfe,0xee9e1a3d +.long 0x745b98d2,0xb0d40a84,0x2556ed40,0xda429a21,0x85148cb9,0xf676eced,0xded18936,0x5a22d40c,0x70e8a4ce,0x3bc4b9e5,0x9eae0379,0xbfd1445b,0x1a0bd47e,0xf23f2c0c,0xe1845531,0xa9c0bb31 +.long 0x0a4c3f6b,0x9ddc4d60,0x2c15ef44,0xbdfaad79,0x7f484acc,0xce55a236,0x055b1f15,0x08653ca7,0x538873a3,0x2efa8724,0xace1c7e7,0x09299e5d,0xade332ba,0x07afab66,0x92dd71b7,0x9be1fdf6 +.long 0x5758b11c,0xa49b5d59,0xc8654f40,0x0b852893,0x52379447,0xb63ef6f4,0x105e690c,0xd4957d29,0x646559b0,0x7d484363,0x49788a8e,0xf4a8273c,0x34ce54a9,0xee406cb8,0xf86fda9b,0x1e1c260f +.long 0xcf6a4a81,0xe150e228,0x1b488772,0x1fa3b6a3,0xc5a9c15b,0x1e6ff110,0x8ad6aa47,0xc6133b91,0x9dffa978,0x8ac5d55c,0x5f3965f2,0xba1d1c1d,0x7732b52f,0xf969f4e0,0xa5172a07,0xfceecdb5 +.long 
0x10f2b8f5,0xb0120a5f,0x5c4c2f63,0xc83a6cdf,0xf8f9c213,0x4d47a491,0xd3f1bbd5,0xd9e1cce5,0xaba7e372,0x0d91bc7c,0xdfd1a2db,0xfcdc74c8,0x374618e5,0x05efa800,0x15a7925e,0x11216969 +.long 0xf6021c5d,0xd4c89823,0xeff14423,0x880d5e84,0x6dcd1396,0x6523bc5a,0x113c978b,0xd1acfdfc,0xbbb66840,0xb0c164e8,0x72b58459,0xf7f4301e,0xa638e8ec,0xc29ad4a6,0x46b78699,0xf5ab8961 +.long 0x0e954750,0x9dbd7974,0x64f9d2c6,0x0121de88,0xd985232e,0x2e597b42,0x53451777,0x55b6c3c5,0x519cb9fb,0xbb53e547,0x8428600d,0xf134019f,0xe081791a,0x5a473176,0x35fb0c08,0x2f3e2263 +.long 0x73d273b0,0xb28c3017,0x7721ef9a,0xccd21076,0xb650dc39,0x054cc292,0x6188045e,0x662246de,0x6b83c0d1,0x904b52fa,0x97e9cd46,0xa72df267,0x899725e4,0x886b43cd,0xd849ff22,0x2b651688 +.long 0x02f34533,0x60479b79,0x0c77c148,0x5e354c14,0xa8537c78,0xb4bb7581,0xefe1495f,0x188043d7,0x8c1d5026,0x9ba12f42,0x93d4aaab,0x2e0c8a26,0xaa57c450,0xbdba7b8b,0x9bbdafef,0x140c9ad6 +.long 0x25ac0f18,0x2067aa42,0x04d1fbf3,0xf7b1295b,0xa4b04824,0x14829111,0x33bd5e91,0x2ce3f192,0x8f2e1b72,0x9c7a1d55,0x302aa243,0xfe932286,0xd4be9554,0x497ca7b4,0xe0547a6e,0xb8e821b8 +.long 0x67e573e0,0xfb2838be,0x4084c44b,0x05891db9,0x96c1c2c5,0x91311373,0xd958444b,0x6aebfa3f,0xe56e55c1,0xac9cdce9,0x2caa46d0,0x7148ced3,0xb61fe8eb,0x2e10c7ef,0xff97cf4d,0x9fd835da +.long 0x081e9387,0xa36da109,0x8c935828,0xfb9780d7,0xe540b015,0xd5940332,0xe0f466fa,0xc9d7b51b,0xd6d9f671,0xfaadcd41,0xb1a2ac17,0xba6c1e28,0xed201e5f,0x066a7833,0xf90f462b,0x19d99719 +.long 0x060b5f61,0xf431f462,0x7bd057c2,0xa56f46b4,0x47e1bf65,0x348dca6c,0x41bcf1ff,0x9a38783e,0xda710718,0x7a5d33a9,0x2e0aeaf6,0x5a779987,0x2d29d187,0xca87314d,0xc687d733,0xfa0edc3e +.long 0x6a31e09b,0x9df33621,0xc1350e35,0xde89e44d,0x4ca0cf52,0x29214871,0x0b88a538,0xdf379672,0x2591d61b,0xc92a510a,0x585b447b,0x79aa87d7,0xe5287f77,0xf67db604,0x5efe7a80,0x1697c8bf +.long 0xcb198ac7,0x1c894849,0x0f264665,0xa884a93d,0x9b200678,0x2da964ef,0x009834e6,0x3c351b87,0xe2c4b44b,0xafb2ef9f,0x3326790c,0x580f6c47,0x0b02264a,0xb8480521,0x42a194e2,0x8ba6f9e2 +.long 0x8fb54738,0xfc87975f,0x27c3ead3,0x35160788,0xb74a085a,0x834116d2,0xa62fe996,0x53c99a73,0x5b81c51b,0x87585be0,0xbe0852b7,0x925bafa8,0xa84d19a7,0x76a4fafd,0x585206d4,0x39a45982 +.long 0x5eb03c0e,0x499b6ab6,0x72bc3fde,0xf19b7954,0x6e3a80d2,0xa86b5b9c,0x6d42819f,0xe4377508,0xbb3ee8a3,0xc1663650,0xb132075f,0x75eb14fc,0x7ad834f6,0xa8ccc906,0xe6e92ffd,0xea6a2474 +.long 0x0f8d6758,0x9d72fd95,0x408c07dd,0xcb84e101,0xa5e23221,0xb9114bfd,0xe94e742c,0x358b5fe2,0x95f40e75,0x1c0577ec,0x3d73f3d6,0xf0155451,0xbd1b9b66,0x9d55cd67,0xaf8d63c7,0x63e86e78 +.long 0xd3c095f1,0x39d934ab,0xe4b76d71,0x04b261be,0xe73e6984,0x1d2e6970,0x5e5fcb11,0x879fb23b,0xdfd75490,0x11506c72,0x61bcf1c1,0x3a97d085,0xbf5e7007,0x43201d82,0x798232a7,0x7f0ac52f +.long 0x6eb564d4,0x2715cbc4,0x9e570e29,0x8d6c752c,0x9ef5fd5d,0xf80247c8,0xd53eb514,0xc3c66b46,0x0f87de56,0x9666b401,0xc6c603b5,0xce62c06f,0x7e4fc942,0xae7b4c60,0x663a9c19,0x38ac0b77 +.long 0x4b049136,0xcb4d20ee,0x356a4613,0x8b63bf12,0x70e08128,0x1221aef6,0x4acb6b16,0xe62d8c51,0x379e7896,0x71f64a67,0xcafd7fa5,0xb25237a2,0x3841ba6a,0xf077bd98,0x3cd16e7e,0xc4ac0244 +.long 0x21fea4ca,0x548ba869,0xf3dfdac1,0xd36d0817,0xf4685faf,0x09d8d71f,0xc52c459a,0x8eff66be,0x0b57235e,0x182faee7,0x0106712b,0xee3c39b1,0xc0fcdcb0,0x5107331f,0xa51054ba,0x669fb9dc +.long 0x319d7682,0xb25101fb,0x0a982fee,0xb0293129,0x0261b344,0x51c1c9b9,0xbfd371fa,0x0e008c5b,0x0278ca33,0xd866dd1c,0xe5aa53b1,0x666f76a6,0x6013a2cf,0xe5cfb779,0xa3521836,0x1d3a1aad +.long 
0x73faa485,0xcedd2531,0xc0a76878,0xc8ee6c4f,0x2a11667d,0xddbccfc9,0x1c2f695a,0x1a418ea9,0x51f73971,0xdb11bd92,0xda2ed89f,0x3e4b3c82,0xe73e0319,0x9a44f3f4,0x303431af,0xd1e3de0f +.long 0x50f75f9c,0x3c5604ff,0x7e752b22,0x1d8eddf3,0x3c9a1118,0x0ef074dd,0xccb86d7b,0xd0ffc172,0x037d90f2,0xabd1ece3,0x6055856c,0xe3f307d6,0x7e4c6daf,0x422f9328,0x334879a0,0x902aac66 +.long 0x94cdfade,0xb6a1e7bf,0x7fc6d634,0x6c97e1ed,0xa2fb63f8,0x662ad24d,0xa5928405,0xf81be1b9,0xd14b4206,0x86d765e4,0x8fa0db65,0xbecc2e0e,0xb17fc76c,0xa28838e0,0xe37cf24e,0xe49a602a +.long 0x567193ec,0x76b4131a,0xe5f6e70b,0xaf3c305a,0x031eebdd,0x9587bd39,0x71bbe831,0x5709def8,0x0eb2b669,0x57059983,0x875b7029,0x4d80ce1b,0x0364ac16,0x838a7da8,0xbe1c83ab,0x2f431d23 +.long 0xf9294dd3,0xe56812a6,0x9b4b0d77,0xb448d01f,0x04e8305c,0xf3ae6061,0x94d8c63e,0x2bead645,0x84fd8b07,0x0a85434d,0xf7a9dee5,0x537b983f,0xef55bd85,0xedcc5f18,0x21c6cf8b,0x2041af62 +.long 0xb940c71e,0x8e52874c,0xdb5f4b3a,0x211935a9,0x301b1dc3,0x94350492,0x29958620,0x33d2646d,0xef911404,0x16b0d64b,0x9a3c5ef4,0x9d1f25ea,0x4a352c78,0x20f200eb,0x4bd0b428,0x43929f2c +.long 0xc7196e29,0xa5656667,0x9391be48,0x7992c2f0,0x9ee0cd6e,0xaaa97cbd,0x3dc8c9bf,0x51b0310c,0xdd9f22cb,0x237f8acf,0xb585d584,0xbb1d81a1,0x8c416388,0x8d5d85f5,0x42fe474f,0x0d6e5a5a +.long 0x38235d4e,0xe7812766,0x496e3298,0x1c62bd67,0x3f175bc8,0x8378660c,0x17afdd4d,0x4d04e189,0x85a8068c,0x32a81601,0x92b29a85,0xdb58e4e1,0xc70d8a3b,0xe8a65b86,0x98a0403b,0x5f0e6f4e +.long 0x69ed2370,0x08129684,0x0871ee26,0x34dc30bd,0x7c9c5b05,0x3a5ce948,0x43a90c87,0x7d487b80,0xdd0e7179,0x4089ba37,0xb4041811,0x45f80191,0x98747ba5,0x1c3e1058,0x6e1ae592,0x98c4e13a +.long 0xe82c9f9e,0xd44636e6,0xc33a1043,0x711db87c,0xaa8aec05,0x6f431263,0x2744a4aa,0x43ff120d,0xae77779b,0xd3bd892f,0x8cdc9f82,0xf0fe0cc9,0xf1c5b1bc,0xca5f7fe6,0x44929a72,0xcc63a682 +.long 0x09dbe19a,0xc7eaba0c,0x6b5c73c2,0x2f3585ad,0x0ae50c30,0x8ab8924b,0x638b30ba,0x17fcd27a,0x10b3d5a5,0xaf414d34,0x2a9accf1,0x09c107d2,0x946a6242,0x15dac49f,0xd707d642,0xaec3df2a +.long 0x3f894ae0,0x2c2492b7,0xb75f18ce,0xf59df3e5,0x8f53cad0,0x7cb740d2,0xc4f01294,0x3eb585fb,0x32c7f717,0x17da0c86,0xaf943f4c,0xeb8c795b,0xf67c51d2,0x4ee23fb5,0x68889949,0xef187575 +.long 0x0389168b,0xa6b4bdb2,0xea577d03,0xc4ecd258,0x55743082,0x3a63782b,0xc72f08cd,0x6f678f4c,0x65e58dd8,0x553511cf,0xd402c0cd,0xd53b4e3e,0xa037c14c,0x37de3e29,0xc05712aa,0x86b6c516 +.long 0xb38dff6f,0x2834da3e,0xea636be8,0xbe012c52,0x61dd37f8,0x292d238c,0x8f8142db,0x0e54523f,0x036a05d8,0xe31eb436,0x1e93c0ff,0x83e3cdff,0x50821ddf,0x3fd2fe0f,0xff9eb33b,0xc8e19b0d +.long 0xb569a5fe,0xc8cc943f,0xd4342d75,0xad0090d4,0xcaeca000,0x82090b4b,0x1bd410eb,0xca39687f,0x65959d77,0xe7bb0df7,0x9c964999,0x39d78218,0xb2415451,0xd87f62e8,0xbed76108,0xe5efb774 +.long 0xe822f0d0,0x3ea011a4,0x5a8704f8,0xbc647ad1,0x50c6820f,0xbb315b35,0xb7e76bec,0x863dec3d,0xf017bfc7,0x01ff5d3a,0x976b8229,0x20054439,0x0bbd0d3b,0x067fca37,0x7f5e3d0f,0xf63dde64 +.long 0x2a4c94e9,0x22dbefb3,0x96f8278a,0xafbff0fe,0x3503793d,0x80aea0b1,0x5f06cd29,0xb2238029,0x8ec3feca,0x65703e57,0x393e7053,0x06c38314,0x7c6734c4,0xa0b751eb,0xc59f0f1e,0xd2e8a435 +.long 0x5e9ca895,0x147d9052,0x972072df,0x2f4dd31e,0xe6c6755c,0xa16fda8e,0xcf196558,0xc66826ff,0x0cf43895,0x1f1a76a3,0x83c3097b,0xa9d604e0,0x66390e0e,0xe1908309,0xb3c85eff,0xa50bf753 +.long 0xf6a70251,0x0696bdde,0x3c6ab16a,0x548b801b,0xa4d08762,0x37fcf704,0xdff76c4e,0x090b3def,0x69cb9158,0x87e8cb89,0x995ece43,0x44a90744,0x0ad9fbf5,0xf85395f4,0x4fb0c82d,0x49b0f6c5 +.long 
0xadf7cccf,0x75d9bc15,0xdfa1e1b0,0x81a3e5d6,0x249bc17e,0x8c39e444,0x8ea7fd43,0xf37dccb2,0x907fba12,0xda654873,0x4a372904,0x35daa6da,0x6283a6c5,0x0564cfc6,0x4a9395bf,0xd09fa4f6 +.long 0xaeb19a36,0x688e9ec9,0xc7bfbfb4,0xd913f1ce,0x61c2faa6,0x797b9a3c,0x6a0a9c12,0x2f979bec,0x359679ec,0xb5969d0f,0x079b0460,0xebcf523d,0x10fab870,0xfd6b0008,0x9373a39c,0x3f2edcda +.long 0x6f568431,0x0d64f9a7,0x02f8898c,0xf848c27c,0x260b5bd5,0xf418ade1,0x6973dee8,0xc1f3e323,0x26c185dd,0x46e9319c,0x546f0ac4,0x6d85b7d8,0x247f9d57,0x427965f2,0xb0035f48,0xb519b636 +.long 0xab87d59c,0x6b6163a9,0x39caaa11,0xff9f58c3,0x3177387b,0x4ac39cde,0x873e77f9,0x5f6557c2,0x36a83041,0x67504006,0x75ef196c,0x9b1c96ca,0xb08c7940,0xf34283de,0x1128c316,0x7ea09644 +.long 0x6aa39dff,0xb510b3b5,0x9f8e4d8c,0x59b43da2,0x9e4c4b9f,0xa8ce31fd,0xc1303c01,0x0e20be26,0xe8ee47c9,0x18187182,0x7db98101,0xd9687cdb,0xa1e14ff6,0x7a520e4d,0x8836d572,0x429808ba +.long 0x4944b663,0xa37ca60d,0xa3f91ae5,0xf901f7a9,0x9e36e3b1,0xe4e3e76e,0x29d93250,0x9aa219cf,0x056a2512,0x347fe275,0xde65d95c,0xa4d643d9,0x699fc3ed,0x9669d396,0xcf8c6bbe,0xb598dee2 +.long 0xdda9e5c6,0x682ac1e5,0xcaa9fc95,0x4e0d3c72,0x772bea44,0x17faaade,0xab0009c8,0x5ef8428c,0x460ff016,0xcc4ce47a,0x725281cb,0xda6d12bf,0x0223aad2,0x44c67848,0x36256e28,0x6e342afa +.long 0x93a37c04,0x1400bb0b,0xdd10bd96,0x62b1bc9b,0x0dac46b7,0x7251adeb,0x7be4ef51,0x7d33b92e,0xe61fa29a,0x28b2a94b,0x06422233,0x4b2be13f,0x330d8d37,0x36d6d062,0xb28ca005,0x5ef80e1e +.long 0x6d16768e,0x174d4699,0x628bf217,0x9fc4ff6a,0x154e490d,0x77705a94,0x8d2d997a,0x9d96dd28,0xce5d72c4,0x77e2d9d8,0xc11c714f,0x9d06c5a4,0x79e4a03e,0x02aa5136,0x030ff28b,0x1386b3c2 +.long 0xfb283f61,0xfe82e8a6,0xf3abc3fb,0x7df203e5,0x3a4d3622,0xeec7c351,0xdf762761,0xf7d17dbf,0x522055f0,0xc3956e44,0x8fa748db,0xde3012db,0xbf1dcc14,0xca9fcb63,0xbe4e2f3a,0xa56d9dcf +.long 0x8bcec9c2,0xb86186b6,0x680b9f06,0x7cf24df9,0xc0d29281,0xc46b45ea,0x07b10e12,0xfff42bc5,0x4d289427,0x12263c40,0xb4848ec4,0x3d5f1899,0xd040800c,0x11f97010,0x300feb20,0xb4c5f529 +.long 0xde94fdcb,0xcc543f8f,0xc7c2f05e,0xe96af739,0x882692e1,0xaa5e0036,0x950d4ae9,0x09c75b68,0xb5932a7a,0x62f63df2,0xde0979ad,0x2658252e,0xb5e69631,0x2a19343f,0x525b666b,0x718c7501 +.long 0xea40dc3a,0x26a42d69,0xaecc018f,0xdc84ad22,0x3270f04a,0x25c36c7b,0x50fa72ed,0x46ba6d47,0x93e58a8e,0x6c37d1c5,0x120c088c,0xa2394731,0xcb6e86da,0xc3be4263,0x7126d038,0x2c417d36 +.long 0x8b6f8efa,0x5b70f9c5,0x37718536,0x671a2faa,0xb539c92b,0xd3ced3c6,0xa31203c2,0xe56f1bd9,0x9ff3c8eb,0x8b096ec4,0x43491cea,0x2deae432,0x17943794,0x2465c6eb,0x20586843,0x5d267e66 +.long 0xb07159d0,0x9d3d116d,0xc1896210,0xae07a67f,0xbb961579,0x8fc84d87,0x1c1f8dd6,0x30009e49,0xe3132819,0x8a8caf22,0xf23ab4ff,0xcffa197c,0x205dd687,0x58103a44,0x0ded67a2,0x57b796c3 +.long 0xa1779ad7,0x0b9c3a6c,0x357c09c5,0xa33cfe2e,0x3db4a57e,0x2ea29315,0x8ebeb52e,0x91959695,0xe546c879,0x118db9a6,0x6295c8d6,0x8e996df4,0x55ec806b,0xdd990484,0x165c1035,0x24f291ca +.long 0x440e2229,0xcca523bb,0x73ef4d04,0x324673a2,0x3e11ec39,0xaf3adf34,0xdc5968d3,0x6136d7f1,0xb053a927,0x7a7b2899,0xae067ecd,0x3eaa2661,0x02779cd9,0x8549b9c8,0xc53385ea,0x061d7940 +.long 0xf06d18bd,0x3e0ba883,0xb2700843,0x4ba6de53,0x591a9e4d,0xb966b668,0x7f4fa0ed,0x93f67567,0x4347237b,0x5a02711b,0xe794608e,0xbc041e2f,0x70f73d8c,0x55af10f5,0xbb7564f7,0xd2d4d4f7 +.long 0xb3e93ce7,0xd7d27a89,0x5d3a2c1b,0xf7b5a875,0x255b218a,0xb29e68a0,0x8af76754,0xb533837e,0x579fab2e,0xd1b05a73,0xecd74385,0xb41055a1,0x445e9115,0xb2369274,0xf520274e,0x2972a7c4 +.long 
0xf678e68a,0x6c08334e,0x99b057ed,0x4e4160f0,0x52ccb69a,0x3cfe11b8,0x21c8f772,0x2fd1823a,0x3298f055,0xdf7f072f,0xfec74a6e,0x8c0566f9,0x5bb4d041,0xe549e019,0x9208d850,0x7c3930ba +.long 0xaaa2902b,0xe07141fc,0xe4f69ad3,0x539ad799,0x813f9ffd,0xa6453f94,0x375bc2f7,0xc58d3c48,0x5dc64e96,0xb3326fad,0xb240e354,0x3aafcaa9,0xaca1e7a9,0x1d1b0903,0x1211b8a0,0x4ceb9767 +.long 0xe32a858e,0xeca83e49,0xae907bad,0x4c32892e,0x2eb9b494,0xd5b42ab6,0x1eabae1b,0x7fde3ee2,0xcaf54957,0x13b5ab09,0xe5f5d5d5,0xbfb028be,0x2003e2c0,0x928a0650,0x67476843,0x90793aac +.long 0xc81710a0,0x5e942e79,0x27ccadd4,0x557e4a36,0x4bcf6d0c,0x72a2bc56,0x26d7b80c,0x09ee5f43,0xd4292f19,0x6b70dbe9,0x63f16b18,0x56f74c26,0x35fbb42a,0xc23db0f7,0x6ae10040,0xb606bdf6 +.long 0x044573ac,0x1eb15d4d,0x556b0ba4,0x7dc3cf86,0xc60df6f7,0x97af9a33,0xa716ce8c,0x0b1ef85c,0xc96958be,0x2922f884,0x35690963,0x7c32fa94,0xeaa00061,0x2d7f667c,0x3547365c,0xeaaf7c17 +.long 0x87032d58,0x1eb4de46,0x5e2c79e0,0xc54f3d83,0x5d04ef23,0x07818df4,0x673d41b4,0x55faa9c8,0x89b95355,0xced64f6f,0xb7415c84,0x4860d2ea,0x050ebad3,0x5fdb9bd2,0x6685a5bf,0xdb53e0cc +.long 0x9feb6593,0xb830c031,0x6accff17,0xdd87f310,0x9f555c10,0x2303ebab,0x287e7065,0x94603695,0x2e83358c,0xf88311c3,0xeefb0178,0x508dd9b4,0x2dba8652,0x7ca23706,0x0047abe5,0x62aac5a3 +.long 0x8b1ea7b3,0x9a61d2a0,0xae8b1485,0xd495ab63,0x87052f99,0x38740f84,0xb2974eea,0x178ebe5b,0x5b36d17f,0x030bbcca,0xaaf86eea,0xb5e4cce3,0x68f8e9e0,0xb51a0220,0x09eb3e75,0xa4348796 +.long 0xeef1a752,0xbe592309,0x6f2aa1ed,0x5d7162d7,0x0f007dd2,0xaebfb5ed,0xc89edd22,0x255e14b2,0x0303b697,0xba85e072,0xf05720ff,0xc5d17e25,0x5128ebb6,0x02b58d6e,0xd754e113,0x2c80242d +.long 0xabfae1ca,0x919fca5f,0x1a21459b,0x937afaac,0x1f66a4d2,0x9e0ca91c,0x23ec1331,0x194cc7f3,0x8aa11690,0xad25143a,0x09b59e08,0xbe40ad8d,0xe750860a,0x37d60d9b,0xc6bf434c,0x6c53b008 +.long 0x1356eb80,0xb572415d,0x9578ded8,0xb8bf9da3,0x5e8fb38b,0x22658e36,0x5af8cb22,0x9b70ce22,0x829a8180,0x7c00018a,0xb81ed295,0x84329f93,0x5f3cea83,0x7c343ea2,0x67586536,0x38f8655f +.long 0x1d3ec517,0xa661a0d0,0x512321ae,0x98744652,0xeca92598,0x084ca591,0x1dcb3feb,0xa9bb9dc9,0x78b4c240,0x14c54355,0x610cafdc,0x5ed62a3b,0x1b38846b,0x07512f37,0xb0e38161,0x571bb70a +.long 0x2da705d2,0xb556b95b,0xb1a08f98,0x3ef8ada6,0xddecfbe5,0x85302ca7,0x943105cd,0x0e530573,0x21a9255d,0x60554d55,0xf2f3802a,0x63a32fa1,0xcd477875,0x35c8c5b0,0x6ad42da1,0x97f458ea +.long 0xeb6b242d,0x832d7080,0x3b71e246,0xd30bd023,0xbe31139d,0x7027991b,0x462e4e53,0x68797e91,0x6b4e185a,0x423fe20a,0x42d9b707,0x82f2c67e,0x4cf7811b,0x25c81768,0x045bb95d,0xbd53005e +.long 0x9d8e68fd,0xe5f649be,0x1b044320,0xdb0f0533,0xe0c33398,0xf6fde9b3,0x66c8cfae,0x92f4209b,0x1a739d4b,0xe9d1afcc,0xa28ab8de,0x09aea75f,0xeac6f1d0,0x14375fb5,0x708f7aa5,0x6420b560 +.long 0x6254dc41,0x9eae499c,0x7a837e7e,0x7e293924,0x090524a7,0x74aec08c,0x8d6f55f2,0xf82b9219,0x1402cec5,0x493c962e,0xfa2f30e7,0x9f17ca17,0xe9b879cb,0xbcd783e8,0x5a6f145f,0xea3d8c14 +.long 0x5e0dee6e,0xdede15e7,0xdc628aa2,0x74f24872,0x7861bb93,0xd3e9c4fe,0x6187b2e0,0x56d4822a,0xc59826f9,0xb66417cf,0x2408169e,0xca260969,0xc79ef885,0xedf69d06,0xdc7d138f,0x00031f8a +.long 0x0ebcf726,0x103c46e6,0x6231470e,0x4482b831,0x487c2109,0x6f6dfaca,0x62e666ef,0x2e0ace97,0x1f8d1f42,0x3246a9d3,0x574944d2,0x1b1e83f1,0xa57f334b,0x13dfa63a,0x9f025d81,0x0cf8daed +.long 0x00ee11c1,0x30d78ea8,0xb5e3dd75,0xeb053cd4,0xd58c43c5,0x9b65b13e,0xbd151663,0xc3ad49bd,0xb6427990,0x99fd8e41,0x707eae1e,0x12cf15bd,0x1aabb71e,0x29ad4f1b,0x07545d0e,0x5143e74d +.long 
0xc88bdee1,0x30266336,0x5876767c,0x25f29306,0xc6731996,0x9c078571,0xed552951,0xc88690b2,0x852705b4,0x274f2c2d,0x4e09552d,0xb0bf8d44,0x986575d1,0x7628beeb,0x7f864651,0x407be238 +.long 0xa639fc6b,0x0e5e3049,0x86003625,0xe75c35d9,0x5dcc1646,0x0cf35bd8,0x6c26273a,0x8bcaced2,0xb5536742,0xe22ecf1d,0x1a9e068b,0x013dd897,0x8a7909c5,0x17f411cb,0x861dd506,0x5757ac98 +.long 0x1e935abb,0x85de1f0d,0x154de37a,0xdefd10b4,0x369cebb5,0xb8d9e392,0x761324be,0x54d5ef9b,0x74f17e26,0x4d6341ba,0x78c1dde4,0xc0a0e3c8,0x87d918fd,0xa6d77581,0x02ca3a13,0x66876015 +.long 0xf36658f0,0xc7313e9c,0x71f8057e,0xc433ef1c,0x1b6a835a,0x85326246,0x7c86394c,0xc8f05398,0xe983c4a1,0xff398cdf,0x03b7b931,0xbf5e8162,0xb7b9045b,0x93193c46,0xa4a6e46b,0x1e4ebf5d +.long 0x43a24fe7,0xf9942a60,0xffb3492b,0x29c1191e,0x902fde05,0x9f662449,0x6713c32d,0xc792a7ac,0xb737982c,0x2fd88ad8,0xa21e60e3,0x7e3a0319,0x7383591a,0x09b0de44,0x8310a456,0x6df141ee +.long 0xe6d6f471,0xaec1a039,0x1198d12e,0x14b2ba0f,0x3aeee5ac,0xebc1a160,0xe0b964ce,0x401f4836,0x4fd03f66,0x2ee43796,0xdd8f3f12,0x3fdb4e49,0x29380f18,0x6ef267f6,0x8da64d16,0x3e8e9670 +.long 0x207674f1,0xbc19180c,0x33ae8fdb,0x112e09a7,0x6aaeb71e,0x99667554,0xe101b1c7,0x79432af1,0xde2ddec6,0xd5eb558f,0x5357753f,0x81392d1f,0x3ae1158a,0xa7a76b97,0x4a899991,0x416fbbff +.long 0x0d4a9dcf,0x9e65fdfd,0x944ddf12,0x7bc29e48,0x3c856866,0xbc1a92d9,0x6e98dfe2,0x273c6905,0xcdfaa6b8,0x69fce418,0x5061c69f,0x606bd823,0x6af75e27,0x42d495a0,0x6d873a1f,0x8ed3d505 +.long 0x6ab25b6a,0xaf552841,0x2b1a4523,0xc6c0ffc7,0x21c99e03,0xab18827b,0x9034691b,0x060e8648,0x93c7f398,0x5207f90f,0x82f8d10b,0x9f4a96cb,0x3ad0f9e3,0xdd71cd79,0xfc3a54f5,0x84f435d2 +.long 0x8e33787f,0x4b03c55b,0xa6384673,0xef42f975,0x5051b9f0,0xff7304f7,0x741c87c2,0x18aca1dc,0x2d4bfe80,0x56f120a7,0x053e732c,0xfd823b3d,0x7537ca16,0x11bccfe4,0x1b5a996b,0xdf6c9c74 +.long 0x904fc3fa,0xee7332c7,0xc7e3636a,0x14a23f45,0xf091d9aa,0xc38659c3,0xb12d8540,0x4a995e5d,0xf3a5598a,0x20a53bec,0xb1eaa995,0x56534b17,0xbf04e03c,0x9ed3dca4,0xd8d56268,0x716c563a +.long 0x1d6178e7,0x27ba77a4,0x68a1ff8e,0xe4c80c40,0x0a13f63d,0x75011099,0xa61d46f3,0x7bf33521,0x10b365bb,0x0aff218e,0x0fd7ea75,0x81021804,0xa4b3a925,0x05a3fd8a,0x9b3db4e6,0xb829e75f +.long 0x4d53e5fb,0x6bdc75a5,0xd52717e3,0x04a5dc02,0xe9a42ec2,0x86af502f,0x2630e382,0x8867e8fb,0xbec9889b,0xbf845c6e,0xcb47c98d,0x54f491f2,0x790c2a12,0xa3091fba,0xc20f708b,0xd7f6fd78 +.long 0xacde5e17,0xa569ac30,0x6852b4d7,0xd0f996d0,0x4609ae54,0xe51d4bb5,0x0daed061,0x3fa37d17,0x34b8fb41,0x62a88684,0x9efb64f1,0x99a2acbd,0x6448e1f2,0xb75c1a5e,0x42b5a069,0xfa99951a +.long 0x2f3b26e7,0x6d956e89,0xda875247,0xf4709860,0x2482dda3,0x3ad15179,0x017d82f0,0xd64110e3,0xfad414e4,0x14928d2c,0x2ed02b24,0x2b155f58,0xcb821bf1,0x481a141b,0x4f81f5da,0x12e3c770 +.long 0x9fff8381,0xe49c5de5,0x5bbec894,0x11053232,0x454d88c4,0xa0d051cc,0x1f8e531b,0x4f6db89c,0xca563a44,0x34fe3fd6,0x58da8ab9,0x7f5c2215,0x9474f0a1,0x8445016d,0xcb7d8a0a,0x17d34d61 +.long 0x1c474019,0x8e9d3910,0xd52ceefb,0xcaff2629,0xc1622c2b,0xf9cf3e32,0xe9071a05,0xd4b95e3c,0x1594438c,0xfbbca61f,0x04aadedf,0x1eb6e6a6,0x68e14940,0x853027f4,0xdfabda9c,0x221d322a +.long 0xb7cb179a,0xed8ea9f6,0xb7934dcc,0xdc7b764d,0x5e09180d,0xfcb13940,0xb47dc2dd,0x6629a6bf,0x9f5a915e,0xbfc55e4e,0x6204441e,0xb1db9d37,0x930c5f53,0xf82d68cf,0xcbb605b1,0x17d3a142 +.long 0x308780f2,0xdd5944ea,0x3845f5e4,0xdc8de761,0x7624d7a3,0x6beaba7d,0x304df11e,0x1e709afd,0x02170456,0x95364376,0xc8f94b64,0xbf204b3a,0x5680ca68,0x4e53af7c,0xe0c67574,0x0526074a +.long 
0xecd92af6,0x95d8cef8,0x6cd1745a,0xe6b9fa7a,0xa325c3e4,0x3d546d3d,0x9ae93aae,0x1f57691d,0x9d2e1a33,0xe891f3fe,0xac063d35,0xd430093f,0x5513a327,0xeda59b12,0x5536f18f,0xdc2134f3 +.long 0x5c210286,0xaa51fe2c,0x1cab658c,0x3f68aaee,0xf9357292,0x5a23a00b,0x7efdabed,0x9a626f39,0x199d78e3,0xfe2b3bf3,0x71bbc345,0xb7a2af77,0x1e59802c,0x3d19827a,0xb487a51c,0x823bbc15 +.long 0x99d0a422,0x856139f2,0xf456c6fb,0x9ac3df65,0x701f8bd6,0xaddf65c6,0x3758df87,0x149f321e,0x721b7eba,0xb1ecf714,0x31a3312a,0xe17df098,0xd5c4d581,0xdb2fd6ec,0x8fcea1b3,0xfd02996f +.long 0x7882f14f,0xe29fa63e,0x07c6cadc,0xc9f6dc35,0xb882bed0,0x46f22d6f,0xd118e52c,0x1a45755b,0x7c4608cf,0x9f2c7c27,0x568012c2,0x7ccbdf32,0x61729b0e,0xfcb0aedd,0xf7d75dbf,0x7ca2ca9e +.long 0x6f640f62,0xf58fecb1,0x39f51946,0xe274b92b,0x6288af44,0x7f4dfc04,0xeac329e5,0x0a91f32a,0xd6aaba31,0x43ad274b,0x0f6884f9,0x719a1640,0xdaf91e20,0x685d29f6,0x27e49d52,0x5ec1cc33 +.long 0x3b54a059,0x38f4de96,0xefbcfdb3,0x0e0015e5,0x4dbb8da6,0x177d23d9,0x97a617ad,0x98724aa2,0xfdb6558e,0x30f0885b,0xc7899a96,0xf9f7a28a,0x872dc112,0xd2ae8ac8,0x73c3c459,0xfa0642ca +.long 0xe7dfc8d6,0x15296981,0x1fb5b94a,0x67cd4450,0x0eddfd37,0x0ec71cf1,0x9a8eddc7,0xc7e5eeb3,0x81d95028,0x02ac8e3d,0x70b0e35d,0x0088f172,0xe1881fe3,0xec041fab,0xd99e7faa,0x62cf71b8 +.long 0xe0f222c2,0x5043dea7,0x72e65142,0x309d42ac,0x9216cd30,0x94fe9ddd,0x0f87feec,0xd6539c7d,0x432ac7d7,0x03c5a57c,0x327fda10,0x72692cf0,0x280698de,0xec28c85f,0x7ec283b1,0x2331fb46 +.long 0x2867e633,0xd34bfa32,0x0a9cc815,0x78709a82,0x875e2fa5,0xb7fe6964,0x9e98bfb5,0x25cc064f,0x493a65c5,0x9eb0151c,0x53182464,0x5fb5d941,0xf04618e2,0x69e6f130,0xf89c8ab6,0xa8ecec22 +.long 0xb96209bd,0xcd6ac88b,0xb3e1c9e0,0x65fa8cdb,0x4a8d8eac,0xa47d22f5,0x8d33f963,0x83895cdf,0xb56cd3d1,0xa8adca59,0xdaf38232,0x10c8350b,0xa5080a9f,0x2b161fb3,0x3af65b3a,0xbe7f5c64 +.long 0x97403a11,0x2c754039,0x121b96af,0x94626cf7,0x6a983ec2,0x431de7c4,0x52cc3df7,0x3780dd3a,0x2baf8e3b,0xe28a0e46,0x51d299ae,0xabe68aad,0x647a2408,0x603eb8f9,0x5c750981,0x14c61ed6 +.long 0xc53352e7,0x88b34414,0x1337d46e,0x5a34889c,0xf95f2bc8,0x612c1560,0xd4807a3a,0x8a3f8441,0x5224da68,0x680d9e97,0xc3eb00e9,0x60cd6e88,0x9a6bc375,0x3875a98e,0x4fd554c2,0xdc80f924 +.long 0x6ac77407,0x6c4b3415,0x25420681,0xa1e5ea8f,0x4607a458,0x541bfa14,0x96d7fbf9,0x5dbc7e7a,0x31590a47,0x646a851b,0x15ee6df8,0x039e85ba,0xd7b43fc0,0xd19fa231,0x299a0e04,0x84bc8be8 +.long 0xf20df03a,0x2b9d2936,0x8608d472,0x24054382,0x9149202a,0x76b6ba04,0x3670e7b7,0xb21c3831,0xd6fdee10,0xddd93059,0x78488e71,0x9da47ad3,0xa0fcfb25,0x99cc1dfd,0x64696954,0x42abde10 +.long 0x17eab9fe,0x14cc15fc,0xd3e70972,0xd6e863e4,0x6432112c,0x29a7765c,0x5b0774d8,0x88660001,0x2c088eae,0x3729175a,0x8230b8d4,0x13afbcae,0x915f4379,0x44768151,0xd8d22812,0xf086431a +.long 0xc298b974,0x37461955,0xf8711e04,0x905fb5f0,0xfe969d18,0x787abf3a,0x6f6a494e,0x392167c2,0x28c511da,0xfc7a0d2d,0xb66a262d,0xf127c7dc,0xfd63fdf0,0xf9c4bb95,0x3913ef46,0x90016589 +.long 0x11aa600d,0x74d2a73c,0x9fb5ab52,0x2f5379bd,0x7fb70068,0xe49e53a4,0x404aa9a7,0x68dd39e5,0x2ecaa9c3,0xb9b0cf57,0xe824826b,0xba0e103b,0x4631a3c4,0x60c2198b,0xfa8966a2,0xc5ff84ab +.long 0xac95aff8,0x2d6ebe22,0xb5a46d09,0x1c9bb6db,0x53ee4f8d,0x419062da,0xbb97efef,0x7b9042d0,0x830cf6bd,0x0f87f080,0x6ec8a6c6,0x4861d19a,0x202f01aa,0xd3a0daa1,0xf25afbd5,0xb0111674 +.long 0x1afb20d9,0x6d00d6cf,0x40671bc5,0x13695000,0x2485ea9b,0x913ab0dc,0x9eef61ac,0x1f2bed06,0x6d799e20,0x850c8217,0x3271c2de,0x93415f37,0x6c4f5910,0x5afb06e9,0xc4e9e421,0x688a52df +.long 
0xe2a9a6db,0x30495ba3,0x58f9268b,0x4601303d,0x7eb0f04f,0xbe3b0dad,0x4456936d,0x4ea47250,0xd33fd3e7,0x8caf8798,0xeb433708,0x1ccd8a89,0x87fd50ad,0x9effe3e8,0x6b29c4df,0xbe240a56 +.long 0xca0e7ebd,0xec4ffd98,0xe748616e,0xf586783a,0xc77baa99,0xa5b00d8f,0xb4f34c9c,0x0acada29,0x0fe723ac,0x36dad67d,0x39c36c1e,0x1d8e53a5,0x1f4bea41,0xe4dd342d,0xebc9e4e0,0x64fd5e35 +.long 0x57908805,0x96f01f90,0x5ed480dd,0xb5b9ea3d,0x3efd2dd0,0x366c5dc2,0x6e9dfa27,0xed2fe305,0x6e9197e2,0x4575e892,0xab502a5d,0x11719c09,0xe81f213f,0x264c7bec,0x55f5c457,0x741b9241 +.long 0x49a5f4f4,0x78ac7b68,0x9fc45b7d,0xf91d70a2,0xb0f5f355,0x39b05544,0xeef930d9,0x11f06bce,0x038d05e1,0xdb84d25d,0xbacc1d51,0x04838ee5,0x9e8ee00b,0x9da3ce86,0xc36eda1f,0xc3412057 +.long 0x64d9c2f4,0xae80b913,0xa010a8ff,0x7468bac3,0x37359d41,0xdfd20037,0x15efeacc,0x1a0f5ab8,0x659d0ce0,0x7c25ad2f,0x6785cff1,0x4011bcbb,0x7e2192c7,0x128b9912,0x13ccb0e8,0xa549d8e1 +.long 0xc85438b1,0x805588d8,0xbc25cb27,0x5680332d,0x1a4bfdf4,0xdcd1bc96,0x706f6566,0x779ff428,0xf059987a,0x8bbee998,0xcc686de7,0xf6ce8cf2,0x953cfdb2,0xf8ad3c4a,0x2205da36,0xd1d426d9 +.long 0xc781a241,0xb3c0f13f,0xd75362a8,0x3e89360e,0xc8a91184,0xccd05863,0xefa8a7f4,0x9bd0c9b7,0x8a912a4b,0x97ee4d53,0xbcf518fd,0xde5e15f8,0xc467e1e0,0x6a055bf8,0x1587e256,0x10be4b4b +.long 0x668621c9,0xd90c14f2,0xab9c92c1,0xd5518f51,0xd6d47b3c,0x8e6a0100,0x66716175,0xcbe980dd,0xddd83683,0x500d3f10,0x99cac73c,0x3b6cb35d,0x6083d550,0x53730c8b,0xdf0a1987,0xcf159767 +.long 0x43ad73b3,0x84bfcf53,0x4f035a94,0x1b528c20,0x33eeac69,0x4294edf7,0x817f3240,0xb6283e83,0x0a5f25b1,0xc3fdc959,0x5844ee22,0xefaf8aa5,0xdbdde4de,0xde269ba5,0xc56133bf,0xe3347160 +.long 0x8d9ea9f8,0xc1184219,0xf3fc1ab5,0x090de5db,0x0bf22cda,0x404c37b1,0xf5618894,0x7de20ec8,0xecdaecab,0x754c588e,0x88342743,0x6ca4b0ed,0xf4a938ec,0x76f08bdd,0x91493ccb,0xd182de89 +.long 0xc8a4186a,0xd652c53e,0x946d8e33,0xb3e878db,0x5f37663c,0x088453c0,0xb407748b,0x5cd9daaa,0x586d5e72,0xa1f5197f,0xc443ca59,0x47500be8,0xe2652424,0x78ef35b2,0x6dd7767d,0x09c5d26f +.long 0xa74d3f7b,0x7175a79a,0xcf5ea459,0x0428fd8d,0xa5d1746d,0x511cb97c,0xe71d1278,0x36363939,0x10350bf4,0xcf2df955,0x60aae782,0xb3817439,0x3e688809,0xa748c0e4,0xd7a5a006,0x98021fbf +.long 0x0e367a98,0x9076a70c,0x0f62b7c2,0xbea1bc15,0x30fe0343,0x2645a68c,0x699dc14f,0xacaffa78,0x457bf9c4,0xf4469964,0x0d2ead83,0x0db6407b,0xb2c6f3eb,0x68d56cad,0xf376356c,0x3b512e73 +.long 0xfce10408,0xe43b0e1f,0x5a5e257d,0x89ddc003,0x0362e5b3,0xb0ae0d12,0xb0519161,0x07f983c7,0x5d5231e7,0xc2e94d15,0x0b4f9513,0xcff22aed,0x6ad0b0b5,0xb02588dd,0x11d0dcd5,0xb967d1ac +.long 0xcf777b6c,0x8dac6bc6,0x4c6d1959,0x0062bdbd,0x0ef5cc85,0x53da71b5,0x4006f14f,0x07012c7d,0xac47800d,0x4617f962,0xc102ed75,0x53365f2b,0x4ab8c9d3,0xb422efcb,0x34af31c9,0x195cb26b +.long 0x05f2c4ce,0x3a926e29,0x9856966c,0xbd2bdecb,0x85527015,0x5d16ab3a,0x4486c231,0x9f81609e,0xda350002,0xd8b96b2c,0xfa1b7d36,0xbd054690,0xe71d79bc,0xdc90ebf5,0x08964e4e,0xf241b6f9 +.long 0x2fe3cd4c,0x7c838643,0xb4bc633c,0xe0f33acb,0x3d139f1f,0xb4a9ecec,0xdc4a1f49,0x05ce69cd,0xf5f98aaf,0xa19d1b16,0x6f23e0ef,0x45bb71d6,0x46cdfdd3,0x33789fcd,0xcee040ca,0x9b8e2978 +.long 0xae0a6828,0x9c69b246,0x7078d5aa,0xba533d24,0x7bb4fbdb,0x7a2e42c0,0x7035385c,0xcfb4879a,0x3281705b,0x8c3dd30b,0x404fe081,0x7e361c6c,0x3f604edf,0x7b21649c,0xe52ffe47,0x5dbf6a3f +.long 0x4b54d9bf,0xc41b7c23,0x3511c3d9,0x1374e681,0xc1b2b758,0x1863bf16,0x1e9e6a96,0x90e78507,0x5d86f174,0xab4bf98d,0x85e96fe4,0xd74e0bd3,0xcac5d344,0x8afde39f,0xbd91b847,0x90946dbc +.long 
0xfe1a838c,0xf5b42358,0x620ac9d8,0x05aae6c5,0xa1ce5a0b,0x8e193bd8,0x4dabfd72,0x8f710571,0x182caaac,0x8d8fdd48,0x040745cf,0x8c4aeefa,0xf3b93e6d,0x73c6c30a,0x16f42011,0x991241f3 +.long 0xe457a477,0xa0158eea,0xee6ddc05,0xd19857db,0x18c41671,0xb3265224,0x3c2c0d58,0x3ffdfc7e,0x26ee7cda,0x3a3a5254,0xdf02c3a8,0x341b0869,0x723bbfc8,0xa023bf42,0x14452691,0x3d15002a +.long 0x85edfa30,0x5ef7324c,0x87d4f3da,0x25976554,0xdcb50c86,0x352f5bc0,0x4832a96c,0x8f6927b0,0x55f2f94c,0xd08ee1ba,0x344b45fa,0x6a996f99,0xa8aa455d,0xe133cb8d,0x758dc1f7,0x5d0721ec +.long 0x79e5fb67,0x6ba7a920,0x70aa725e,0xe1331feb,0x7df5d837,0x5080ccf5,0x7ff72e21,0xe4cae01d,0x0412a77d,0xd9243ee6,0xdf449025,0x06ff7cac,0x23ef5a31,0xbe75f7cd,0x0ddef7a8,0xbc957822 +.long 0xb0ce1c55,0x8cf7230c,0x0bbfb607,0x5b534d05,0x0e16363b,0xee1ef113,0xb4999e82,0x27e0aa7a,0x79362c41,0xce1dac2d,0x91bb6cb0,0x67920c90,0x2223df24,0x1e648d63,0xe32e8f28,0x0f7d9eef +.long 0xfa833834,0x6943f39a,0xa6328562,0x22951722,0x4170fc10,0x81d63dd5,0xaecc2e6d,0x9f5fa58f,0xe77d9a3b,0xb66c8725,0x6384ebe0,0x11235cea,0x5845e24a,0x06a8c118,0xebd093b1,0x0137b286 +.long 0x44ace150,0xc589e1ce,0x4381e97c,0xe0f8d3d9,0x62c5a4b8,0x59e99b11,0xfd0ec9f9,0x90d262f7,0x283e13c9,0xfbc854c9,0xaedc7085,0x2d04fde7,0x47dcbecb,0x057d7765,0x9a76fa5f,0x8dbdf591 +.long 0x0de1e578,0xd0150695,0xe9f72bc6,0x2e1463e7,0x1b39eca5,0xffa68441,0x7c037f2f,0x673c8530,0x747f91da,0xd0d6a600,0xc9cb78e9,0xb08d43e1,0x27b5cef5,0x0fc0c644,0xa60a2fd6,0x5c1d160a +.long 0x28c8e13b,0xf98cae53,0xb2eddcd1,0x375f10c4,0x5cce06ad,0xd4eb8b7f,0x80a2e1ef,0xb4669f45,0x5bbd8699,0xd593f9d0,0xe7976d13,0x5528a4c9,0x1c7e28d3,0x3923e095,0x3f6bb577,0xb9293790 +.long 0xc42bd6d2,0xdb567d6a,0xbb1f96ae,0x6df86468,0x4843b28e,0x0efe5b1a,0x6379b240,0x961bbb05,0x70a6a26b,0xb6caf5f0,0x328e6e39,0x70686c0d,0x895fc8d3,0x80da06cf,0xb363fdc9,0x804d8810 +.long 0x207f1670,0xbe22877b,0x4e615291,0x9b0dd188,0x97a3c2bf,0x625ae8dc,0x439b86e8,0x08584ef7,0xdcd898ff,0xde7190a5,0x2058ee3d,0x26286c40,0x5f87b1c1,0x3db0b217,0x102a6db5,0xcc334771 +.long 0x2f770fb1,0xd99de954,0x4cd7535e,0x97c1c620,0x3f09cefc,0xd3b6c448,0x5a63b4f8,0xd725af15,0xc01e20ec,0x0c95d24f,0x9ae7121f,0xdfd37494,0xec77b7ec,0x7d6ddb72,0x0353a4ae,0xfe079d3b +.long 0x2e6ac8d2,0x3066e70a,0x106e5c05,0x9c6b5a43,0xede59b8c,0x52d3c6f5,0xfccec9ae,0x30d6a5c3,0x4fc0a9ef,0xedec7c22,0x95c16ced,0x190ff083,0x94de0fde,0xbe12ec8f,0x852d3433,0x0d131ab8 +.long 0x85701291,0x42ace07e,0x194061a8,0x94793ed9,0xd7f4a485,0x30e83ed6,0xf9eeff4d,0x9eec7269,0x0c9d8005,0x90acba59,0x1e79b9d1,0x5feca458,0x1d506a1e,0x8fbe5427,0x2439cfa7,0xa32b2c8e +.long 0x73dd0b4e,0x1671c173,0x44a054c6,0x37a28214,0x4e8b53f1,0x81760a1b,0xf9f93b9e,0xa6c04224,0xcf671e3c,0x18784b34,0xcda9b994,0x81bbecd2,0xb2ab3848,0x38831979,0xf2e03c2d,0xef54feb7 +.long 0xfb8088fa,0xcf197ca7,0x4ddc96c5,0x01427247,0x30777176,0xa2d2550a,0x4d0cf71d,0x53469898,0x3a2aaac6,0x6ce937b8,0x5af38d9b,0xe9f91dc3,0xc8bf2899,0x2598ad83,0xb5536c16,0x8e706ac9 +.long 0xf688dc98,0x40dc7495,0x124c4afc,0x26490cd7,0x1f18775c,0xe651ec84,0xb4fdaf4a,0x393ea6c3,0x7f338e0d,0x1e1f3343,0x6053e7b5,0x39fb832b,0x619e14d5,0x46e702da,0xcdeef6e0,0x859cacd1 +.long 0x4462007d,0x63b99ce7,0x4cb5f5b7,0xb8ab48a5,0xf55edde7,0x9ec673d2,0x8cfaefda,0xd1567f74,0x0887bcec,0x46381b6b,0xe178f3c2,0x694497ce,0x1e6266cb,0x5e6525e3,0x697d6413,0x5931de26 +.long 0x0e58d493,0x87f8df7c,0x58b73f12,0xb1ae5ed0,0xdea0c34d,0xc368f784,0x859a91a0,0x9bd0a120,0xcc863c68,0xb00d88b7,0x3d1f4d65,0x3a1cc11e,0x0aa85593,0xea38e0e7,0x7dc4aee8,0x37f13e98 +.long 
0xbc947bad,0x10d38667,0x2a36ee2e,0x738e07ce,0xc577fcac,0xc93470cd,0x2782470d,0xdee1b616,0x2e793d12,0x36a25e67,0xe0f186da,0xd6aa6cae,0x80e07af7,0x474d0fd9,0xba8a5cd4,0xf7cdc47d +.long 0xab15247f,0x28af6d9d,0x493a537f,0x7c789c10,0x23a334e7,0x7ac9b110,0x12c9c277,0x0236ac09,0x1d7a5144,0xa7e5bd25,0xf13ec4ec,0x098b9c2a,0xd3f0abca,0x3639daca,0xa23960f9,0x642da81a +.long 0x4f7269b1,0x7d2e5c05,0xe287c385,0xfcf30777,0xf2a46f21,0x10edc84f,0x4f43fa36,0x35441757,0xfd703431,0xf1327899,0x16dd587a,0xa438d7a6,0xe9c8352d,0x65c34c57,0x5cc5a24e,0xa728edab +.long 0x42531689,0xaed78abc,0x010963ef,0x0a51a0e8,0xd717d9b3,0x5776fa0a,0x7dd3428b,0xf356c239,0x8d3a3dac,0x29903fff,0x3d94491f,0x409597fa,0xbf4a56a4,0x4cd7a5ff,0x8adab462,0xe5096474 +.long 0x5c3427b0,0xa97b5126,0xd282c9bd,0x6401405c,0x222c5c45,0x3629f8d7,0xe8d50aed,0xb1c02c16,0xd9635bc9,0xbea2ed75,0x6e24552f,0x226790c7,0x65f1d066,0x3c33f2a3,0x6dfccc2e,0x2a43463e +.long 0xdb483761,0x8cc3453a,0x65d5672b,0xe7cc6085,0xde3efc87,0x277ed6cb,0x69234eaf,0x19f2f368,0x5c0b800b,0x9aaf4317,0x8b6da6e2,0x1f1e7c89,0xb94ec75e,0x6cfb4715,0x453118c2,0xd590dd5f +.long 0x1f17a34c,0x14e49da1,0x235a1456,0x5420ab39,0x2f50363b,0xb7637241,0xc3fabb6e,0x7b15d623,0xe274e49c,0xa0ef40b1,0x96b1860a,0x5cf50744,0x66afe5a4,0xd6583fbf,0xf47e3e9a,0x44240510 +.long 0x11b2d595,0x99254343,0xeec8df57,0xf1367499,0x3e73dd05,0x3cb12c61,0x7dac102a,0xd248c033,0xa77739f5,0xcf154f13,0x23d2af42,0xbf4288cb,0x32e4a1cf,0xaa64c9b6,0xc8a208f3,0xee8c07a8 +.long 0x6fe8393f,0xe10d4999,0xe91f3a32,0x0f809a3f,0x802f63c8,0x61096d1c,0x57750d3d,0x289e1462,0x9889feea,0xed06167e,0xe0993909,0xd5c9c0e2,0x56508ac6,0x46fca0d8,0x4f1b8e83,0x91826047 +.long 0x9a4a2751,0x4f2c877a,0xcae6fead,0x71bd0072,0x06aa1941,0x38df8dcc,0x63beeaa8,0x5a074b4c,0xc1cec8ed,0xd6d65934,0xaabc03bd,0xa6ecb49e,0xde8a8415,0xaade91c2,0x691136e0,0xcfb0efdf +.long 0x23ab3495,0x11af45ee,0x0b77463d,0xa132df88,0x815d06f4,0x8923c15c,0x0d61a436,0xc3ceb3f5,0xe88fb1da,0xaf52291d,0x1da12179,0xea057974,0xd2fef720,0xb0d7218c,0x8e1d8845,0x6c0899c9 +.long 0x752ddad7,0x98157504,0xa1a68a97,0xd60bd74f,0xf658fb99,0x7047a3a9,0x5f8511e4,0x1f5d86d6,0x4b5a6d88,0xb8a4bc42,0x1abefa7d,0x69eb2c33,0x13c9c510,0x95bf39e8,0xd48aab43,0xf571960a +.long 0x704e23c6,0x7e8cfbcf,0x28aaa65b,0xc71b7d22,0x245e3c83,0xa041b2bd,0xd21854ff,0x69b98834,0x963bfeec,0x89d227a3,0xde7da7cb,0x99947aaa,0xee68a9b1,0x1d9ee9db,0x698ec368,0x0a08f003 +.long 0x78ef2487,0xe9ea4094,0x02cfec26,0xc8d2d415,0xb7dcf328,0xc52f9a6e,0x85b6a937,0x0ed489e3,0xbef3366e,0x9b94986b,0xedddddb8,0x0de59c70,0xeadddbe2,0xffdb748c,0x8266ea40,0x9b9784bb +.long 0x1a93507a,0x142b5502,0x8d3c06cf,0xb4cd1187,0x91ec3f40,0xdf70e76a,0x4e7553c2,0x484e81ad,0x272e9d6e,0x830f87b5,0xc6ff514a,0xea1c93e5,0xc4192a8e,0x67cc2adc,0x42f4535a,0xc77e27e2 +.long 0xd2b713c5,0x9cdbab36,0xcf7b0cd3,0x86274ea0,0x09af826b,0x784680f3,0x0c72dea3,0xbfcc837a,0xd6529b73,0xa8bdfe9d,0x63a88002,0x708aa228,0xc91d45b9,0x6c7a9a54,0xfd004f56,0xdf1a38bb +.long 0xb8bad853,0x2e8c9a26,0x3723eae7,0x2d52cea3,0x56ca2830,0x054d6d81,0x9a8dc411,0xa3317d14,0xfd4ddeda,0xa08662fe,0xb55d792b,0xed2a153a,0xbfc6e944,0x7035c16a,0x00171cf3,0xb6bc5834 +.long 0x83d102b6,0xe27152b3,0x0646b848,0xfe695a47,0x916e6d37,0xa5bb09d8,0x0d17015e,0xb4269d64,0x0a1d2285,0x8d8156a1,0x46d26d72,0xfeef6c51,0x4c5434a7,0x9dac57c8,0x59d39e31,0x0282e5be +.long 0x721c486d,0xedfff181,0xbc58824e,0x301baf10,0x00570031,0x8136a6aa,0x1cddde68,0x55aaf78c,0x59c63952,0x26829371,0x8bc25baf,0x3a3bd274,0xb7e52dc3,0xecdf8657,0xfd78e6c8,0x2dd8c087 +.long 
0xf5531461,0x20553274,0x5d95499b,0x8b4a1281,0x1a80f9d2,0xe2c8763a,0x4ddec758,0xd1dbe32b,0x30c34169,0xaf12210d,0x78baa533,0xba74a953,0xa438f254,0x3d133c6e,0x201bef5b,0xa431531a +.long 0xf669d7ec,0x15295e22,0x357fb515,0xca374f64,0xeaa3fdb3,0x8a8406ff,0xdf3f2da8,0x106ae448,0x33c8e9a1,0x8f9b0a90,0x71ad5885,0x234645e2,0x1c0aed14,0x3d083224,0x7a942d46,0xf10a7d3e +.long 0x40d5c9be,0x7c11deee,0xba84ed98,0xb2bae7ff,0xaad58ddd,0x93e97139,0x3f6d1fa3,0x3d872796,0x8569ff13,0x483aca81,0x9a600f72,0x8b89a5fb,0xc06f2b86,0x4cbc27c3,0x63ad9c0b,0x22130713 +.long 0x48ac2840,0xb5358b1e,0xecba9477,0x18311294,0xa6946b43,0xda58f990,0x9ab41819,0x3098baf9,0x4198da52,0x66c4c158,0x146bfd1b,0xab4fc17c,0xbf36a908,0x2f0a4c3c,0x58cf7838,0x2ae9e34b +.long 0x3fa11b1f,0xf411529e,0x974af2b4,0x21e43677,0xc230793b,0x7c20958e,0x16e840f3,0x710ea885,0xc5dc67cf,0xfc0b21fc,0x88405718,0x08d51647,0xcfe49eb7,0xd955c21f,0x56dd4a1f,0x9722a5d5 +.long 0xc861baa5,0xc9ef50e2,0x9505ac3e,0xc0c21a5d,0x8b7c063f,0xaf6b9a33,0x2f4779c1,0xc6370339,0x638167c3,0x22df99c7,0x795db30c,0xfe6ffe76,0xa4854989,0x2b822d33,0x30563aa5,0xfef031dd +.long 0xd57c667f,0x16b09f82,0xcc0b76f1,0xc70312ce,0xc9118aec,0xbf04a9e6,0x3409d133,0x82fcb419,0xab45d44d,0x1a8ab385,0x617b83a3,0xfba07222,0x58e81b52,0xb05f50dd,0x21ce5aff,0x1d8db553 +.long 0xe344a873,0x3097b8d4,0xfe36d53e,0x7d8d116d,0x7875e750,0x6db22f58,0x43e144ea,0x2dc5e373,0xe799eb95,0xc05f32e6,0x6899e6ec,0xe9e5f4df,0x1fab23d5,0xbdc3bd68,0x73af60e6,0xb72b8ab7 +.long 0x2cecc84a,0x8db27ae0,0x7bdb871c,0x600016d8,0xd7c46f58,0x42a44b13,0xc3a77d39,0xb8919727,0xdafd6088,0xcfc6bbbd,0x6bd20d39,0x1a740146,0x98c41072,0x8c747abd,0xbdf68ea1,0x4c91e765 +.long 0x08819a78,0x7c95e5ca,0xc9587921,0xcf48b729,0xdebbcc7d,0x091c7c5f,0xf0e05149,0x6f287404,0x26cd44ec,0xf83b5ac2,0xcfea250e,0x88ae32a6,0x1d06ebc5,0x6ac5047a,0xd434f781,0xc7e550b4 +.long 0x5c727bd2,0x61ab1cf2,0x1cf915b0,0x2e4badb1,0xf69d3920,0x1b4dadec,0xf14c1dfe,0xe61b1ca6,0xbd6bd51f,0x90b479cc,0x8045ec30,0x8024e401,0x25ef0e62,0xcab29ca3,0x49e4ebc0,0x4f2e9416 +.long 0x0ccced58,0x45eb40ec,0x0da44f98,0x25cd4b9c,0x871812c6,0x43e06458,0x16cef651,0x99f80d55,0xce6dc153,0x571340c9,0xd8665521,0x138d5117,0x4e07014d,0xacdb45bc,0x84b60b91,0x2f34bb38 +.long 0x2ae8921e,0xf44a4fd2,0x892ba1e2,0xb039288e,0xb1c180b2,0x9da50174,0x1693dc87,0x6b70ab66,0xe7057481,0x7e9babc9,0x9c80dc41,0x4581ddef,0x51294682,0x0c890da9,0x3f4736e5,0x0b5629d3 +.long 0xb06f5b41,0x2340c79e,0x4e243469,0xa42e84ce,0x045a71a9,0xf9a20135,0xd27b6fb6,0xefbfb415,0x9d33cd6f,0x25ebea23,0xaa6c0af8,0x9caedb88,0xd9ce6f96,0x53dc7e9a,0x51e0b15a,0x3897f9fd +.long 0x8e5d788e,0xf51cb1f8,0xe1d490ee,0x1aec7ba8,0xcc58cb3c,0x265991e0,0x9fc3ad31,0x9f306e8c,0x5040a0ac,0x5fed006e,0xfb476f2e,0xca9d5043,0xbeea7a23,0xa19c06e8,0x0edabb63,0xd2865801 +.long 0x6967469a,0xdb92293f,0x8d8a8ed8,0x2894d839,0xbbc77122,0x87c9e406,0x2ea3a26a,0x8671c6f1,0xd7de9853,0xe42df8d6,0xb1f2bcc7,0x2e3ce346,0x899d50cf,0xda601dfc,0xfb1b598f,0xbfc913de +.long 0xe61f7908,0x81c4909f,0x9bbc7b29,0x192e304f,0xc104b338,0xc3ed8738,0x783f5d61,0xedbe9e47,0x2db30660,0x0c06e9be,0xc0eb7d8e,0xda3e613f,0x322e096e,0xd8fa3e97,0xd336e247,0xfebd91e8 +.long 0xdf655a49,0x8f13ccc4,0x5eb20210,0xa9e00dfc,0xc656b6ea,0x84631d0f,0xd8c0d947,0x93a058cd,0x67bd3448,0x6846904a,0xf394fd5c,0x4a3d4e1a,0xdb225f52,0xc102c1a5,0xfc4f5e9a,0xe3455bba +.long 0x4b9ad1ce,0x6b36985b,0x5bb7f793,0xa9818536,0x48b1a416,0x6c25e1d0,0x3c81bee7,0x1381dd53,0x7a4a7620,0xd2a30d61,0x39b8944c,0xc8412926,0x7a97c33a,0x3c1c6fbe,0x938664e7,0x941e541d +.long 
0x4a34f239,0x417499e8,0xb90402d5,0x15fdb83c,0x433aa832,0xb75f46bf,0x63215db1,0xb61e15af,0xa127f89a,0xaabe59d4,0x07e816da,0x5d541e0c,0xa618b692,0xaaba0659,0x17266026,0x55327733 +.long 0x95f57552,0xaf53a0fc,0x6cacb0c9,0x32947650,0xc821be01,0x253ff58d,0xa06f1146,0xb0309531,0x05c2e54d,0x59bbbdf5,0x26e8dd22,0x158f27ad,0x397e1e53,0xcc5b7ffb,0x7fc1e50d,0xae03f65b +.long 0x9c95f0f9,0xa9784ebd,0x24640771,0x5ed9deb2,0x035561c4,0x31244af7,0x7ee857de,0x87332f3a,0x2b9e0d88,0x09e16e9e,0x56a06049,0x52d910f4,0xa9592f48,0x507ed477,0x2365d678,0x85cb917b +.long 0x4c8998d1,0xf8511c93,0x730ea58f,0x2186a3f1,0xb2029db0,0x50189626,0x02ceb75a,0x9137a6d9,0x748bc82c,0x2fe17f37,0x80469f8c,0x87c2e931,0xbf891aa2,0x850f71cd,0x75ec3d8d,0x0ca1b89b +.long 0x5e1cd3cd,0x516c43aa,0x9a887c28,0x89397808,0xddea1f9f,0x0059c699,0x8e6868f7,0x7737d6fa,0x60f1524b,0x6d93746a,0xba052aa7,0x36985e55,0xed923ea5,0x41b1d322,0x25852a11,0x3429759f +.long 0x092e9f41,0xbeca6ec3,0x62256bbd,0x3a238c66,0x70ad487d,0xd82958ea,0x65610d93,0x4ac8aaf9,0x5e4ccab0,0x3fa101b1,0x9de14bfb,0x9bf430f2,0x6531899d,0xa10f5cc6,0xea8ce17d,0x590005fb +.long 0x24544cb6,0xc437912f,0xd79ac2e3,0x9987b71a,0xc058a212,0x13e3d9dd,0xd2de9606,0x00075aac,0x6cac8369,0x80ab508b,0xf54f6c89,0x87842be7,0x6bc532a4,0xa7ad663d,0x78a91bc8,0x67813de7 +.long 0xc3427239,0x5dcb61ce,0xc56934d9,0x5f3c7cf0,0xe3191591,0xc079e0fb,0xb01aada7,0xe40896bd,0x0492d25f,0x8d466791,0xe7408276,0x8aeb30c9,0x9287aacc,0xe9437495,0x79fe03d4,0x23d4708d +.long 0xd0c05199,0x8cda9cf2,0xfae78454,0x502fbc22,0xf572a182,0xc0bda9df,0x6158b372,0x5f9b71b8,0x2b82dd07,0xe0f33a59,0x9523032e,0x76302735,0xc4505a32,0x7fe1a721,0xf796409f,0x7b6e3e82 +.long 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,0xe0cdf943,0x2c41114c,0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,0x42ff9297,0x20477abf +.long 0x292a9287,0xa004dcb3,0x77b092c7,0xddc15cf6,0x806c0605,0x083a8464,0x3db997b0,0x4a68df70,0x05bf7dd0,0x9c134e45,0x8ccf7f8c,0xa4e63d39,0x41b5f8af,0xa6e6517f,0xad7bc1cc,0xaa8b9342 +.long 0x1e706ad9,0x126f35b5,0xc3a9ebdf,0xb99cebb4,0xbf608d90,0xa75389af,0xc6c89858,0x76113c4f,0x97e2b5aa,0x80de8eb0,0x63b91304,0x7e1022cc,0x6ccc066c,0x3bdab605,0xb2edf900,0x33cbb144 +.long 0x7af715d2,0xc4176471,0xd0134a96,0xe2f7f594,0xa41ec956,0x2c1873ef,0x77821304,0xe4e7b4f6,0x88d5374a,0xe5c8ff97,0x80823d5b,0x2b915e63,0xb2ee8fe2,0xea6bc755,0xe7112651,0x6657624c +.long 0xdace5aca,0x157af101,0x11a6a267,0xc4fdbcf2,0xc49c8609,0xdaddf340,0xe9604a65,0x97e49f52,0x937e2ad5,0x9be8e790,0x326e17f1,0x846e2508,0x0bbbc0dc,0x3f38007a,0xb11e16d6,0xcf03603f +.long 0x7442f1d5,0xd6f800e0,0x66e0e3ab,0x475607d1,0xb7c64047,0x82807f16,0xa749883d,0x8858e1e3,0x8231ee10,0x5859120b,0x638a1ece,0x1b80e7eb,0xc6aa73a4,0xcb72525a,0x844423ac,0xa7cdea3d +.long 0xf8ae7c38,0x5ed0c007,0x3d740192,0x6db07a5c,0x5fe36db3,0xbe5e9c2a,0x76e95046,0xd5b9d57a,0x8eba20f2,0x54ac32e7,0x71b9a352,0xef11ca8f,0xff98a658,0x305e373e,0x823eb667,0xffe5a100 +.long 0xe51732d2,0x57477b11,0x2538fc0e,0xdfd6eb28,0x3b39eec5,0x5c43b0cc,0xcb36cc57,0x6af12778,0x06c425ae,0x70b0852d,0x5c221b9b,0x6df92f8c,0xce826d9c,0x6c8d4f9e,0xb49359c3,0xf59aba7b +.long 0xda64309d,0x5c8ed8d5,0x91b30704,0x61a6de56,0x2f9b5808,0xd6b52f6a,0x98c958a7,0x0eee4194,0x771e4caa,0xcddd9aab,0x78bc21be,0x83965dfd,0xb3b504f5,0x02affce3,0x561c8291,0x30847a21 +.long 0x52bfda05,0xd2eb2cf1,0x6197b98c,0xe0e4c4e9,0xf8a1726f,0x1d35076c,0x2db11e3d,0x6c06085b,0x4463ba14,0x15c0c4d7,0x0030238c,0x9d292f83,0x3727536d,0x1311ee8b,0xbeaedc1e,0xfeea86ef +.long 
0x66131e2e,0xb9d18cd3,0x80fe2682,0xf31d974f,0xe4160289,0xb6e49e0f,0x08e92799,0x7c48ec0b,0xd1989aa7,0x818111d8,0xebf926f9,0xb34fa0aa,0xa245474a,0xdb5fe2f5,0x3c7ca756,0xf80a6ebb +.long 0xafa05dd8,0xa7f96054,0xfcaf119e,0x26dfcf21,0x0564bb59,0xe20ef2e3,0x61cb02b8,0xef4dca50,0x65d30672,0xcda7838a,0xfd657e86,0x8b08d534,0x46d595c8,0x4c5b4395,0x425cb836,0x39b58725 +.long 0x3de9abe3,0x8ea61059,0x9cdc03be,0x40434881,0xcfedce8c,0x9b261245,0xcf5234a1,0x78c318b4,0xfde24c99,0x510bcf16,0xa2c2ff5d,0x2a77cb75,0x27960fb4,0x9c895c2b,0xb0eda42b,0xd30ce975 +.long 0x1a62cc26,0xfda85393,0x50c0e052,0x23c69b96,0xbfc633f3,0xa227df15,0x1bae7d48,0x2ac78848,0x187d073d,0x487878f9,0x967f807d,0x6c2be919,0x336e6d8f,0x765861d8,0xce528a43,0x88b8974c +.long 0xff57d051,0x09521177,0xfb6a1961,0x2ff38037,0xa3d76ad4,0xfc0aba74,0x25a7ec17,0x7c764803,0x48879bc8,0x7532d75f,0x58ce6bc1,0xea7eacc0,0x8e896c16,0xc82176b4,0x2c750fed,0x9a30e0b2 +.long 0x421d3aa4,0xc37e2c2e,0xe84fa840,0xf926407c,0x1454e41c,0x18abc03d,0x3f7af644,0x26605ecd,0xd6a5eabf,0x242341a6,0x216b668e,0x1edb84f4,0x04010102,0xd836edb8,0x945e1d8c,0x5b337ce7 +.long 0xc055dc14,0xd2075c77,0x81d89cdf,0x2a0ffa25,0x6ffdcbaf,0x8ce815ea,0xfb648867,0xa3428878,0x884655fb,0x277699cf,0x364d3e41,0xfa5b5bd6,0x441e1cb7,0x01f680c6,0xb70a7d67,0x3fd61e66 +.long 0xcc78cf66,0x666ba2dc,0x6fdbff77,0xb3018174,0x168d4668,0x8d4dd0db,0x1dab3a2a,0x259455d0,0xcde3acec,0xf58564c5,0x13adb276,0x77141925,0x8a303f65,0x527d725d,0xe6f38f7b,0x55deb6c9 +.long 0xb1fa70fb,0xfd5bb657,0xd8073a00,0xfa07f50f,0xbca02500,0xf72e3aa7,0x9975740d,0xf68f895d,0x5cae2a6a,0x30112060,0x02874842,0x01bd7218,0x7ce47bd3,0x3d423891,0x789544f6,0xa66663c1 +.long 0x3272d838,0x864d05d7,0xfa6295c5,0xe22924f9,0x6c2fda32,0x8189593f,0xb184b544,0x330d7189,0xbde1f714,0x79efa62c,0xe5cb1a63,0x35771c94,0x641c8332,0x2f4826b8,0xc8cee854,0x00a894fb +.long 0x36194d40,0xb4b9a39b,0x77612601,0xe857a7c5,0x4ecf2f58,0xf4209dd2,0x5a033487,0x82b9e66d,0xe4e8b9dd,0xc1e36934,0xa42377d7,0xd2372c9d,0x0e3ae43b,0x51dc94c7,0x04474f6f,0x4c57761e +.long 0x1058a318,0xdcdacd0a,0x78053a9a,0x369cf3f5,0x31c68de2,0xc6c3de50,0x3c4b6d9f,0x4653a576,0xaa4e5c97,0x1688dd5a,0xb7ab3c74,0x5be80aa1,0xbc65c283,0x70cefe7c,0x06867091,0x57f95f13 +.long 0x4415503b,0xa39114e2,0x4cbb17e9,0xc08ff7c6,0xd7dec966,0x1eff674d,0x53376f63,0x6d4690af,0xea74237b,0xff6fe32e,0xcd57508e,0xc436d17e,0xedcc40fe,0x15aa28e1,0x581bbb44,0x0d769c04 +.long 0x34eaacda,0xc240b6de,0x2ba0f1de,0xd9e116e8,0x79438e55,0xcbe45ec7,0x96f752d7,0x91787c9d,0xf129ac2f,0x897f532b,0x5a36e22c,0xd307b7c8,0x749fb8f3,0x91940675,0x157fdb28,0xd14f95d0 +.long 0x6ae55043,0xfe51d029,0x44a87de1,0x8931e98f,0x09e4fee2,0xe57f1cc6,0x4e072d92,0x0d063b67,0xed0e4316,0x70a998b9,0x306aca46,0xe74a736b,0x4fda97c7,0xecf0fbf2,0x3e178d93,0xa40f65cb +.long 0x16df4285,0x16253604,0xd0c56ae2,0xb0c9babb,0xcfc5cfc3,0x73032b19,0x09752056,0xe497e5c3,0x164bda96,0x12096bb4,0xa0b74da1,0x1ee42419,0x403826ba,0x8fc36243,0xdc09e660,0x0c8f0069 +.long 0xc27253c9,0x8667e981,0x92b36a45,0x05a6aefb,0x9cb7bb46,0xa62c4b36,0x11f7027b,0x8394f375,0x5f109d0f,0x747bc79c,0x5b8cc60a,0xcad88a76,0x58f09e68,0x80c5a66b,0xf6127eac,0xe753d451 +.long 0x5b0ec6f5,0xc44b74a1,0x5289b2b8,0x47989fe4,0x58d6fc73,0x745f8484,0xf61c70ab,0xec362a6f,0xb3a8ad41,0x070c98a7,0x7b63db51,0x73a20fc0,0xf44c35f4,0xed2c2173,0x9acc9dca,0x8a56149d +.long 0x9ac6e0f4,0x98f17881,0xa413b5ed,0x360fdeaf,0xa300b0fd,0x0625b8f4,0x5b3222d3,0xf1f4d76a,0x587f76b8,0x9d6f5109,0x2317fdb5,0x8b4ee08d,0x8c68b095,0x88089bb7,0x5808d9b9,0x95570e9a +.long 
0x35d33ae7,0xa395c36f,0x50bb5a94,0x200ea123,0x0bafe84b,0x20c789bd,0x0919276a,0x243ef52d,0xe23ae233,0x3934c577,0xa460d1ec,0xb93807af,0xf8fa76a4,0xb72a53b1,0xc3ca4491,0xd8914cb0 +.long 0x3fb42622,0x2e128494,0x500907d5,0x3b2700ac,0x1a95ec63,0xf370fb09,0x31b6dfbd,0xf8f30be2,0x69e55f15,0xf2b2f8d2,0xcc1323e9,0x1fead851,0xd9e5eef6,0xfa366010,0xe316107e,0x64d487b0 +.long 0xd23ddc82,0x4c076b86,0x7e0143f0,0x03fd344c,0x317af2c5,0xa95362ff,0xe18b7a4f,0x0add3db7,0x8260e01b,0x9c673e3f,0x54a1cc91,0xfbeb49e5,0x92f2e433,0x91351bf2,0x851141eb,0xc755e7ec +.long 0x29607745,0xc9a95139,0xa26f2b28,0x0ca07420,0x4bc6f9dd,0xcb2790e7,0xadcaffc0,0x345bbb58,0xbe0f27a2,0xc65ea38c,0x641fcb56,0x67c24d7c,0xa9e2c757,0x2c25f0a7,0x16f16c49,0x93f5cdb0 +.long 0xc5ee30a1,0x2ca5a9d7,0xb909b729,0xd1593635,0xdadeff48,0x804ce9f3,0xb07c30c3,0xec464751,0x9e49af6a,0x89d65ff3,0x6f3d01bc,0xf2d6238a,0x0bced843,0x1095561e,0xc8a13fd8,0x51789e12 +.long 0x763231df,0xd633f929,0xe7cbddef,0x46df9f7d,0xcb265da8,0x01c889c0,0xaf4336d2,0xfce1ad10,0xfc6a0a7e,0x8d110df6,0x6da425dc,0xdd431b98,0x1834aabe,0xcdc4aeab,0x8439b7fc,0x84deb124 +.long 0x3c2a5998,0x8796f169,0x7947190d,0x9b9247b4,0x11597014,0x55b9d9a5,0x7b1566ee,0x7e9dd70d,0xcbcd5e64,0x94ad78f7,0x9bd4c032,0x0359ac17,0x7cc222ae,0x3b11baaf,0xba78e812,0xa6a6e284 +.long 0x24cea1a0,0x8392053f,0x33621491,0xc97bce4a,0x35399ee9,0x7eb1db34,0xece81ad1,0x473f78ef,0xf63d3d0d,0x41d72fe0,0xafab62fc,0xe620b880,0x93158383,0x92096bc9,0x8f896f6c,0x41a21357 +.long 0xc7dcfcab,0x1b5ee2fa,0x9546e007,0x650acfde,0xb1b02e07,0xc081b749,0xf9eca03d,0xda9e41a0,0x175a54ab,0x013ba727,0xea5d8d10,0xca0cd190,0x95fd96a9,0x85ea52c0,0xbc5c3940,0x2c591b9f +.long 0x2bad4d5f,0x6fb4d4e4,0xfef0059b,0xfa4c3590,0xf5122294,0x6a10218a,0xa85751d1,0x9a78a81a,0xa98e84e7,0x04f20579,0x4997e5b5,0xfe1242c0,0xca21e1e4,0xe77a273b,0x9411939d,0xfcc8b1ef +.long 0x92d0487a,0xe20ea302,0x294b91fe,0x1442dbec,0xbb6b0e8f,0x1f7a4afe,0x6889c318,0x1700ef74,0x70f1fc62,0xf5bbffc3,0x69c79cca,0x3b31d4b6,0xa7f6340d,0xe8bc2aab,0xa725e10a,0xb0b08ab4 +.long 0xae340050,0x44f05701,0x1cf0c569,0xba4b3016,0xfbe19a51,0x5aa29f83,0xb71d752e,0x1b9ed428,0xeb4819f5,0x1666e54e,0x9e18b75b,0x616cdfed,0x3ee27b0b,0x112ed5be,0x44c7de4d,0xfbf28319 +.long 0xe0e60d84,0xd685ec85,0x1db7ee78,0x68037e30,0x003c4d6e,0x5b65bdcd,0x93e29a6a,0x33e7363a,0x08d0756c,0x995b3a61,0x2faf134b,0xd727f85c,0x1d337823,0xfac6edf7,0x0439b8b4,0x99b9aa50 +.long 0xe2b4e075,0x722eb104,0x437c4926,0x49987295,0x46a9b82d,0xb1e4c0e4,0x57a006f5,0xd0cb3197,0xd7808c56,0xf3de0f7d,0x51f89772,0xb5c54d8f,0xadbd31aa,0x500a114a,0x295f6cab,0x9afaaaa6 +.long 0x04cf667a,0x94705e21,0x9d3935d7,0xfc2a811b,0x6d09267c,0x560b0280,0xf780e53b,0xf19ed119,0x067b6269,0xf0227c09,0x5caef599,0x967b8533,0x68efeebc,0x155b9243,0xc497bae6,0xcd6d34f5 +.long 0x6cceb370,0x1dd8d5d3,0xa78d7bf9,0x2aeac579,0x70b67a62,0x5d65017d,0x17c53f67,0x70c8e44f,0x86a34d09,0xd1fc0950,0xe7134907,0xe0fca256,0x80fdd315,0xe24fa29c,0xd87499ad,0x2c4acd03 +.long 0x3b5a9ba6,0xbaaf7517,0x12e51a51,0xb9cbe1f6,0x5e154897,0xd88edae3,0x77b66ca0,0xe4309c3c,0xf67f3746,0xf5555805,0xa36401ff,0x85fc37ba,0xd9499a53,0xdf86e2ca,0xecbc955b,0x6270b2a3 +.long 0x974ad33b,0xafae64f5,0xfe7b2df1,0x04d85977,0x4ab03f73,0x2a3db3ff,0x8702740a,0x0b87878a,0x5a061732,0x6d263f01,0xa32a1901,0xc25430ce,0xdb155018,0xf7ebab3d,0x63a9b78e,0x3a86f693 +.long 0xda9f3804,0x349ae368,0xa164349c,0x470f07fe,0x8562baa5,0xd52f4cc9,0x2b290df3,0xc74a9e86,0x43471a24,0xd3a1aa35,0xb8194511,0x239446be,0x81dcd44d,0xbec2dd00,0xc42ac82d,0xca3d7f0f +.long 
0xfdaf4520,0x1f3db085,0x4549daf2,0xbb6d3e80,0x19ad5c42,0xf5969d8a,0xdbfd1511,0x7052b13d,0x682b9060,0x11890d1b,0xac34452c,0xa71d3883,0x783805b4,0xa438055b,0x4725b23e,0x43241277 +.long 0x4901bbed,0xf20cf96e,0xf432a2bb,0x6419c710,0xdfa9cd7d,0x57a0fbb9,0x00daa249,0x589111e4,0x7b60554e,0x19809a33,0xede283a4,0xea5f8887,0x503bfd35,0x2d713802,0x585d2a53,0x151bb0af +.long 0x43b30ca8,0x40b08f74,0xd9934583,0xe10b5bba,0xb51110ad,0xe8a546d6,0x28e0b6c5,0x1dd50e66,0xcff2b821,0x292e9d54,0x47281760,0x3882555d,0x3724d6e3,0x134838f8,0x22ddcda1,0xf2c679e0 +.long 0x6d2a5768,0x40ee8815,0x1c1e7e2d,0x7f227bd2,0xd04ff443,0x487ba134,0xc614e54b,0x76e2ff3d,0xa3177ec7,0x36b88d6f,0x2328fff5,0xbf731d51,0x49ba158e,0x758caea2,0x02938188,0x5ab8ff4c +.long 0x35edc56d,0x33e16056,0x7e940d79,0x5a69d349,0x03866dcb,0x6c4fd001,0x4893cdef,0x20a38f57,0xfac3a15b,0xfbf3e790,0x7a4f8e6b,0x6ed7ea2e,0xbc3aca86,0xa663eb4f,0x080d53f7,0x22061ea5 +.long 0xf546783f,0x2480dfe6,0x5a0a641e,0xd38bc6da,0x2ede8965,0xfb093cd1,0xacb455cf,0x89654db4,0x26e1adee,0x413cbf9a,0x373294d4,0x291f3764,0x648083fe,0x00797257,0x208cc341,0x25f504d3 +.long 0xc3a0ee43,0x635a8e5e,0x679898ff,0x70aaebca,0x5dc63d56,0x9ee9f547,0xffb34d00,0xce987966,0x5e26310a,0xf9f86b19,0x382a8ca8,0x9e435484,0xc2352fe4,0x253bcb81,0x4474b571,0xa4eac8b0 +.long 0xc1ad8cf8,0xc1b97512,0x99e0b697,0x193b4e9e,0x01e85df0,0x939d2716,0xcd44eafd,0x4fb265b3,0xe51e1ae2,0x321e7dcd,0xe3d8b096,0x8e3a8ca6,0x52604998,0x8de46cb0,0x39072aa7,0x91099ad8 +.long 0x93aa96b8,0x2617f91c,0x7fca2e13,0x0fc8716b,0x95328723,0xa7106f5e,0x262e6522,0xd1c9c40b,0x42b7c094,0xb9bafe86,0x1543c021,0x1873439d,0x5cbefd5d,0xe1baa5de,0x521e8aff,0xa363fc5e +.long 0xf862eaac,0xefe6320d,0x22c647dc,0x14419c63,0x4e46d428,0x0e06707c,0x4a178f8f,0xcb6c834f,0xd30f917c,0x0f993a45,0x9879afee,0xd4c4b049,0x70500063,0xb6142a1e,0xa5d9d605,0x7c9b41c3 +.long 0x2f8ba2c7,0xbc00fc2f,0x7c67aa28,0x0966eb2f,0x5a786972,0x13f7b516,0x8a2fbba0,0x3bfb7557,0x5a2b9620,0x131c4f23,0x6faf46be,0xbff3ed27,0x7e172323,0x9b4473d1,0x339f6246,0x421e8878 +.long 0x25a41632,0x0fa8587a,0xa35b6c93,0xc0814124,0x59ebb8db,0x2b18a9f5,0x76edb29c,0x264e3357,0xc87c51e2,0xaf245ccd,0x501e6214,0x16b3015b,0x0a3882ce,0xbb31c560,0xfec11e04,0x6961bb94 +.long 0xeff7a3a0,0x3b825b8d,0xb1df7326,0xbec33738,0x99604a1f,0x68ad747c,0x9a3bd499,0xd154c934,0x1cc7a906,0xac33506f,0x6c560e8f,0x73bb5392,0x263e3944,0x6428fcbe,0x1c387434,0xc11828d5 +.long 0x3e4b12ff,0x3cd04be1,0x2d88667c,0xc3aad9f9,0x248120cf,0xc52ddcf8,0x2a389532,0x985a892e,0x3bb85fa0,0xfbb4b21b,0x8dfc6269,0xf95375e0,0x7ee2acea,0xfb4fb06c,0x309c4d1f,0x6785426e +.long 0xd8ceb147,0x659b17c8,0xb70a5554,0x9b649eee,0xac6bc634,0x6b7fa0b5,0x1d6e732f,0xd99fe2c7,0x8d3abba2,0x30e6e762,0xa797b799,0x18fee6e7,0xc696464d,0x5c9d360d,0x27bfde12,0xe3baeb48 +.long 0xf23206d5,0x2bf5db47,0x1d260152,0x2f6d3420,0x3f8ff89a,0x17b87653,0x378fa458,0x5157c30c,0x2d4fb936,0x7517c5c5,0xe6518cdc,0xef22f7ac,0xbf847a64,0xdeb483e6,0x92e0fa89,0xf5084558 +.long 0xdf7304d4,0xab9659d8,0xff210e8e,0xb71bcf1b,0xd73fbd60,0xa9a2438b,0x5d11b4de,0x4595cd1f,0x4835859d,0x9c0d329a,0x7dbb6e56,0x4a0f0d2d,0xdf928a4e,0xc6038e5e,0x8f5ad154,0xc9429621 +.long 0xf23f2d92,0x91213462,0x60b94078,0x6cab71bd,0x176cde20,0x6bdd0a63,0xee4d54bc,0x54c9b20c,0x9f2ac02f,0x3cd2d8aa,0x206eedb0,0x03f8e617,0x93086434,0xc7f68e16,0x92dd3db9,0x831469c5 +.long 0x8f981354,0x8521df24,0x3588a259,0x587e23ec,0xd7a0992c,0xcbedf281,0x38961407,0x06930a55,0xbe5bbe21,0x09320deb,0x2491817f,0xa7ffa5b5,0x09065160,0xe6c8b4d9,0xfff6d2a9,0xac4f3992 +.long 
0x3ae9c1bd,0x7aa7a158,0xe37ce240,0xe0af6d98,0x28ab38b4,0xe54342d9,0x0a1c98ca,0xe8b75007,0xe02358f2,0xefce86af,0xea921228,0x31b8b856,0x0a1c67fc,0x052a1912,0xe3aead59,0xb4069ea4 +.long 0x7fa03cb3,0x3232d6e2,0x0fdd7d88,0xdb938e5b,0x2ccbfc5d,0x04c1d2cd,0xaf3a580f,0xd2f45c12,0x7883e614,0x592620b5,0xbe7c5f26,0x5fd27e68,0x1567e1e3,0x139e45a9,0x44d8aaaf,0x2cc71d2d +.long 0xe36d0757,0x4a9090cd,0xd9a29382,0xf722d7b1,0x04b48ddf,0xfb7fb04c,0xebe16f43,0x628ad2a7,0x20226040,0xcd3fbfb5,0x5104b6c4,0x6c34ecb1,0xc903c188,0x30c0754e,0x2d23cab0,0xec336b08 +.long 0x1e206ee5,0x473d62a2,0x8c49a633,0xf1e27480,0xe9f6b2c3,0x87ab956c,0x62b606ea,0x61830b48,0xe78e815f,0x67cd6846,0x4c02082a,0xfe40139f,0x952ec365,0x52bbbfcb,0x6b9836ab,0x74c11642 +.long 0x558df019,0x9f51439e,0xac712b27,0x230da4ba,0x55185a24,0x518919e3,0x84b78f50,0x4dcefcdd,0xa47d4c5a,0xa7d90fb2,0xb30e009e,0x55ac9abf,0x74eed273,0xfd2fc359,0xdbea8faf,0xb72d824c +.long 0x4513e2ca,0xce721a74,0x38240b2c,0x0b418612,0xd5baa450,0x05199968,0x2b0e8c25,0xeb1757ed,0x3dfac6d5,0x6ebc3e28,0x48a237f5,0xb2431e2e,0x52f61499,0x2acb5e23,0xe06c936b,0x5558a2a7 +.long 0xcbb13d1b,0xd213f923,0x5bfb9bfe,0x98799f42,0x701144a9,0x1ae8ddc9,0x4c5595ee,0x0b8b3bb6,0x3ecebb21,0x0ea9ef2e,0x3671f9a7,0x17cb6c4b,0x726f1d1f,0x47ef464f,0x6943a276,0x171b9484 +.long 0x7ef0329c,0x51a4ae2d,0x91c4402a,0x08509222,0xafd45bbc,0x64a61d35,0x3035a851,0x38f096fe,0xa1dec027,0xc7468b74,0x4fc7dcba,0xe8cf10e7,0xf4a06353,0xea35ff40,0x8b77dd66,0x0b4c0dfa +.long 0xde7e5c19,0x779b8552,0xc1c0256c,0xfab28609,0xabd4743d,0x64f58eee,0x7b6cc93b,0x4e8ef838,0x4cb1bf3d,0xee650d26,0x73dedf61,0x4c1f9d09,0xbfb70ced,0xaef7c9d7,0x1641de1e,0x1ec0507e +.long 0xcde45079,0xcd7e5cc7,0x516ac9e4,0xde173c9a,0xc170315c,0x517a8494,0x91d8e8fb,0x438fd905,0xc7d9630b,0x5145c506,0xf47d4d75,0x6457a87b,0x0d9a80e8,0xd31646bf,0xcef3aabe,0x453add2b +.long 0xa607419d,0xc9941109,0xbb6bca80,0xfaa71e62,0x07c431f3,0x34158c13,0x992bc47a,0x594abebc,0xeb78399f,0x6dfea691,0x3f42cba4,0x48aafb35,0x077c04f0,0xedcd65af,0xe884491a,0x1a29a366 +.long 0x1c21f2bf,0x023a40e5,0xa5057aee,0xf99a513c,0xbcab072e,0xa3fe7e25,0x40e32bcf,0x8568d2e1,0xd3f69d9f,0x904594eb,0x07affab1,0x181a9733,0xb6e330f4,0xe4d68d76,0xc75a7fc1,0x87a6dafb +.long 0xef7d9289,0x549db2b5,0x197f015a,0x2480d4a8,0xc40493b6,0x61d5590b,0x6f780331,0x3a55b52e,0x309eadb0,0x40eb8115,0x92e5c625,0xdea7de5a,0xcc6a3d5a,0x64d631f0,0x93e8dd61,0x9d5e9d7c +.long 0x206d3ffc,0xf297bef5,0x7d808bd4,0x23d5e033,0xd24cf5ba,0x4a4f6912,0x09cdaa8a,0xe4d8163b,0xd3082e8e,0x0e0de9ef,0x0192f360,0x4fe1246c,0x4b8eee0a,0x1f900150,0xf1da391b,0x5219da81 +.long 0xf7ea25aa,0x7bf6a5c1,0xfbb07d5f,0xd165e6bf,0x89e78671,0xe3539361,0x2bac4219,0xa3fcac89,0xf0baa8ab,0xdfab6fd4,0xe2c1c2e5,0x5a4adac1,0x40d85849,0x6cd75e31,0x19b39181,0xce263fea +.long 0x07032c72,0xcb6803d3,0x790968c8,0x7f40d5ce,0xdce978f0,0xa6de86bd,0x368f751c,0x25547c4f,0x65fb2a9e,0xb1e685fd,0x1eb9179c,0xce69336f,0x12504442,0xb15d1c27,0xb911a06b,0xb7df465c +.long 0x315980cd,0xb8d804a3,0xfa3bebf7,0x693bc492,0x2253c504,0x3578aeee,0xcd2474a2,0x158de498,0xcfda8368,0x1331f5c7,0x78d7177e,0xd2d7bbb3,0xf3c1e46e,0xdf61133a,0xd30e7be8,0x5836ce7d +.long 0x94f834cb,0x83084f19,0x429ed782,0xd35653d4,0x59e58243,0xa542f16f,0x0470a22d,0xc2b52f65,0x18f23d96,0xe3b6221b,0x3f5252b4,0xcb05abac,0x87d61402,0xca00938b,0x411933e4,0x2f186cdd +.long 0x9a29a5c5,0xe042ece5,0x3b6c8402,0xb19b3c07,0x19d92684,0xc97667c7,0xebc66372,0xb5624622,0x3c04fa02,0x0cb96e65,0x8eaa39aa,0x83a7176c,0xeaa1633f,0x2033561d,0x4533df73,0x45a9d086 +.long 
0x3dc090bc,0xe0542c1d,0xaa59c167,0x82c996ef,0x0ee7fc4d,0xe3f735e8,0x7c35db79,0x7b179393,0xf8c5dbfd,0xb6419e25,0x1f327b04,0x4d9d7a1e,0x298dfca8,0x979f6f9b,0x8de9366a,0xc7c5dff1 +.long 0x04c82bdd,0x1b7a588d,0xf8319dfd,0x68005534,0xd8eb9580,0xde8a55b5,0x8d5bca81,0x5ea886da,0x252a0b4d,0xe8530a01,0x35eaa0a1,0x1bffb4fe,0xd8e99563,0x2ad828b1,0x95f9cd87,0x7de96ef5 +.long 0xd77d970c,0x4abb2d0c,0xd33ef9cb,0x03cfb933,0x8b211fe9,0xb0547c01,0xa56ed1c6,0x2fe64809,0xc2ac98cc,0xcb7d5624,0x1a393e33,0x2a1372c0,0x29660521,0xc8d1ec1c,0xb37ac3e9,0xf3d31b04 +.long 0x5ece6e7c,0xa29ae9df,0x0facfb55,0x0603ac8f,0xdda233a5,0xcfe85b7a,0xbd75f0b8,0xe618919f,0x99bf1603,0xf555a3d2,0xf184255a,0x1f43afc9,0x319a3e02,0xdcdaf341,0x03903a39,0xd3b117ef +.long 0x65d1d131,0xe095da13,0xc37ad03e,0x86f16367,0x462cd8dd,0x5f37389e,0xd67a60e6,0xc103fa04,0xf4b478f0,0x57c34344,0xe117c98d,0xce91edd8,0x231fc12e,0x001777b0,0xb207bccb,0x11ae47f2 +.long 0x20f8a242,0xd983cf8d,0xf22e1ad8,0x7aff5b1d,0x7fc4feb3,0x68fd11d0,0xb0f1c3e1,0x5d53ae90,0xec041803,0x50fb7905,0x14404888,0x85e3c977,0xac628d8f,0x0e67faed,0x6668532c,0x2e865150 +.long 0x6a67a6b0,0x15acaaa4,0xb25cec41,0xf4cdee25,0xe4c6701e,0x49ee565a,0xfc7d63d8,0x2a04ca66,0xef0543fb,0xeb105018,0xd1b0d81d,0xf709a4f5,0x2915d333,0x5b906ee6,0x96f1f0ab,0xf4a87412 +.long 0x4d82f4c2,0xb6b82fa7,0x6804efb3,0x90725a60,0xadc3425e,0xbc82ec46,0x2787843e,0xb7b80581,0xdd1fc74c,0xdf46d91c,0xe783a6c4,0xdc1c62cb,0x1a04cbba,0x59d1b9f3,0x95e40764,0xd87f6f72 +.long 0x317f4a76,0x02b4cfc1,0x91036bce,0x8d2703eb,0xa5e72a56,0x98206cc6,0xcf53fb0f,0x57be9ed1,0xef0b17ac,0x09374571,0xd9181b38,0x74b2655e,0x89935d0e,0xc8f80ea8,0x91529936,0xc0d9e942 +.long 0x1e84e0e5,0x19686041,0xaea34c93,0xa5db84d3,0x7073a732,0xf9d5bb19,0x6bcfd7c0,0xb8d2fe56,0xf3eb82fa,0x45775f36,0xfdff8b58,0x8cb20ccc,0x8374c110,0x1659b65f,0x330c789a,0xb8b4a422 +.long 0x6fe8208b,0x75e3c3ea,0x286e78fe,0xbd74b9e4,0xd7d93a1a,0x0be2e81b,0xdd0a5aae,0x7ed06e27,0x6be8b800,0x721f5a58,0xd846db28,0x428299d1,0x5be88ed3,0x95cb8e6b,0x1c034e11,0xc3186b23 +.long 0x8977d99b,0xa6312c9e,0x83f531e7,0xbe944331,0x18d3b1d4,0x8232c0c2,0xe1247b73,0x617aae8b,0x282aec3b,0x40153fc4,0xf7b8f823,0xc6063d2f,0x3304f94c,0x68f10e58,0xee676346,0x31efae74 +.long 0x40a9b97c,0xbadb6c6d,0x4f666256,0x14702c63,0x5184b2e3,0xdeb954f1,0x94b6ca40,0x5184a526,0x003c32ea,0xfff05337,0x205974c7,0x5aa374dd,0x4b0dd71a,0x9a763854,0xdeb947ec,0x459cd27f +.long 0x459c2b92,0xa6e28161,0x75ee8ef5,0x2f020fa8,0x30b06310,0xb132ec2d,0xbc6a4530,0xc3e15899,0xaa3f451a,0xdc5f53fe,0xc2d9acac,0x3a3c7f23,0x6b27e58b,0x2ec2f892,0xd742799f,0x68466ee7 +.long 0x1fa26613,0x98324dd4,0xbdc29d63,0xa2dc6dab,0xd712d657,0xf9675faa,0x21fd8d15,0x813994be,0xfd4f7553,0x5ccbb722,0xf3a36b20,0x5135ff8b,0x69559df5,0x44be28af,0x9d41bf30,0x40b65bed +.long 0x3734e520,0xd98bf2a4,0x209bdcba,0x5e3abbe3,0xbc945b35,0x77c76553,0xc6ef14aa,0x5331c093,0x76b60c80,0x518ffe29,0x7ace16f8,0x2285593b,0xbe2b9784,0xab1f64cc,0xab2421b6,0xe8f2c0d9 +.long 0xc1df065c,0x617d7174,0x5f6578fa,0xafeeb5ab,0x263b54a8,0x16ff1329,0xc990dce3,0x45c55808,0xecc8c177,0x42eab6c0,0x5982ecaa,0x799ea9b5,0xb607ef8e,0xf65da244,0x32a3fc2c,0x8ab226ce +.long 0x7ea973dc,0x745741e5,0x20888f2e,0x5c00ca70,0x45fd9cf1,0x7cdce3cf,0x5507f872,0x8a741ef1,0x196b4cec,0x47c51c2f,0xc97ea618,0x70d08e43,0x15b18a2b,0x930da15c,0x2f610514,0x33b6c678 +.long 0x07ac9794,0xc662e4f8,0xba06cb79,0x1eccf050,0xe7d954e5,0x1ff08623,0x24cf71c3,0x6ef2c5fb,0x67978453,0xb2c063d2,0x1d654af8,0xa0cf3796,0x7ebdaa37,0x7cb242ea,0xb86747e0,0x206e0b10 +.long 
0xd5ecfefc,0x481dae5f,0xc2bff8fc,0x07084fd8,0xea324596,0x8040a01a,0xd4de4036,0x4c646980,0xd65abfc3,0x9eb8ab4e,0x13541ec7,0xe01cb91f,0xfd695012,0x8f029adb,0x3c7569ec,0x9ae28483 +.long 0xa66d80a1,0xa5614c9e,0x75f5f911,0x680a3e44,0xceba4fc1,0x0c07b14d,0xa13071c1,0x891c285b,0x799ece3c,0xcac67ceb,0x41e07e27,0x29b910a9,0xf2e43123,0x66bdb409,0x7ac9ecbe,0x06f8b137 +.long 0x38547090,0x5981fafd,0x85e3415d,0x19ab8b9f,0xc7e31b27,0xfc28c194,0x6fbcbb42,0x843be0aa,0xa6db836c,0xf3b1ed43,0x01a45c05,0x2a1330e4,0x95c1a377,0x4f19f3c5,0x44b5ee33,0xa85f39d0 +.long 0x4ae52834,0x3da18e6d,0x7423dcb0,0x5a403b39,0xf2374aef,0xbb555e0a,0x1e8ca111,0x2ad599c4,0x014b3bf8,0x1b3a2fb9,0xf66d5007,0x73092684,0xc4340102,0x079f1426,0x8fddf4de,0x1827cf81 +.long 0xf10ff927,0xc83605f6,0x23739fc6,0xd3871451,0xcac1c2cc,0x6d163450,0xa2ec1ac5,0x6b521296,0x6e3cb4a5,0x0606c4f9,0x778abff7,0xe47d3f41,0xbe8e3a45,0x425a8d5e,0xa6102160,0x53ea9e97 +.long 0x39cbb688,0x477a106e,0xf3386d32,0x532401d2,0xb1b9b421,0x8e564f64,0x81dad33f,0xca9b8388,0x2093913e,0xb1422b4e,0x69bc8112,0x533d2f92,0xebe7b2c7,0x3fa017be,0xcaf197c6,0xb2767c4a +.long 0xaedbae9f,0xc925ff87,0x36880a54,0x7daf0eb9,0x9c4d0e71,0x9284ddf5,0x316f8cf5,0x1581cf93,0x3ac1f452,0x3eeca887,0xfb6aeffe,0xb417fce9,0xeefb8dc3,0xa5918046,0x02209400,0x73d318ac +.long 0x728693e5,0xe800400f,0x339927ed,0xe87d814b,0x57ea9910,0x93e94d3b,0x2245fb69,0xff8a35b6,0x7f200d34,0x043853d7,0x0f653ce1,0x470f1e68,0x59a06379,0x81ac05bd,0x03930c29,0xa14052c2 +.long 0x26bc2797,0x6b72fab5,0x99f16771,0x13670d16,0x1e3e48d1,0x00170052,0xb7adf678,0x978fe401,0xd41c5dd4,0x55ecfb92,0xc7b27da5,0x5ff8e247,0x013fb606,0xe7518272,0x2f547a3c,0x5768d7e5 +.long 0x60017a5f,0xbb24eaa3,0x9c64ce9b,0x6b18e6e4,0x103dde07,0xc225c655,0x7592f7ea,0xfc3672ae,0xd06283a1,0x9606ad77,0xe4d59d99,0x542fc650,0x2a40e7c2,0xabb57c49,0xa8db9f55,0xac948f13 +.long 0xb04465c3,0x6d4c9682,0x6468bd15,0xe3d062fa,0x5f318d7e,0xa51729ac,0x9eb6fc95,0x1fc87df6,0x0591f652,0x63d146a8,0x589621aa,0xa861b8f7,0xce31348c,0x59f5f15a,0x440da6da,0x8f663391 +.long 0xb591ffa3,0xcfa778ac,0x4cdfebce,0x027ca9c5,0x444ea6b3,0xbe8e05a5,0xa78d8254,0x8aab4e69,0xb474d6b8,0x2437f04f,0x045b3855,0x6597ffd4,0xca47ecaa,0xbb0aea4e,0x85c7ebfc,0x568aae83 +.long 0xc73b2383,0x0e966e64,0xd17d8762,0x49eb3447,0x8da05dab,0xde107821,0x016b7236,0x443d8baa,0xea7610d6,0x163b63a5,0xce1ca979,0xe47e4185,0x80baa132,0xae648b65,0x0e0d5b64,0xebf53de2 +.long 0xd3c8c1ca,0x8d3bfcb4,0x5d04b309,0x0d914ef3,0x3de7d395,0x55ef6415,0x26b850e8,0xbde1666f,0xd449ab19,0xdbe1ca6e,0xe89a2672,0x8902b322,0xdacb7a53,0xb1674b7e,0xf52523ff,0x8e9faf6e +.long 0x9a85788b,0x6ba535da,0xbd0626d4,0xd21f03ae,0xe873dc64,0x099f8c47,0x018ec97e,0xcda8564d,0xde92c68c,0x3e8d7a5c,0x73323cc4,0x78e035a1,0xf880ff7c,0x3ef26275,0x273eedaa,0xa4ee3dff +.long 0xaf4e18f8,0x58823507,0x0672f328,0x967ec9b5,0x559d3186,0x9ded19d9,0x6cdce39c,0x5e2ab3de,0x11c226df,0xabad6e4d,0x87723014,0xf9783f43,0x1a885719,0x9a49a0cf,0x90da9dbf,0xfc0c1a5a +.long 0x571d92ac,0x8bbaec49,0x4692517f,0x569e85fe,0xa14ea4af,0x8333b014,0x12e5c5ad,0x32f2a62f,0x06d89b85,0x98c2ce3a,0x2ff77a08,0xb90741aa,0x01f795a2,0x2530defc,0x84b3c199,0xd6e5ba0b +.long 0x12e4c936,0x7d8e8451,0xbd0be17b,0xae419f7d,0x22262bc9,0xa583fc8c,0x91bfe2bd,0x6b842ac7,0x440d6827,0x33cef4e9,0xef81fb14,0x5f69f4de,0x234fbb92,0xf16cf6f6,0xd9e7e158,0x76ae3fc3 +.long 0xe9740b33,0x4e89f6c2,0x4962d6a1,0x677bc85d,0x68d10d15,0x6c6d8a7f,0x0257b1cd,0x5f9a7224,0x4ad85961,0x7096b916,0xe657ab4a,0x5f8c47f7,0xf7461d7e,0xde57d7d0,0x80ce5ee2,0x7eb6094d +.long 
0x34190547,0x0b1e1dfd,0xf05dd150,0x8a394f43,0x97df44e6,0x0a9eb24d,0x87675719,0x78ca06bf,0x6ffeec22,0x6f0b3462,0x36cdd8fb,0x9d91bcea,0xa105be47,0xac83363c,0x069710e3,0x81ba76c1 +.long 0x28c682c6,0x3d1b24cb,0x8612575b,0x27f25228,0xe8e66e98,0xb587c779,0x405eb1fe,0x7b0c03e9,0x15b548e7,0xfdf0d030,0x38b36af7,0xa8be76e0,0x4f310c40,0x4cdab04a,0xf47ecaec,0x6287223e +.long 0x8b399320,0x678e6055,0xc01e4646,0x61fe3fa6,0x03261a5e,0xc482866b,0x5c2f244a,0xdfcf45b8,0x2f684b43,0x8fab9a51,0xc7220a66,0xf796c654,0xf5afa58f,0x1d90707e,0x4fdbe0de,0x2c421d97 +.long 0xaf2ebc2f,0xc4f4cda3,0xcb4efe24,0xa0af843d,0x9ccd10b1,0x53b857c1,0x914d3e04,0xddc9d1eb,0x62771deb,0x7bdec8bb,0x91c5aa81,0x829277aa,0x832391ae,0x7af18dd6,0xc71a84ca,0x1740f316 +.long 0xeeaf8c49,0x8928e99a,0x6e24d728,0xee7aa73d,0xe72b156c,0x4c5007c2,0xed408a1d,0x5fcf57c5,0xb6057604,0x9f719e39,0xc2868bbf,0x7d343c01,0x7e103e2d,0x2cca254b,0xf131bea2,0xe6eb38a9 +.long 0x8be762b4,0xb33e624f,0x058e3413,0x2a9ee4d1,0x67d805fa,0x968e6369,0x7db8bfd7,0x9848949b,0xd23a8417,0x5308d7e5,0xf3e29da5,0x892f3b1d,0x3dee471f,0xc95c139e,0xd757e089,0x8631594d +.long 0xde918dcc,0xe0c82a3c,0x26fdcf4b,0x2e7b5994,0x32cb1b2d,0x82c50249,0x7657ae07,0xea613a9d,0xf1fdc9f7,0xc2eb5f6c,0x879fe682,0xb6eae8b8,0x591cbc7f,0x253dfee0,0x3e1290e6,0x000da713 +.long 0x1f095615,0x1083e2ea,0x14e68c33,0x0a28ad77,0x3d8818be,0x6bfc0252,0xf35850cd,0xb585113a,0x30df8aa1,0x7d935f0b,0x4ab7e3ac,0xaddda07c,0x552f00cb,0x92c34299,0x2909df6c,0xc33ed1de +.long 0x80e87766,0x22c2195d,0x9ddf4ac0,0x9e99e6d8,0x65e74934,0x09642e4e,0xff1ff241,0x2610ffa2,0x751c8159,0x4d1d47d4,0xaf3a9363,0x697b4985,0x87477c33,0x0318ca46,0x9441eff3,0xa90cb565 +.long 0x36f024cb,0x58bb3848,0x36016168,0x85be1f77,0xdc7e07f1,0x6c59587c,0xaf1d8f02,0x191be071,0xcca5e55c,0xbf169fa5,0xf7d04eac,0x3864ba3c,0x8d7d05db,0x915e367f,0xa6549e5d,0xb48a876d +.long 0x580e40a2,0xef89c656,0x728068bc,0xf194ed8c,0xa47990c9,0x74528045,0x5e1a4649,0xf53fc7d7,0x78593e7d,0xbec5ae9b,0x41db65d7,0x2cac4ee3,0x04a3d39b,0xa8c1eb24,0x03f8f3ef,0x53b7d634 +.long 0x3e07113c,0x2dc40d48,0x7d8b63ae,0x6e4a5d39,0x79684c2b,0x5582a94b,0x622da26c,0x932b33d4,0x0dbbf08d,0xf534f651,0x64c23a52,0x211d07c9,0xee5bdc9b,0x0eeece0f,0xf7015558,0xdf178168 +.long 0x0a712229,0xd4294635,0x09273f8c,0x93cbe448,0x8f13bc83,0x00b095ef,0x8798978c,0xbb741972,0x56dbe6e7,0x9d7309a2,0x5a5d39ec,0xe578ec56,0x851f9a31,0x3961151b,0xe5709eb4,0x2da7715d +.long 0x53dfabf0,0x867f3017,0xb8e39259,0x728d2078,0x815d9958,0x5c75a0cd,0x16603be1,0xf84867a6,0x70e35b1c,0xc865b13d,0x19b03e2c,0x02414468,0xac1f3121,0xe46041da,0x6f028a7c,0x7c9017ad +.long 0x0a482873,0xabc96de9,0xb77e54d4,0x4265d6b1,0xa57d88e7,0x68c38e79,0x9ce82de3,0xd461d766,0x64a7e489,0x817a9ec5,0xa0def5f2,0xcc5675cd,0x985d494e,0x9a00e785,0x1b03514a,0xc626833f +.long 0x83cdd60e,0xabe7905a,0xa1170184,0x50602fb5,0xb023642a,0x689886cd,0xa6e1fb00,0xd568d090,0x0259217f,0x5b1922c7,0xc43141e4,0x93831cd9,0x0c95f86e,0xdfca3587,0x568ae828,0xdec2057a +.long 0xf98a759a,0xc44ea599,0xf7c23c1d,0x55a0a7a2,0x94c4f687,0xd5ffb6e6,0x12848478,0x3563cce2,0xe7b1fbe1,0x812b3517,0x4f7338e0,0x8a7dc979,0x52d048db,0x211ecee9,0xc86ea3b8,0x2eea4056 +.long 0xba772b34,0xd8cb68a7,0x5f4e2541,0xe16ed341,0x0fec14db,0x9b32f6a6,0x391698be,0xeee376f7,0x83674c02,0xe9a7aa17,0x5843022a,0x65832f97,0x5ba4990f,0x29f3a8da,0xfb8e3216,0x79a59c3a +.long 0xbd19bb16,0x9cdc4d2e,0xb3262d86,0xc6c7cfd0,0x969c0b47,0xd4ce14d0,0x13e56128,0x1fa352b7,0x973db6d3,0x383d55b8,0xe8e5b7bf,0x71836850,0xe6bb571f,0xc7714596,0x2d5b2dd2,0x259df31f +.long 
0x913cc16d,0x568f8925,0xe1a26f5a,0x18bc5b6d,0xf5f499ae,0xdfa413be,0xc3f0ae84,0xf8835dec,0x65a40ab0,0xb6e60bd8,0x194b377e,0x65596439,0x92084a69,0xbcd85625,0x4f23ede0,0x5ce433b9 +.long 0x6ad65143,0xe8e8f04f,0xd6e14af6,0x11511827,0x8295c0c7,0x3d390a10,0x621eba16,0x71e29ee4,0x63717b46,0xa588fc09,0xe06ad4a2,0x02be02fe,0x04c22b22,0x931558c6,0x12f3c849,0xbb4d4bd6 +.long 0x20efd662,0x54a4f496,0xc5952d14,0x92ba6d20,0xcc9784c2,0x2db8ea1e,0x4b353644,0x81cc10ca,0x4b4d7f6c,0x40b570ad,0x84a1dcd2,0x5c9f1d96,0x3147e797,0x01379f81,0x2bd499f5,0xe5c6097b +.long 0x328e5e20,0x40dcafa6,0x54815550,0xf7b5244a,0x47bfc978,0xb9a4f118,0xd25825b1,0x0ea0e79f,0x646c7ecf,0xa50f96eb,0x446dea9d,0xeb811493,0xdfabcf69,0x2af04677,0xc713f6e8,0xbe3a068f +.long 0x42e06189,0x860d523d,0x4e3aff13,0xbf077941,0xc1b20650,0x0b616dca,0x2131300d,0xe66dd6d1,0xff99abde,0xd4a0fd67,0xc7aac50d,0xc9903550,0x7c46b2d7,0x022ecf8b,0x3abf92af,0x3333b1e8 +.long 0x6c491c14,0x11cc113c,0x80dd3f88,0x05976688,0x29d932ed,0xf5b4d9e7,0xa2c38b6d,0xe982aad8,0x8be0dcf0,0x6f925347,0x65ca53f2,0x700080ae,0x443ca77f,0xd8131156,0xec51f984,0xe92d6942 +.long 0x85dfe9ae,0xd2a08af8,0x4d2a86ca,0xd825d9a5,0x39dff020,0x2c53988d,0x430cdc40,0xf38b135a,0x62a7150b,0x0c918ae0,0x0c340e9b,0xf31fd8de,0x4dbbf02e,0xafa0e7ae,0x5eba6239,0x5847fb2a +.long 0xdccbac8b,0x6b1647dc,0x06f485c8,0xb642aa78,0x7038ecdf,0x873f3765,0xfa49d3fe,0x2ce5e865,0xc98c4400,0xea223788,0xf1fa5279,0x8104a8cd,0x06becfd7,0xbcf7cc7a,0xc8f974ae,0x49424316 +.long 0x84d6365d,0xc0da65e7,0x8f759fb8,0xbcb7443f,0x7ae81930,0x35c712b1,0x4c6e08ab,0x80428dff,0xa4faf843,0xf19dafef,0xffa9855f,0xced8538d,0xbe3ac7ce,0x20ac409c,0x882da71e,0x358c1fb6 +.long 0xfd349961,0xafa9c0e5,0x8421c2fc,0x2b2cfa51,0xf3a28d38,0x2a80db17,0x5d138e7e,0xa8aba539,0x6e96eb8d,0x52012d1d,0xcbaf9622,0x65d8dea0,0xb264f56c,0x57735447,0x1b6c8da2,0xbeebef3f +.long 0xce785254,0xfc346d98,0xbb64a161,0xd50e8d72,0x49794add,0xc03567c7,0x752c7ef6,0x15a76065,0x961f23d6,0x59f3a222,0x73ecc0b0,0x378e4438,0x5a82fde4,0xc74be434,0xd8b9cf34,0xae509af2 +.long 0x577f44a1,0x4a61ee46,0xb611deeb,0xe09b748c,0xf5f7b884,0xc0481b2c,0x61acfa6b,0x35626678,0xbf8d21e6,0x37f4c518,0xb205a76d,0x22d96531,0x954073c0,0x37fb85e1,0x65b3a567,0xbceafe4f +.long 0xbe42a582,0xefecdef7,0x65046be6,0xd3fc6080,0x09e8dba9,0xc9af13c8,0x641491ff,0x1e6c9847,0xd30c31f7,0x3b574925,0xac2a2122,0xb7eb72ba,0xef0859e7,0x776a0dac,0x21900942,0x06fec314 +.long 0xf8c22049,0x2464bc10,0x875ebf69,0x9bfbcce7,0x4336326b,0xd7a88e2a,0x5bc2acfa,0xda05261c,0xeba7efc8,0xc29f5bdc,0x25dbbf2e,0x471237ca,0x2975f127,0xa72773f2,0x04d0b326,0xdc744e8e +.long 0xa56edb73,0x38a7ed16,0x2c007e70,0x64357e37,0x5080b400,0xa167d15b,0x23de4be1,0x07b41164,0x74c89883,0xb2d91e32,0x2882e7ed,0x3c162821,0x7503e482,0xad6b36ba,0x0ea34331,0x48434e8e +.long 0x2c7ae0b9,0x79f4f24f,0x1939b44a,0xc46fbf81,0x56595eb1,0x76fefae8,0xcd5f29c7,0x417b66ab,0xc5ceec20,0x5f2332b2,0xe1a1cae2,0xd69661ff,0x9b0286e6,0x5ede7e52,0xe276b993,0x9d062529 +.long 0x7e50122b,0x324794b0,0x4af07ca5,0xdd744f8b,0xd63fc97b,0x30a12f08,0x76626d9d,0x39650f1a,0x1fa38477,0x101b47f7,0xd4dc124f,0x3d815f19,0xb26eb58a,0x1569ae95,0x95fb1887,0xc3cde188 +.long 0xf9539a48,0x54e9f37b,0x7408c1a5,0xb0100e06,0xea580cbb,0x821d9811,0x86e50c56,0x8af52d35,0xdbbf698b,0xdfbd9d47,0x03dc1c73,0x2961a1ea,0xe76a5df8,0x203d38f8,0x6def707a,0x08a53a68 +.long 0x1bee45d4,0x26eefb48,0x3c688036,0xb3cee346,0xc42f2469,0x463c5315,0x81378162,0x19d84d2e,0x1c4d349f,0x22d7c3c5,0x163d59c5,0x65965844,0xb8abceae,0xcf198c56,0x628559d5,0x6fb1fb1b +.long 
0x07bf8fe3,0x8bbffd06,0x3467734b,0x46259c58,0x35f7f0d3,0xd8953cea,0xd65b0ff1,0x1f0bece2,0xf3c72914,0xf7d5b4b3,0x3cb53389,0x29e8ea95,0x836b6d46,0x4a365626,0xea174fde,0xe849f910 +.long 0xf4737f21,0x7ec62fbb,0x6209f5ac,0xd8dba5ab,0xa5f9adbe,0x24b5d7a9,0xa61dc768,0x707d28f7,0xcaa999ea,0x7711460b,0x1c92e4cc,0xba7b174d,0x18d4bf2d,0x3c4bab66,0xeb8bd279,0xb8f0c980 +.long 0x324b4737,0x024bea9a,0x32a83bca,0xfba9e423,0xa232dced,0x6e635643,0x2571c8ba,0x99619367,0x54b7032b,0xe8c9f357,0x2442d54a,0xf936b3ba,0x8290c65a,0x2263f0f0,0xee2c7fdb,0x48989780 +.long 0x13d4f95e,0xadc5d55a,0xad9b8500,0x737cff85,0x8a73f43d,0x271c557b,0xe18bc476,0xbed617a4,0x7dfd8ab2,0x66245401,0x3a2870aa,0xae7b89ae,0x23a7e545,0x1b555f53,0xbe057e4c,0x6791e247 +.long 0x324fa34d,0x860136ad,0x4cbeae28,0xea111447,0xbedd3299,0x023a4270,0xc1c35c34,0x3d5c3a7f,0x8d0412d2,0xb0f6db67,0xfcdc6b9a,0xd92625e2,0x4e28a982,0x92ae5ccc,0x47a3ce7e,0xea251c36 +.long 0x790691bf,0x9d658932,0x06b736ae,0xed610589,0xc0d63b6e,0x712c2f04,0xc63d488f,0x5cf06fd5,0xd9588e41,0x97363fac,0x2b93257e,0x1f9bf762,0x667acace,0xa9d1ffc4,0x0a061ecf,0x1cf4a1aa +.long 0xdc1818d0,0x40e48a49,0xa3621ab0,0x0643ff39,0xe39ef639,0x5768640c,0x04d86854,0x1fc099ea,0xeccd28fd,0x9130b9c3,0x7eec54ab,0xd743cbd2,0xe5b475b6,0x052b146f,0x900a7d1f,0x058d9a82 +.long 0x91262b72,0x65e02292,0xbb0edf03,0x96f924f9,0xfe206842,0x5cfa59c8,0x5eafa720,0xf6037004,0x18d7dd96,0x5f30699e,0xcbab2495,0x381e8782,0xdd8be949,0x91669b46,0x26aae8ef,0xb40606f5 +.long 0xfc6751a4,0x2812b839,0xfba800ef,0x16196214,0x4c1a2875,0x4398d5ca,0x653d8349,0x720c00ee,0xd820007c,0xc2699eb0,0xa39b5825,0x880ee660,0x471f6984,0x70694694,0xe3dda99a,0xf7d16ea8 +.long 0xc0519a23,0x28d675b2,0x4f6952e3,0x9ebf94fe,0xa2294a8a,0xf28bb767,0xfe0af3f5,0x85512b4d,0x99b16a0d,0x18958ba8,0xba7548a7,0x95c2430c,0xa16be615,0xb30d1b10,0x85bfb74c,0xe3ebbb97 +.long 0x18549fdb,0xa3273cfe,0x4fcdb792,0xf6e200bf,0x83aba56c,0x54a76e18,0x89ef6aa2,0x73ec66f6,0xd1b9a305,0x8d17add7,0xb7ae1b9d,0xa959c5b9,0x6bcc094a,0x88643522,0xd7d429b9,0xcc5616c4 +.long 0xe6a33f7c,0xa6dada01,0x9d4e70ad,0xc6217a07,0x09c15b7c,0xd619a818,0x0e80c854,0xea06b329,0xa5f5e7b9,0x174811ce,0x787c65f4,0x66dfc310,0x3316ab54,0x4ea7bd69,0x1dcc0f70,0xc12c4acb +.long 0x1e407dd9,0xe4308d1a,0x91afa997,0xe8a3587c,0xab77b7a5,0xea296c12,0x673c0d52,0xb5ad49e4,0x7006085a,0x40f9b2b2,0x87bf6ec2,0xa88ff340,0x4e3066a6,0x978603b1,0xb5e486e2,0xb3f99fc2 +.long 0xb2e63645,0x07b53f5e,0x84c84232,0xbe57e547,0x7214d5cf,0xd779c216,0x029a3aca,0x617969cd,0x8a7017a0,0xd17668cd,0xbe9b7ee8,0x77b4d19a,0x9c161776,0x58fd0e93,0xd5968a72,0xa8c4f4ef +.long 0x67b3de77,0x296071cc,0x634f7905,0xae3c0b8e,0x8a7100c9,0x67e440c2,0xeb4b9b42,0xbb8c3c1b,0xc51b3583,0x6d71e8ea,0x9525e642,0x7591f5af,0x13f509f3,0xf73a2f7b,0x5619ac9b,0x618487aa +.long 0x9d61718a,0x3a72e5f7,0x7592d28c,0x00413bcc,0x963c35cf,0x7d9b11d3,0xb90a46ed,0x77623bcf,0xdcdd2a50,0xdeef273b,0x0601846e,0x4a741f9b,0x0ec6e929,0x33b89e51,0x8b7f22cd,0xcb02319f +.long 0x084bae24,0xbbe1500d,0x343d2693,0x2f0ae8d7,0x7cdef811,0xacffb5f2,0x263fb94f,0xaa0c030a,0xa0f442de,0x6eef0d61,0x27b139d3,0xf92e1817,0x0ad8bc28,0x1ae6deb7,0xc0514130,0xa89e38dc +.long 0xd2fdca23,0x81eeb865,0xcc8ef895,0x5a15ee08,0x01905614,0x768fa10a,0x880ee19b,0xeff5b8ef,0xcb1c8a0e,0xf0c0cabb,0xb8c838f9,0x2e1ee9cd,0x8a4a14c0,0x0587d8b8,0x2ff698e5,0xf6f27896 +.long 0x89ee6256,0xed38ef1c,0x6b353b45,0xf44ee1fe,0x70e903b3,0x9115c0c7,0x818f31df,0xc78ec0a1,0xb7dccbc6,0x6c003324,0x163bbc25,0xd96dd1f3,0x5cedd805,0x33aa82dd,0x7f7eb2f1,0x123aae4f +.long 
0xa26262cd,0x1723fcf5,0x0060ebd5,0x1f7f4d5d,0xb2eaa3af,0xf19c5c01,0x9790accf,0x2ccb9b14,0x52324aa6,0x1f9c1cad,0x7247df54,0x63200526,0xbac96f82,0x5732fe42,0x01a1c384,0x52fe771f +.long 0xb1001684,0x546ca13d,0xa1709f75,0xb56b4eee,0xd5db8672,0x266545a9,0x1e8f3cfb,0xed971c90,0xe3a07b29,0x4e7d8691,0xe4b696b9,0x7570d9ec,0x7bc7e9ae,0xdc5fa067,0xc82c4844,0x68b44caf +.long 0xbf44da80,0x519d34b3,0x5ab32e66,0x283834f9,0x6278a000,0x6e608797,0x627312f6,0x1e62960e,0xe6901c55,0x9b87b27b,0x24fdbc1f,0x80e78538,0x2facc27d,0xbbbc0951,0xac143b5a,0x06394239 +.long 0x376c1944,0x35bb4a40,0x63da1511,0x7cb62694,0xb7148a3b,0xafd29161,0x4e2ea2ee,0xa6f9d9ed,0x880dd212,0x15dc2ca2,0xa61139a9,0x903c3813,0x6c0f8785,0x2aa7b46d,0x901c60ff,0x36ce2871 +.long 0xe10d9c12,0xc683b028,0x032f33d3,0x7573baa2,0x67a31b58,0x87a9b1f6,0xf4ffae12,0xfd3ed11a,0x0cb2748e,0x83dcaa9a,0x5d6fdf16,0x8239f018,0x72753941,0xba67b49c,0xc321cb36,0x2beec455 +.long 0x3f8b84ce,0x88015606,0x8d38c86f,0x76417083,0x598953dd,0x054f1ca7,0x4e8e7429,0xc939e110,0x5a914f2f,0x9b1ac2b3,0xe74b8f9c,0x39e35ed3,0x781b2fb0,0xd0debdb2,0x2d997ba2,0x1585638f +.long 0x9e2fce99,0x9c4b646e,0x1e80857f,0x68a21081,0x3643b52a,0x06d54e44,0x0d8eb843,0xde8d6d63,0x42146a0a,0x70321563,0x5eaa3622,0x8ba826f2,0x86138787,0x227a58bd,0x10281d37,0x43b6c03c +.long 0xb54dde39,0x6326afbb,0xdb6f2d5f,0x744e5e8a,0xcff158e1,0x48b2a99a,0xef87918f,0xa93c8fa0,0xde058c5c,0x2182f956,0x936f9e7a,0x216235d2,0xd2e31e67,0xace0c0db,0xf23ac3e7,0xc96449bf +.long 0x170693bd,0x7e9a2874,0xa45e6335,0xa28e14fd,0x56427344,0x5757f6b3,0xacf8edf9,0x822e4556,0xe6a285cd,0x2b7a6ee2,0xa9df3af0,0x5866f211,0xf845b844,0x40dde2dd,0x110e5e49,0x986c3726 +.long 0xf7172277,0x73680c2a,0x0cccb244,0x57b94f0f,0x2d438ca7,0xbdff7267,0xcf4663fd,0xbad1ce11,0xd8f71cae,0x9813ed9d,0x961fdaa6,0xf43272a6,0xbd6d1637,0xbeff0119,0x30361978,0xfebc4f91 +.long 0x2f41deff,0x02b37a95,0xe63b89b7,0x0e44a59a,0x143ff951,0x673257dc,0xd752baf4,0x19c02205,0xc4b7d692,0x46c23069,0xfd1502ac,0x2e6392c3,0x1b220846,0x6057b1a2,0x0c1b5b63,0xe51ff946 +.long 0x566c5c43,0x6e85cb51,0x3597f046,0xcff9c919,0x4994d94a,0x9354e90c,0x2147927d,0xe0a39332,0x0dc1eb2b,0x8427fac1,0x2ff319fa,0x88cfd8c2,0x01965274,0xe2d4e684,0x67aaa746,0xfa2e067d +.long 0x3e5f9f11,0xb6d92a7f,0xd6cb3b8e,0x9afe153a,0xddf800bd,0x4d1a6dd7,0xcaf17e19,0xf6c13cc0,0x325fc3ee,0x15f6c58e,0xa31dc3b2,0x71095400,0xafa3d3e7,0x168e7c07,0x94c7ae2d,0x3f8417a1 +.long 0x813b230d,0xec234772,0x17344427,0x634d0f5f,0xd77fc56a,0x11548ab1,0xce06af77,0x7fab1750,0x4f7c4f83,0xb62c10a7,0x220a67d9,0xa7d2edc4,0x921209a0,0x1c404170,0xface59f0,0x0b9815a0 +.long 0x319540c3,0x2842589b,0xa283d6f8,0x18490f59,0xdaae9fcb,0xa2731f84,0xc3683ba0,0x3db6d960,0x14611069,0xc85c63bb,0x0788bf05,0xb19436af,0x347460d2,0x905459df,0xe11a7db1,0x73f6e094 +.long 0xb6357f37,0xdc7f938e,0x2bd8aa62,0xc5d00f79,0x2ca979fc,0xc878dcb9,0xeb023a99,0x37e83ed9,0x1560bf3d,0x6b23e273,0x1d0fae61,0x1086e459,0x9a9414bd,0x78248316,0xf0ea9ea1,0x1b956bc0 +.long 0xc31b9c38,0x7b85bb91,0x48ef57b5,0x0c5aa90b,0xaf3bab6f,0xdedeb169,0x2d373685,0xe610ad73,0x02ba8e15,0xf13870df,0x8ca7f771,0x0337edb6,0xb62c036c,0xe4acf747,0xb6b94e81,0xd921d576 +.long 0x2c422f7a,0xdbc86439,0xed348898,0xfb635362,0xc45bfcd1,0x83084668,0x2b315e11,0xc357c9e3,0x5b2e5b8c,0xb173b540,0xe102b9a4,0x7e946931,0x7b0fb199,0x17c890eb,0xd61b662b,0xec225a83 +.long 0xee3c76cb,0xf306a3c8,0xd32a1f6e,0x3cf11623,0x6863e956,0xe6d5ab64,0x5c005c26,0x3b8a4cbe,0x9ce6bb27,0xdcd529a5,0x04d4b16f,0xc4afaa52,0x7923798d,0xb0624a26,0x6b307fab,0x85e56df6 +.long 
0x2bf29698,0x0281893c,0xd7ce7603,0x91fc19a4,0xad9a558f,0x75a5dca3,0x4d50bf77,0x40ceb3fa,0xbc9ba369,0x1baf6060,0x597888c2,0x927e1037,0x86a34c07,0xd936bf19,0xc34ae980,0xd4cf10c1 +.long 0x859dd614,0x3a3e5334,0x18d0c8ee,0x9c475b5b,0x07cd51d5,0x63080d1f,0xb88b4326,0xc9c0d0a6,0xc234296f,0x1ac98691,0x94887fb6,0x2a0a83a4,0x0cea9cf2,0x56511427,0xa24802f5,0x5230a6e8 +.long 0x72e3d5c1,0xf7a2bf0f,0x4f21439e,0x37717446,0x9ce30334,0xfedcbf25,0x7ce202f9,0xe0030a78,0x1202e9ca,0x6f2d9ebf,0x75e6e591,0xe79dde6c,0xf1dac4f8,0xf52072af,0xbb9b404d,0x6c8d087e +.long 0xbce913af,0xad0fc73d,0x458a07cb,0x909e587b,0xd4f00c8a,0x1300da84,0xb54466ac,0x425cd048,0x90e9d8bf,0xb59cb9be,0x3e431b0e,0x991616db,0x531aecff,0xd3aa117a,0x59f4dc3b,0x91af92d3 +.long 0xe93fda29,0x9b1ec292,0xe97d91bc,0x76bb6c17,0xaface1e6,0x7509d95f,0xbe855ae3,0x3653fe47,0x0f680e75,0x73180b28,0xeeb6c26c,0x75eefd1b,0xb66d4236,0xa4cdf29f,0x6b5821d8,0x2d70a997 +.long 0x20445c36,0x7a3ee207,0x59877174,0x71d1ac82,0x949f73e9,0x0fc539f7,0x982e3081,0xd05cf3d7,0x7b1c7129,0x8758e20b,0x569e61f2,0xffadcc20,0x59544c2d,0xb05d3a2f,0x9fff5e53,0xbe16f5c1 +.long 0xaad58135,0x73cf65b8,0x037aa5be,0x622c2119,0x646fd6a0,0x79373b3f,0x0d3978cf,0x0e029db5,0x94fba037,0x8bdfc437,0x620797a6,0xaefbd687,0xbd30d38e,0x3fa5382b,0x585d7464,0x7627cfbf +.long 0x4e4ca463,0xb2330fef,0x3566cc63,0xbcef7287,0xcf780900,0xd161d2ca,0x5b54827d,0x135dc539,0x27bf1bc6,0x638f052e,0x07dfa06c,0x10a224f0,0x6d3321da,0xe973586d,0x26152c8f,0x8b0c5738 +.long 0x34606074,0x07ef4f2a,0xa0f7047a,0x80fe7fe8,0xe1a0e306,0x3d1a8152,0x88da5222,0x32cf43d8,0x5f02ffe6,0xbf89a95f,0x806ad3ea,0x3d9eb9a4,0x79c8e55e,0x012c17bb,0x99c81dac,0xfdcd1a74 +.long 0xb9556098,0x7043178b,0x801c3886,0x4090a1df,0x9b67b912,0x759800ff,0x232620c8,0x3e5c0304,0x70dceeca,0x4b9d3c4b,0x181f648e,0xbb2d3c15,0x6e33345c,0xf981d837,0x0cf2297a,0xb626289b +.long 0x8baebdcf,0x766ac659,0x75df01e5,0x1a28ae09,0x375876d8,0xb71283da,0x607b9800,0x4865a96d,0x237936b2,0x25dd1bcd,0x60417494,0x332f4f4b,0x370a2147,0xd0923d68,0xdc842203,0x497f5dfb +.long 0x32be5e0f,0x9dc74cbd,0x17a01375,0x7475bcb7,0x50d872b1,0x438477c9,0xffe1d63d,0xcec67879,0xd8578c70,0x9b006014,0x78bb6b8b,0xc9ad99a8,0x11fb3806,0x6799008e,0xcd44cab3,0xcfe81435 +.long 0x2f4fb344,0xa2ee1582,0x483fa6eb,0xb8823450,0x652c7749,0x622d323d,0xbeb0a15b,0xd8474a98,0x5d1c00d0,0xe43c154d,0x0e3e7aac,0x7fd581d9,0x2525ddf8,0x2b44c619,0xb8ae9739,0x67a033eb +.long 0x9ef2d2e4,0x113ffec1,0xd5a0ea7f,0x1bf6767e,0x03714c0a,0x57fff75e,0x0a23e9ee,0xa23c422e,0x540f83af,0xdd5f6b2d,0x55ea46a7,0xc2c2c27e,0x672a1208,0xeb6b4246,0xae634f7a,0xd13599f7 +.long 0xd7b32c6e,0xcf914b5c,0xeaf61814,0x61a5a640,0x208a1bbb,0x8dc3df8b,0xb6d79aa5,0xef627fd6,0xc4c86bc8,0x44232ffc,0x061539fe,0xe6f9231b,0x958b9533,0x1d04f25a,0x49e8c885,0x180cf934 +.long 0x9884aaf7,0x89689595,0x07b348a6,0xb1959be3,0x3c147c87,0x96250e57,0xdd0c61f8,0xae0efb3a,0xca8c325e,0xed00745e,0xecff3f70,0x3c911696,0x319ad41d,0x73acbc65,0xf0b1c7ef,0x7b01a020 +.long 0x63a1483f,0xea32b293,0x7a248f96,0x89eabe71,0x343157e5,0x9c6231d3,0xdf3c546d,0x93a375e5,0x6a2afe69,0xe76e9343,0xe166c88e,0xc4f89100,0x4f872093,0x248efd0d,0x8fe0ea61,0xae0eb3ea +.long 0x9d79046e,0xaf89790d,0x6cee0976,0x4d650f2d,0x43071eca,0xa3935d9a,0x283b0bfe,0x66fcd2c9,0x696605f1,0x0e665eb5,0xa54cd38d,0xe77e5d07,0x43d950cf,0x90ee050a,0xd32e69b5,0x86ddebda +.long 0xfddf7415,0x6ad94a3d,0x3f6e8d5a,0xf7fa1309,0xe9957f75,0xc4831d1d,0xd5817447,0x7de28501,0x9e2aeb6b,0x6f1d7078,0xf67a53c2,0xba2b9ff4,0xdf9defc3,0x36963767,0x0d38022c,0x479deed3 +.long 
0x3a8631e8,0xd2edb89b,0x7a213746,0x8de855de,0xb00c5f11,0xb2056cb7,0x2c9b85e4,0xdeaefbd0,0xd150892d,0x03f39a8d,0x218b7985,0x37b84686,0xb7375f1a,0x36296dd8,0xb78e898e,0x472cd4b1 +.long 0xe9f05de9,0x15dff651,0x2ce98ba9,0xd4045069,0x9b38024c,0x8466a7ae,0xe5a6b5ef,0xb910e700,0xb3aa8f0d,0xae1c56ea,0x7eee74a6,0xbab2a507,0x4b4c4620,0x0dca11e2,0x4c47d1f4,0xfd896e2e +.long 0x308fbd93,0xeb45ae53,0x02c36fda,0x46cd5a2e,0xbaa48385,0x6a3d4e90,0x9dbe9960,0xdd55e62e,0x2a81ede7,0xa1406aa0,0xf9274ea7,0x6860dd14,0x80414f86,0xcfdcb0c2,0x22f94327,0xff410b10 +.long 0x49ad467b,0x5a33cc38,0x0a7335f1,0xefb48b6c,0xb153a360,0x14fb54a4,0xb52469cc,0x604aa9d2,0x754e48e9,0x5e9dc486,0x37471e8e,0x693cb455,0x8d3b37b6,0xfb2fd7cd,0xcf09ff07,0x63345e16 +.long 0x23a5d896,0x9910ba6b,0x7fe4364e,0x1fe19e35,0x9a33c677,0x6e1da8c3,0x29fd9fd0,0x15b4488b,0x1a1f22bf,0x1f439254,0xab8163e8,0x920a8a70,0x07e5658e,0x3fd1b249,0xb6ec839b,0xf2c4f79c +.long 0x4aa38d1b,0x1abbc3d0,0xb5d9510e,0x3b0db35c,0x3e60dec0,0x1754ac78,0xea099b33,0x53272fd7,0x07a8e107,0x5fb0494f,0x6a8191fa,0x4a89e137,0x3c4ad544,0xa113b7f6,0x6cb9897b,0x88a2e909 +.long 0xb44a3f84,0x17d55de3,0x17c6c690,0xacb2f344,0x10232390,0x32088168,0x6c733bf7,0xf2e8a61f,0x9c2d7652,0xa774aab6,0xed95c5bc,0xfb5307e3,0x4981f110,0xa05c73c2,0xa39458c9,0x1baae31c +.long 0xcbea62e7,0x1def185b,0xeaf63059,0xe8ac9eae,0x9921851c,0x098a8cfd,0x3abe2f5b,0xd959c3f1,0x20e40ae5,0xa4f19525,0x07a24aa1,0x320789e3,0x7392b2bc,0x259e6927,0x1918668b,0x58f6c667 +.long 0xc55d2d8b,0xce1db2bb,0xf4f6ca56,0x41d58bb7,0x8f877614,0x7650b680,0xf4c349ed,0x905e16ba,0xf661acac,0xed415140,0xcb2270af,0x3b8784f0,0x8a402cba,0x3bc280ac,0x0937921a,0xd53f7146 +.long 0xe5681e83,0xc03c8ee5,0xf6ac9e4a,0x62126105,0x936b1a38,0x9503a53f,0x782fecbd,0x3d45e2d4,0x76e8ae98,0x69a5c439,0xbfb4b00e,0xb53b2eeb,0x72386c89,0xf1674712,0x4268bce4,0x30ca34a2 +.long 0x78341730,0x7f1ed86c,0xb525e248,0x8ef5beb8,0xb74fbf38,0xbbc489fd,0x91a0b382,0x38a92a0e,0x22433ccf,0x7a77ba3f,0xa29f05a9,0xde8362d6,0x61189afc,0x7f6a30ea,0x59ef114f,0x693b5505 +.long 0xcd1797a1,0x50266bc0,0xf4b7af2d,0xea17b47e,0x3df9483e,0xd6c4025c,0xa37b18c9,0x8cbb9d9f,0x4d8424cf,0x91cbfd9c,0xab1c3506,0xdb7048f1,0x028206a3,0x9eaf641f,0x25bdf6ce,0xf986f3f9 +.long 0x224c08dc,0x262143b5,0x81b50c91,0x2bbb09b4,0xaca8c84f,0xc16ed709,0xb2850ca8,0xa6210d9d,0x09cb54d6,0x6d8df67a,0x500919a4,0x91eef6e0,0x0f132857,0x90f61381,0xf8d5028b,0x9acede47 +.long 0x90b771c3,0x844d1b71,0xba6426be,0x563b71e4,0xbdb802ff,0x2efa2e83,0xab5b4a41,0x3410cbab,0x30da84dd,0x555b2d26,0xee1cc29a,0xd0711ae9,0x2f547792,0xcf3e8c60,0xdc678b35,0x03d7d5de +.long 0xced806b8,0x071a2fa8,0x697f1478,0x222e6134,0xabfcdbbf,0xdc16fd5d,0x121b53b8,0x44912ebf,0x2496c27c,0xac943674,0x1ffc26b0,0x8ea3176c,0x13debf2c,0xb6e224ac,0xf372a832,0x524cc235 +.long 0x9f6f1b18,0xd706e1d8,0x44cce35b,0x2552f005,0xa88e31fc,0x8c8326c2,0xf9552047,0xb5468b2c,0x3ff90f2b,0xce683e88,0x2f0a5423,0x77947bdf,0xed56e328,0xd0a1b28b,0xc20134ac,0xaee35253 +.long 0x3567962f,0x7e98367d,0x8188bffb,0x379ed61f,0xfaf130a1,0x73bba348,0x904ed734,0x6c1f75e1,0x3b4a79fc,0x18956642,0x54ef4493,0xf20bc83d,0x9111eca1,0x836d425d,0x009a8dcf,0xe5b5c318 +.long 0x13221bc5,0x3360b25d,0x6b3eeaf7,0x707baad2,0x743a95a1,0xd7279ed8,0x969e809f,0x7450a875,0xe5d0338f,0x32b6bd53,0x2b883bbc,0x1e77f7af,0x1063ecd0,0x90da12cc,0xc315be47,0xe2697b58 +.long 0xda85d534,0x2771a5bd,0xff980eea,0x53e78c1f,0x900385e7,0xadf1cf84,0xc9387b62,0x7d3b14f6,0xcb8f2bd2,0x170e74b0,0x827fa993,0x2d50b486,0xf6f32bab,0xcdbe8c9a,0xc3b93ab8,0x55e906b0 +.long 
0x8fe280d1,0x747f22fc,0xb2e114ab,0xcd8e0de5,0xe10b68b0,0x5ab7dbeb,0xa480d4b2,0x9dc63a9c,0x4be1495f,0x78d4bc3b,0x9359122d,0x25eb3db8,0x0809cbdc,0x3f8ac05b,0xd37c702f,0xbf4187bb +.long 0x1416a6a5,0x84cea069,0x43ef881c,0x8f860c79,0x38038a5d,0x41311f8a,0xfc612067,0xe78c2ec0,0x5ad73581,0x494d2e81,0x59604097,0xb4cc9e00,0xf3612cba,0xff558aec,0x9e36c39e,0x35beef7a +.long 0xdbcf41b9,0x1845c7cf,0xaea997c0,0x5703662a,0xe402f6d8,0x8b925afe,0x4dd72162,0xd0a1b1ae,0x03c41c4b,0x9f47b375,0x0391d042,0xa023829b,0x503b8b0a,0x5f5045c3,0x98c010e5,0x123c2688 +.long 0x36ba06ee,0x324ec0cc,0x3dd2cc0c,0xface3115,0xf333e91f,0xb364f3be,0x28e832b0,0xef8aff73,0x2d05841b,0x1e9bad04,0x356a21e2,0x42f0e3df,0x4add627e,0xa3270bcb,0xd322e711,0xb09a8158 +.long 0x0fee104a,0x86e326a1,0x3703f65d,0xad7788f8,0x47bc4833,0x7e765430,0x2b9b893a,0x6cee582b,0xe8f55a7b,0x9cd2a167,0xd9e4190d,0xefbee3c6,0xd40c2e9d,0x33ee7185,0xa380b548,0x844cc9c5 +.long 0x66926e04,0x323f8ecd,0x8110c1ba,0x0001e38f,0xfc6a7f07,0x8dbcac12,0x0cec0827,0xd65e1d58,0xbe76ca2d,0xd2cd4141,0xe892f33a,0x7895cf5c,0x367139d2,0x956d230d,0xd012c4c1,0xa91abd3e +.long 0x87eb36bf,0x34fa4883,0x914b8fb4,0xc5f07102,0xadb9c95f,0x90f0e579,0x28888195,0xfe6ea8cb,0xedfa9284,0x7b9b5065,0x2b8c8d65,0x6c510bd2,0xcbe8aafd,0xd7b8ebef,0x96b1da07,0xedb3af98 +.long 0x6295d426,0x28ff779d,0x3fa3ad7b,0x0c4f6ac7,0x8b8e2604,0xec44d054,0x8b0050e1,0x9b32a66d,0xf0476ce2,0x1f943366,0xa602c7b4,0x7554d953,0x524f2809,0xbe35aca6,0xfd4edbea,0xb6881229 +.long 0x508efb63,0xe8cd0c8f,0x6abcefc7,0x9eb5b5c8,0xb441ab4f,0xf5621f5f,0xb76a2b22,0x79e6c046,0xe37a1f69,0x74a4792c,0x03542b60,0xcbd252cb,0xb3c20bd3,0x785f65d5,0x4fabc60c,0x8dea6143 +.long 0xde673629,0x45e21446,0x703c2d21,0x57f7aa1e,0x98c868c7,0xa0e99b7f,0x8b641676,0x4e42f66d,0x91077896,0x602884dc,0xc2c9885b,0xa0d690cf,0x3b9a5187,0xfeb4da33,0x153c87ee,0x5f789598 +.long 0x52b16dba,0x2192dd47,0x3524c1b1,0xdeefc0e6,0xe4383693,0x465ea76e,0x361b8d98,0x79401711,0xf21a15cb,0xa5f9ace9,0xefee9aeb,0x73d26163,0xe677016c,0xcca844b3,0x57eaee06,0x6c122b07 +.long 0x15f09690,0xb782dce7,0x2dfc0fc9,0x508b9b12,0x65d89fc6,0x9015ab4b,0xd6d5bb0f,0x5e79dab7,0x6c775aa2,0x64f021f0,0x37c7eca1,0xdf09d8cc,0xef2fa506,0x9a761367,0x5b81eec6,0xed4ca476 +.long 0x10bbb8b5,0x262ede36,0x0641ada3,0x0737ce83,0xe9831ccc,0x4c94288a,0x8065e635,0x487fc1ce,0xb8bb3659,0xb13d7ab3,0x855e4120,0xdea5df3e,0x85eb0244,0xb9a18573,0xa7cfe0a3,0x1a1b8ea3 +.long 0x67b0867c,0x3b837119,0x9d364520,0x8d5e0d08,0xd930f0e3,0x52dccc1e,0xbf20bbaf,0xefbbcec7,0x0263ad10,0x99cffcab,0xfcd18f8a,0xd8199e6d,0xe9f10617,0x64e2773f,0x08704848,0x0079e8e1 +.long 0x8a342283,0x1169989f,0xa83012e6,0x8097799c,0x8a6a9001,0xece966cb,0x072ac7fc,0x93b3afef,0x2db3d5ba,0xe6893a2a,0x89bf4fdc,0x263dc462,0xe0396673,0x8852dfc9,0x3af362b6,0x7ac70895 +.long 0x5c2f342b,0xbb9cce4d,0xb52d7aae,0xbf80907a,0x2161bcd0,0x97f3d3cd,0x0962744d,0xb25b0834,0x6c3a1dda,0xc5b18ea5,0x06c92317,0xfe4ec7eb,0xad1c4afe,0xb787b890,0x0ede801a,0xdccd9a92 +.long 0xdb58da1f,0x9ac6ddda,0xb8cae6ee,0x22bbc12f,0x815c4a43,0xc6f8bced,0xf96480c7,0x8105a92c,0x7a859d51,0x0dc3dbf3,0x3041196b,0xe3ec7ce6,0x0d1067c9,0xd9f64b25,0x3d1f8dd8,0xf2321321 +.long 0x76497ee8,0x8b5c619c,0xc717370e,0x5d2b0ac6,0x4fcf68e1,0x98204cb6,0x62bc6792,0x0bdec211,0xa63b1011,0x6973ccef,0xe0de1ac5,0xf9e3fa97,0x3d0e0c8b,0x5efb693e,0xd2d4fcb4,0x037248e9 +.long 0x1ec34f9e,0x80802dc9,0x33810603,0xd8772d35,0x530cb4f3,0x3f06d66c,0xc475c129,0x7be5ed0d,0x31e82b10,0xcb9e3c19,0xc9ff6b4c,0xc63d2857,0x92a1b45e,0xb92118c6,0x7285bbca,0x0aec4414 +.long 
0x1e29a3ef,0xfc189ae7,0x4c93302e,0xcbe906f0,0xceaae10e,0xd0107914,0xb68e19f8,0xb7a23f34,0xefd2119d,0xe9d875c2,0xfcadc9c8,0x03198c6e,0x4da17113,0x65591bf6,0x3d443038,0x3cf0bbf8 +.long 0x2b724759,0xae485bb7,0xb2d4c63a,0x945353e1,0xde7d6f2c,0x82159d07,0x4ec5b109,0x389caef3,0xdb65ef14,0x4a8ebb53,0xdd99de43,0x2dc2cb7e,0x83f2405f,0x816fa3ed,0xc14208a3,0x73429bb9 +.long 0xb01e6e27,0xb618d590,0xe180b2dc,0x047e2ccd,0x04aea4a9,0xd1b299b5,0x9fa403a4,0x412c9e1e,0x79407552,0x88d28a36,0xf332b8e3,0x49c50136,0xe668de19,0x3a1b6fcc,0x75122b97,0x178851bc +.long 0xfb85fa4c,0xb1e13752,0x383c8ce9,0xd61257ce,0xd2f74dae,0xd43da670,0xbf846bbb,0xa35aa23f,0x4421fc83,0x5e74235d,0xc363473b,0xf6df8ee0,0x3c4aa158,0x34d7f52a,0x9bc6d22e,0x50d05aab +.long 0xa64785f4,0x8c56e735,0x5f29cd07,0xbc56637b,0x3ee35067,0x53b2bb80,0xdc919270,0x50235a0f,0xf2c4aa65,0x191ab6d8,0x8396023b,0xc3475831,0xf0f805ba,0x80400ba5,0x5ec0f80f,0x8881065b +.long 0xcc1b5e83,0xc370e522,0x860b8bfb,0xde2d4ad1,0x67b256df,0xad364df0,0xe0138997,0x8f12502e,0x7783920a,0x503fa0dc,0xc0bc866a,0xe80014ad,0xd3064ba6,0x3f89b744,0xcba5dba5,0x03511dcd +.long 0x95a7b1a2,0x197dd46d,0x3c6341fb,0x9c4e7ad6,0x484c2ece,0x426eca29,0xde7f4f8a,0x9211e489,0xc78ef1f4,0x14997f6e,0x06574586,0x2b2c0910,0x1c3eede8,0x17286a6e,0x0f60e018,0x25f92e47 +.long 0x31890a36,0x805c5646,0x57feea5b,0x703ef600,0xaf3c3030,0x389f747c,0x54dd3739,0xe0e5daeb,0xc9c9f155,0xfe24a4c3,0xb5393962,0x7e4bf176,0xaf20bf29,0x37183de2,0xf95a8c3b,0x4a1bd7b5 +.long 0x46191d3d,0xa83b9699,0x7b87f257,0x281fc8dd,0x54107588,0xb18e2c13,0x9b2bafe8,0x6372def7,0x0d8972ca,0xdaf4bb48,0x56167a3f,0x3f2dd4b7,0x84310cf4,0x1eace32d,0xe42700aa,0xe3bcefaf +.long 0xd785e73d,0x5fe5691e,0x2ea60467,0xa5db5ab6,0xdfc6514a,0x02e23d41,0xe03c3665,0x35e8048e,0x1adaa0f8,0x3f8b118f,0x84ce1a5a,0x28ec3b45,0x2c6646b8,0xe8cacc6e,0xdbd0e40f,0x1343d185 +.long 0xcaaa358c,0xe5d7f844,0x9924182a,0x1a1db7e4,0x9c875d9a,0xd64cd42d,0x042eeec8,0xb37b515f,0x7b165fbe,0x4d4dd409,0xe206eff3,0xfc322ed9,0x59b7e17e,0x7dee4102,0x8236ca00,0x55a481c0 +.long 0xc23fc975,0x8c885312,0x05d6297b,0x15715806,0xf78edd39,0xa078868e,0x03c45e52,0x956b31e0,0xff7b33a6,0x470275d5,0x0c7e673f,0xc8d5dc3a,0x7e2f2598,0x419227b4,0x4c14a975,0x8b37b634 +.long 0x8b11888c,0xd0667ed6,0x803e25dc,0x5e0e8c3e,0xb987a24a,0x34e5d0dc,0xae920323,0x9f40ac3b,0x34e0f63a,0x5463de95,0x6b6328f9,0xa128bf92,0xda64f1b7,0x491ccd7c,0xc47bde35,0x7ef1ec27 +.long 0xa36a2737,0xa857240f,0x63621bc1,0x35dc1366,0xd4fb6897,0x7a3a6453,0xc929319d,0x80f1a439,0xf8cb0ba0,0xfc18274b,0x8078c5eb,0xb0b53766,0x1e01d0ef,0xfb0d4924,0x372ab09c,0x50d7c67d +.long 0x3aeac968,0xb4e370af,0xc4b63266,0xe4f7fee9,0xe3ac5664,0xb4acd4c2,0xceb38cbf,0xf8910bd2,0xc9c0726e,0x1c3ae50c,0xd97b40bf,0x15309569,0xfd5a5a1b,0x70884b7f,0xef8314cd,0x3890896a +.long 0xa5618c93,0x58e1515c,0x77d942d1,0xe665432b,0xb6f767a8,0xb32181bf,0x3a604110,0x753794e8,0xe8c0dbcc,0x09afeb7c,0x598673a3,0x31e02613,0x7d46db00,0x5d98e557,0x9d985b28,0xfc21fb8c +.long 0xb0843e0b,0xc9040116,0x69b04531,0x53b1b3a8,0x85d7d830,0xdd1649f0,0xcb7427e8,0xbb3bcc87,0xc93dce83,0x77261100,0xa1922a2a,0x7e79da61,0xf3149ce8,0x587a2b02,0xde92ec83,0x147e1384 +.long 0xaf077f30,0x484c83d3,0x0658b53a,0xea78f844,0x027aec53,0x912076c2,0x93c8177d,0xf34714e3,0xc2376c84,0x37ef5d15,0x3d1aa783,0x8315b659,0xef852a90,0x3a75c484,0x16086bd4,0x0ba0c58a +.long 0x529a6d48,0x29688d7a,0xc2f19203,0x9c7f250d,0x682e2df9,0x123042fb,0xad8121bc,0x2b7587e7,0xe0182a65,0x30fc0233,0xe3e1128a,0xb82ecf87,0x93fb098f,0x71682861,0x85e9e6a7,0x043e21ae +.long 
0x66c834ea,0xab5b49d6,0x47414287,0x3be43e18,0x219a2a47,0xf40fb859,0xcc58df3c,0x0e6559e9,0x0c6615b4,0xfe1dfe8e,0x56459d70,0x14abc8fd,0x05de0386,0x7be0fa8e,0xe9035c7c,0x8e63ef68 +.long 0x53b31e91,0x116401b4,0x4436b4d8,0x0cba7ad4,0x107afd66,0x9151f9a0,0x1f0ee4c4,0xafaca8d0,0x9ee9761c,0x75fe5c1d,0xf0c0588f,0x3497a16b,0x0304804c,0x3ee2bebd,0xc2c990b9,0xa8fb9a60 +.long 0x39251114,0xd14d32fe,0xcac73366,0x36bf25bc,0xdba7495c,0xc9562c66,0x46ad348b,0x324d301b,0xd670407e,0x9f46620c,0xe3733a01,0x0ea8d4f1,0xb0c324e0,0xd396d532,0x03c317cd,0x5b211a0e +.long 0x5ffe7b37,0x090d7d20,0x1747d2da,0x3b7f3efb,0xb54fc519,0xa2cb525f,0xf66a971e,0x6e220932,0xb486d440,0xddc160df,0x3fe13465,0x7fcfec46,0x76e4c151,0x83da7e4e,0xd8d302b5,0xd6fa48a1 +.long 0x5872cd88,0xc6304f26,0x278b90a1,0x806c1d3c,0xcaf0bc1c,0x3553e725,0xbb9d8d5c,0xff59e603,0x7a0b85dd,0xa4550f32,0x93ecc217,0xdec5720a,0x69d62213,0x0b88b741,0x5b365955,0x7212f245 +.long 0xb5cae787,0x20764111,0x1dfd3124,0x13cb7f58,0x1175aefb,0x2dca77da,0xffaae775,0xeb75466b,0xdb6cff32,0x74d76f3b,0x61fcda9a,0x7440f37a,0xb525028b,0x1bb3ac92,0xa1975f29,0x20fbf8f7 +.long 0xdf83097f,0x982692e1,0x554b0800,0x28738f6c,0xa2ce2f2f,0xdc703717,0x40814194,0x7913b93c,0x1fe89636,0x04924593,0xf78834a6,0x7b98443f,0x5114a5a1,0x11c6ab01,0xffba5f4c,0x60deb383 +.long 0x01a982e6,0x4caa54c6,0x3491cd26,0x1dd35e11,0x7cbd6b05,0x973c315f,0x52494724,0xcab00775,0x6565e15a,0x04659b1f,0x8c8fb026,0xbf30f529,0xa8a0de37,0xfc21641b,0xfa5e5114,0xe9c7a366 +.long 0x52f03ad8,0xdb849ca5,0x024e35c0,0xc7e8dbe9,0xcfc3c789,0xa1a2bbac,0x9c26f262,0xbf733e7d,0xb8444823,0x882ffbf5,0x6bf8483b,0xb7224e88,0x65bef640,0x53023b8b,0xd4d5f8cd,0xaabfec91 +.long 0x079ea1bd,0xa40e1510,0xd05d5d26,0x1ad9addc,0x13e68d4f,0xdb3f2eab,0x640f803f,0x1cff1ae2,0xd4cee117,0xe0e7b749,0x4036d909,0x8e9f275b,0x8f4d4c38,0xce34e31d,0xd75130fc,0x22b37f69 +.long 0xb4014604,0x83e0f1fd,0x89415078,0xa8ce9919,0x41792efe,0x82375b75,0x97d4515b,0x4f59bf5c,0x923a277d,0xac4f324f,0x650f3406,0xd9bc9b7d,0x8a39bc51,0xc6fa87d1,0x5ccc108f,0x82588530 +.long 0x82e4c634,0x5ced3c9f,0x3a4464f8,0x8efb8314,0x7a1dca25,0xe706381b,0x5a2a412b,0x6cd15a3c,0xbfcd8fb5,0x9347a8fd,0x6e54cd22,0x31db2eef,0xf8d8932f,0xc4aeb11e,0x344411af,0x11e7c1ed +.long 0xdc9a151e,0x2653050c,0x3bb0a859,0x9edbfc08,0xfd5691e7,0x926c81c7,0x6f39019a,0x9c1b2342,0x7f8474b9,0x64a81c8b,0x01761819,0x90657c07,0x55e0375a,0x390b3331,0xb6ebc47d,0xc676c626 +.long 0xb7d6dee8,0x51623247,0x79659313,0x0948d927,0xe9ab35ed,0x99700161,0x8ddde408,0x06cc32b4,0x061ef338,0x6f2fd664,0xc202e9ed,0x1606fa02,0x929ba99b,0x55388bc1,0x1e81df69,0xc4428c5e +.long 0xf91b0b2a,0xce2028ae,0xf03dfd3f,0xce870a23,0x0affe8ed,0x66ec2c87,0x284d0c00,0xb205fb46,0x44cefa48,0xbf5dffe7,0xa19876d7,0xb6fc37a8,0x08b72863,0xbecfa84c,0x2576374f,0xd7205ff5 +.long 0x8887de41,0x80330d32,0x869ea534,0x5de0df0c,0x3c56ea17,0x13f42753,0x452b1a78,0xeb1f6069,0xe30ea15c,0x50474396,0xc1494125,0x575816a1,0xfe6bb38f,0xbe1ce55b,0x96ae30f7,0xb901a948 +.long 0xd8fc3548,0xe5af0f08,0xd73bfd08,0x5010b5d0,0x53fe655a,0x993d2880,0x1c1309fd,0x99f2630b,0xb4e3b76f,0xd8677baf,0xb840784b,0x14e51ddc,0xbf0092ce,0x326c750c,0xf528320f,0xc83d306b +.long 0x77d4715c,0xc4456715,0x6b703235,0xd30019f9,0xd669e986,0x207ccb2e,0xf6dbfc28,0x57c824af,0xd8f92a23,0xf0eb532f,0x9bb98fd2,0x4a557fd4,0xc1e6199a,0xa57acea7,0x8b94b1ed,0x0c663820 +.long 0xf83a9266,0x9b42be8f,0x0101bd45,0xc7741c97,0x07bd9ceb,0x95770c11,0x8b2e0744,0x1f50250a,0x1477b654,0xf762eec8,0x15efe59a,0xc65b900e,0x9546a897,0x88c96148,0xc30b4d7c,0x7e8025b3 +.long 
0x12045cf9,0xae4065ef,0x9ccce8bd,0x6fcb2caf,0xf2cf6525,0x1fa0ba4e,0xcb72c312,0xf683125d,0xe312410e,0xa01da4ea,0x6cd8e830,0x67e28677,0x98fb3f07,0xabd95752,0xeef649a5,0x05f11e11 +.long 0x9d3472c2,0xba47faef,0xc77d1345,0x3adff697,0xdd15afee,0x4761fa04,0xb9e69462,0x64f1f61a,0x9bfb9093,0xfa691fab,0xa1133dfe,0x3df8ae8f,0x58cc710d,0xcd5f8967,0x16c7fe79,0xfbb88d50 +.long 0xe88c50d1,0x8e011b4c,0xa8771c4f,0x7532e807,0xe2278ee4,0x64c78a48,0x3845072a,0x0b283e83,0x49e69274,0x98a6f291,0x1868b21c,0xb96e9668,0xb1a8908e,0x38f0adc2,0x1feb829d,0x90afcff7 +.long 0x210b0856,0x9915a383,0xdef04889,0xa5a80602,0x7c64d509,0x800e9af9,0xb8996f6f,0x81382d0b,0x81927e27,0x490eba53,0x4af50182,0x46c63b32,0xd3ad62ce,0x784c5fd9,0xf8ae8736,0xe4fa1870 +.long 0xd7466b25,0x4ec9d0bc,0xdb235c65,0x84ddbe1a,0x163c1688,0x5e2645ee,0x00eba747,0x570bd00e,0x128bfa0f,0xfa51b629,0x6c1d3b68,0x92fce1bd,0xb66778b1,0x3e7361dc,0x5561d2bb,0x9c7d249d +.long 0x0bbc6229,0xa40b28bf,0xdfd91497,0x1c83c05e,0xf083df05,0x5f9f5154,0xeee66c9d,0xbac38b3c,0xec0dfcfd,0xf71db7e3,0x8b0a8416,0xf2ecda8e,0x7812aa66,0x52fddd86,0x4e6f4272,0x2896ef10 +.long 0x0fe9a745,0xff27186a,0x49ca70db,0x08249fcd,0x441cac49,0x7425a2e6,0xece5ff57,0xf4a0885a,0x7d7ead58,0x6e2cb731,0x1898d104,0xf96cf7d6,0x4f2c9a89,0xafe67c9d,0x1c7bf5bc,0x89895a50 +.long 0x573cecfa,0xdc7cb8e5,0xd15f03e6,0x66497eae,0x3f084420,0x6bc0de69,0xacd532b0,0x323b9b36,0x0115a3c1,0xcfed390a,0x2d65ca0e,0x9414c40b,0x2f530c78,0x641406bd,0x833438f2,0x29369a44 +.long 0x903fa271,0x996884f5,0xb9da921e,0xe6da0fd2,0x5db01e54,0xa6f2f269,0x6876214e,0x1ee3e9bd,0xe27a9497,0xa26e181c,0x8e215e04,0x36d254e4,0x252cabca,0x42f32a6c,0x80b57614,0x99481487 +.long 0x40d9cae1,0x4c4dfe69,0x11a10f09,0x05869580,0x3491b64b,0xca287b57,0x3fd4a53b,0x77862d5d,0x50349126,0xbf94856e,0x71c5268f,0x2be30bd1,0xcbb650a6,0x10393f19,0x778cf9fd,0x639531fe +.long 0xb2935359,0x02556a11,0xaf8c126e,0xda38aa96,0x0960167f,0x47dbe6c2,0x501901cd,0x37bbabb6,0x2c947778,0xb6e979e0,0x7a1a1dc6,0xd69a5175,0x9d9faf0c,0xc3ed5095,0x1d5fa5f0,0x4dd9c096 +.long 0x64f16ea8,0xa0c4304d,0x7e718623,0x8b1cac16,0x7c67f03e,0x0b576546,0xcbd88c01,0x559cf5ad,0x0e2af19a,0x074877bb,0xa1228c92,0x1f717ec1,0x326e8920,0x70bcb800,0x4f312804,0xec6e2c5c +.long 0x3fca4752,0x426aea7d,0x2211f62a,0xf12c0949,0x7be7b6b5,0x24beecd8,0x36d7a27d,0xb77eaf4c,0xfda78fd3,0x154c2781,0x264eeabe,0x848a83b0,0x4ffe2bc4,0x81287ef0,0xb6b6fc2a,0x7b6d88c6 +.long 0xce417d99,0x805fb947,0x8b916cc4,0x4b93dcc3,0x21273323,0x72e65bb3,0x6ea9886e,0xbcc1badd,0x4bc5ee85,0x0e223011,0xc18ee1e4,0xa561be74,0xa6bcf1f1,0x762fd2d4,0x95231489,0x50e6a5a4 +.long 0xa00b500b,0xca96001f,0x5d7dcdf5,0x5c098cfc,0x8c446a85,0xa64e2d2e,0x971f3c62,0xbae9bcf1,0x8435a2c5,0x4ec22683,0x4bad4643,0x8ceaed6c,0xccccf4e3,0xe9f8fb47,0x1ce3b21e,0xbd4f3fa4 +.long 0xa3db3292,0xd79fb110,0xb536c66a,0xe28a37da,0x8e49e6a9,0x279ce87b,0xfdcec8e3,0x70ccfe8d,0x3ba464b2,0x2193e4e0,0xaca9a398,0x0f39d60e,0xf82c12ab,0x7d7932af,0x91e7e0f7,0xd8ff50ed +.long 0xfa28a7e0,0xea961058,0x0bf5ec74,0xc726cf25,0xdb229666,0xe74d55c8,0xa57f5799,0x0bd9abbf,0x4dfc47b3,0x7479ef07,0x0c52f91d,0xd9c65fc3,0x36a8bde2,0x8e0283fe,0x7d4b7280,0xa32a8b5e +.long 0x12e83233,0x6a677c61,0xdcc9bf28,0x0fbb3512,0x0d780f61,0x562e8ea5,0x1dc4e89c,0x0db8b22b,0x89be0144,0x0a6fd1fb,0xca57113b,0x8c77d246,0xff09c91c,0x4639075d,0x5060824c,0x5b47b17f +.long 0x16287b52,0x58aea2b0,0xd0cd8eb0,0xa1343520,0xc5d58573,0x6148b4d0,0x291c68ae,0xdd2b6170,0x1da3b3b7,0xa61b3929,0x08c4ac10,0x5f946d79,0x7217d583,0x4105d4a5,0x25e6de5e,0x5061da3d +.long 
0xec1b4991,0x3113940d,0x36f485ae,0xf12195e1,0x731a2ee0,0xa7507fb2,0x6e9e196e,0x95057a8e,0x2e130136,0xa3c2c911,0x33c60d15,0x97dfbb36,0xb300ee2b,0xcaf3c581,0xf4bac8b8,0x77f25d90 +.long 0x6d840cd6,0xdb1c4f98,0xe634288c,0x471d62c0,0xcec8a161,0x8ec2f85e,0xfa6f4ae2,0x41f37cbc,0x4b709985,0x6793a20f,0xefa8985b,0x7a7bd33b,0x938e6446,0x2c6a3fbd,0x2a8d47c1,0x19042619 +.long 0xcc36975f,0x16848667,0x9d5f1dfb,0x02acf168,0x613baa94,0x62d41ad4,0x9f684670,0xb56fbb92,0xe9e40569,0xce610d0d,0x35489fef,0x7b99c65f,0x3df18b97,0x0c88ad1b,0x5d0e9edb,0x81b7d9be +.long 0xc716cc0a,0xd85218c0,0x85691c49,0xf4b5ff90,0xce356ac6,0xa4fd666b,0x4b327a7a,0x17c72895,0xda6be7de,0xf93d5085,0x3301d34e,0xff71530e,0xd8f448e8,0x4cd96442,0x2ed18ffa,0x9283d331 +.long 0x2a849870,0x4d33dd99,0x41576335,0xa716964b,0x179be0e5,0xff5e3a9b,0x83b13632,0x5b9d6b1b,0xa52f313b,0x3b8bd7d4,0x637a4660,0xc9dd95a0,0x0b3e218f,0x30035962,0xc7b28a3c,0xce1481a3 +.long 0x43228d83,0xab41b43a,0x4ad63f99,0x24ae1c30,0x46a51229,0x8e525f1a,0xcd26d2b4,0x14af860f,0x3f714aa1,0xd6baef61,0xeb78795e,0xf51865ad,0xe6a9d694,0xd3e21fce,0x8a37b527,0x82ceb1dd +.size ecp_nistz256_precomputed,.-ecp_nistz256_precomputed +.text + + + +.section .rodata +.align 64 +.Lpoly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + + +.LRR: +.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd + +.LOne: +.long 1,1,1,1,1,1,1,1 +.LTwo: +.long 2,2,2,2,2,2,2,2 +.LThree: +.long 3,3,3,3,3,3,3,3 +.LONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + + +.Lord: +.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +.LordK: +.quad 0xccd1c8aaee00bc4f +.previous + +.globl ecp_nistz256_mul_by_2 +.type ecp_nistz256_mul_by_2,@function +.align 64 +ecp_nistz256_mul_by_2: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Lmul_by_2_body: + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + sbbq $0,%r13 + + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + movq %r8,0(%rdi) + cmovcq %rcx,%r10 + movq %r9,8(%rdi) + cmovcq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Lmul_by_2_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 + + + +.globl ecp_nistz256_div_by_2 +.type ecp_nistz256_div_by_2,@function +.align 32 +ecp_nistz256_div_by_2: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Ldiv_by_2_body: + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r8,%rax + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + movq %r9,%rdx + xorq %r13,%r13 + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + adcq $0,%r13 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + cmovzq %rsi,%r13 + + movq %r9,%rax + shrq $1,%r8 + shlq $63,%rax + movq %r10,%rdx + shrq $1,%r9 + orq %rax,%r8 + shlq $63,%rdx + movq %r11,%rcx + 
shrq $1,%r10 + orq %rdx,%r9 + shlq $63,%rcx + shrq $1,%r11 + shlq $63,%r13 + orq %rcx,%r10 + orq %r13,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Ldiv_by_2_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 + + + +.globl ecp_nistz256_mul_by_3 +.type ecp_nistz256_mul_by_3,@function +.align 32 +ecp_nistz256_mul_by_3: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Lmul_by_3_body: + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq .Lpoly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq .Lpoly+24(%rip),%r11 + sbbq $0,%r13 + + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + cmovcq %rcx,%r10 + cmovcq %r12,%r11 + + xorq %r13,%r13 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + movq %r8,%rax + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq .Lpoly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq .Lpoly+24(%rip),%r11 + sbbq $0,%r13 + + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + movq %r8,0(%rdi) + cmovcq %rcx,%r10 + movq %r9,8(%rdi) + cmovcq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Lmul_by_3_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 + + + +.globl ecp_nistz256_add +.type ecp_nistz256_add,@function +.align 32 +ecp_nistz256_add: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Ladd_body: + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + sbbq $0,%r13 + + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + movq %r8,0(%rdi) + cmovcq %rcx,%r10 + movq %r9,8(%rdi) + cmovcq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Ladd_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_add,.-ecp_nistz256_add + + + +.globl ecp_nistz256_sub +.type ecp_nistz256_sub,@function +.align 32 +ecp_nistz256_sub: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Lsub_body: + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + movq %r8,%rax + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq 
%r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Lsub_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_sub,.-ecp_nistz256_sub + + + +.globl ecp_nistz256_neg +.type ecp_nistz256_neg,@function +.align 32 +ecp_nistz256_neg: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Lneg_body: + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Lneg_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_neg,.-ecp_nistz256_neg + + + + + + +.globl ecp_nistz256_ord_mul_mont +.type ecp_nistz256_ord_mul_mont,@function +.align 32 +ecp_nistz256_ord_mul_mont: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lecp_nistz256_ord_mul_montx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_mul_body: + + movq 0(%rdx),%rax + movq %rdx,%rbx + leaq .Lord(%rip),%r14 + movq .LordK(%rip),%r15 + + + movq %rax,%rcx + mulq 0(%rsi) + movq %rax,%r8 + movq %rcx,%rax + movq %rdx,%r9 + + mulq 8(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq 16(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + + movq %r8,%r13 + imulq %r15,%r8 + + movq %rdx,%r11 + mulq 24(%rsi) + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq 0(%r14) + movq %r8,%rbp + addq %rax,%r13 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%r8 + + mulq 8(%r14) + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %rbp,%rax + adcq %rdx,%r10 + movq %rbp,%rdx + adcq $0,%r8 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 8(%rbx),%rax + sbbq %rdx,%rbp + + addq %r8,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r9 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + + movq %r9,%rcx + imulq %r15,%r9 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + xorq %r8,%r8 + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + mulq 0(%r14) + movq %r9,%rbp + addq %rax,%rcx + movq %r9,%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%r9 + + mulq 8(%r14) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq %rdx,%r11 + movq %rbp,%rdx + adcq $0,%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r12 + movq 
16(%rbx),%rax + sbbq %rdx,%rbp + + addq %r9,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r10 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + + movq %r10,%rcx + imulq %r15,%r10 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + xorq %r9,%r9 + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + mulq 0(%r14) + movq %r10,%rbp + addq %rax,%rcx + movq %r10,%rax + adcq %rdx,%rcx + + subq %r10,%r12 + sbbq $0,%r10 + + mulq 8(%r14) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq %rdx,%r12 + movq %rbp,%rdx + adcq $0,%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r13 + movq 24(%rbx),%rax + sbbq %rdx,%rbp + + addq %r10,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + + + movq %rax,%rcx + mulq 0(%rsi) + addq %rax,%r11 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 8(%rsi) + addq %rbp,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rcx,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq 16(%rsi) + addq %rbp,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rcx,%rax + adcq $0,%rdx + + movq %r11,%rcx + imulq %r15,%r11 + + movq %rdx,%rbp + mulq 24(%rsi) + addq %rbp,%r8 + adcq $0,%rdx + xorq %r10,%r10 + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + mulq 0(%r14) + movq %r11,%rbp + addq %rax,%rcx + movq %r11,%rax + adcq %rdx,%rcx + + subq %r11,%r13 + sbbq $0,%r11 + + mulq 8(%r14) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + movq %rbp,%rdx + adcq $0,%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + sbbq %rdx,%rbp + + addq %r11,%r8 + adcq %rbp,%r9 + adcq $0,%r10 + + + movq %r12,%rsi + subq 0(%r14),%r12 + movq %r13,%r11 + sbbq 8(%r14),%r13 + movq %r8,%rcx + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rsi,%r12 + cmovcq %r11,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_mul_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont + + + + + + + +.globl ecp_nistz256_ord_sqr_mont +.type ecp_nistz256_ord_sqr_mont,@function +.align 32 +ecp_nistz256_ord_sqr_mont: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lecp_nistz256_ord_sqr_montx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_sqr_body: + + movq 0(%rsi),%r8 + movq 8(%rsi),%rax + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + leaq .Lord(%rip),%rsi + movq %rdx,%rbx + jmp .Loop_ord_sqr + +.align 32 +.Loop_ord_sqr: + + movq %rax,%rbp + mulq %r8 + movq %rax,%r9 +.byte 102,72,15,110,205 + movq %r14,%rax + movq %rdx,%r10 + + mulq %r8 + addq %rax,%r10 + movq %r15,%rax +.byte 
102,73,15,110,214 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r8 + addq %rax,%r11 + movq %r15,%rax +.byte 102,73,15,110,223 + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + movq %rax,%r13 + movq %r14,%rax + movq %rdx,%r14 + + + mulq %rbp + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r15 + + mulq %rbp + addq %rax,%r12 + adcq $0,%rdx + + addq %r15,%r12 + adcq %rdx,%r13 + adcq $0,%r14 + + + xorq %r15,%r15 + movq %r8,%rax + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + + mulq %rax + movq %rax,%r8 +.byte 102,72,15,126,200 + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r9 + adcq %rax,%r10 +.byte 102,72,15,126,208 + adcq $0,%rdx + movq %rdx,%rbp + + mulq %rax + addq %rbp,%r11 + adcq %rax,%r12 +.byte 102,72,15,126,216 + adcq $0,%rdx + movq %rdx,%rbp + + movq %r8,%rcx + imulq 32(%rsi),%r8 + + mulq %rax + addq %rbp,%r13 + adcq %rax,%r14 + movq 0(%rsi),%rax + adcq %rdx,%r15 + + + mulq %r8 + movq %r8,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r8,%r10 + sbbq $0,%rbp + + mulq %r8 + addq %rcx,%r9 + adcq $0,%rdx + addq %rax,%r9 + movq %r8,%rax + adcq %rdx,%r10 + movq %r8,%rdx + adcq $0,%rbp + + movq %r9,%rcx + imulq 32(%rsi),%r9 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r11 + movq 0(%rsi),%rax + sbbq %rdx,%r8 + + addq %rbp,%r11 + adcq $0,%r8 + + + mulq %r9 + movq %r9,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r9,%r11 + sbbq $0,%rbp + + mulq %r9 + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %r9,%rax + adcq %rdx,%r11 + movq %r9,%rdx + adcq $0,%rbp + + movq %r10,%rcx + imulq 32(%rsi),%r10 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r8 + movq 0(%rsi),%rax + sbbq %rdx,%r9 + + addq %rbp,%r8 + adcq $0,%r9 + + + mulq %r10 + movq %r10,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r10,%r8 + sbbq $0,%rbp + + mulq %r10 + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %r10,%rax + adcq %rdx,%r8 + movq %r10,%rdx + adcq $0,%rbp + + movq %r11,%rcx + imulq 32(%rsi),%r11 + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r9 + movq 0(%rsi),%rax + sbbq %rdx,%r10 + + addq %rbp,%r9 + adcq $0,%r10 + + + mulq %r11 + movq %r11,%rbp + addq %rax,%rcx + movq 8(%rsi),%rax + adcq %rdx,%rcx + + subq %r11,%r9 + sbbq $0,%rbp + + mulq %r11 + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + movq %r11,%rdx + adcq $0,%rbp + + shlq $32,%rax + shrq $32,%rdx + subq %rax,%r10 + sbbq %rdx,%r11 + + addq %rbp,%r10 + adcq $0,%r11 + + + xorq %rdx,%rdx + addq %r12,%r8 + adcq %r13,%r9 + movq %r8,%r12 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%rax + adcq $0,%rdx + + + subq 0(%rsi),%r8 + movq %r10,%r14 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r15 + sbbq 24(%rsi),%r11 + sbbq $0,%rdx + + cmovcq %r12,%r8 + cmovncq %r9,%rax + cmovncq %r10,%r14 + cmovncq %r11,%r15 + + decq %rbx + jnz .Loop_ord_sqr + + movq %r8,0(%rdi) + movq %rax,8(%rdi) + pxor %xmm1,%xmm1 + movq %r14,16(%rdi) + pxor %xmm2,%xmm2 + movq %r15,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_sqr_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont + +.type ecp_nistz256_ord_mul_montx,@function +.align 32 +ecp_nistz256_ord_mul_montx: +.cfi_startproc 
+.Lecp_nistz256_ord_mul_montx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_mulx_body: + + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + leaq .Lord-128(%rip),%r14 + movq .LordK(%rip),%r15 + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + mulxq %r11,%rbp,%r11 + addq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + mulxq %r15,%rdx,%rax + adcq %rbp,%r10 + adcq %rcx,%r11 + adcq $0,%r12 + + + xorq %r13,%r13 + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 24+128(%r14),%rcx,%rbp + movq 8(%rbx),%rdx + adcxq %rcx,%r11 + adoxq %rbp,%r12 + adcxq %r8,%r12 + adoxq %r8,%r13 + adcq $0,%r13 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%r14),%rcx,%rbp + movq 16(%rbx),%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcxq %r9,%r13 + adoxq %r9,%r8 + adcq $0,%r8 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%r14),%rcx,%rbp + movq 24(%rbx),%rdx + adcxq %rcx,%r13 + adoxq %rbp,%r8 + adcxq %r10,%r8 + adoxq %r10,%r9 + adcq $0,%r9 + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + mulxq %r15,%rdx,%rax + adcxq %rcx,%r8 + adoxq %rbp,%r9 + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + mulxq 0+128(%r14),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%r14),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%r14),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%r14),%rcx,%rbp + leaq 128(%r14),%r14 + movq %r12,%rbx + adcxq %rcx,%r8 + adoxq %rbp,%r9 + movq %r13,%rdx + adcxq %r11,%r9 + adoxq %r11,%r10 + adcq $0,%r10 + + + + movq %r8,%rcx + subq 0(%r14),%r12 + sbbq 8(%r14),%r13 + sbbq 16(%r14),%r8 + movq %r9,%rbp + sbbq 24(%r14),%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + cmovcq %rcx,%r8 + cmovcq %rbp,%r9 + + movq %r12,0(%rdi) 
+ movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_mulx_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx + +.type ecp_nistz256_ord_sqr_montx,@function +.align 32 +ecp_nistz256_ord_sqr_montx: +.cfi_startproc +.Lecp_nistz256_ord_sqr_montx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lord_sqrx_body: + + movq %rdx,%rbx + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq .Lord(%rip),%rsi + jmp .Loop_ord_sqrx + +.align 32 +.Loop_ord_sqrx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + movq %rdx,%rax +.byte 102,73,15,110,206 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + addq %rcx,%r10 +.byte 102,73,15,110,215 + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + mulxq %r8,%rcx,%r14 + movq %rax,%rdx +.byte 102,73,15,110,216 + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + + mulxq %rdx,%r8,%rbp +.byte 102,72,15,126,202 + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax +.byte 102,72,15,126,210 + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 + mulxq %rdx,%rcx,%rbp +.byte 0x67 +.byte 102,72,15,126,218 + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + adoxq %rbp,%r13 + mulxq %rdx,%rcx,%rax + adoxq %rcx,%r14 + adoxq %rax,%r15 + + + movq %r8,%rdx + mulxq 32(%rsi),%rdx,%rcx + + xorq %rax,%rax + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + adcxq %rax,%r8 + + + movq %r9,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + adoxq %rax,%r9 + + + movq %r10,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + mulxq 8(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r8 + mulxq 16(%rsi),%rcx,%rbp + adcxq %rcx,%r8 + adoxq %rbp,%r9 + mulxq 24(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + adcxq %rax,%r10 + + + movq %r11,%rdx + mulxq 32(%rsi),%rdx,%rcx + + mulxq 0(%rsi),%rcx,%rbp + adoxq %rcx,%r11 + adcxq %rbp,%r8 + mulxq 8(%rsi),%rcx,%rbp + adoxq %rcx,%r8 + adcxq %rbp,%r9 + mulxq 16(%rsi),%rcx,%rbp + adoxq %rcx,%r9 + adcxq %rbp,%r10 + mulxq 24(%rsi),%rcx,%rbp + adoxq %rcx,%r10 + adcxq %rbp,%r11 + adoxq %rax,%r11 + + + addq %r8,%r12 + adcq %r13,%r9 + movq %r12,%rdx + adcq %r14,%r10 + adcq %r15,%r11 + movq %r9,%r14 + adcq $0,%rax + + + subq 0(%rsi),%r12 + movq %r10,%r15 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq 
%r11,%r8 + sbbq 24(%rsi),%r11 + sbbq $0,%rax + + cmovncq %r12,%rdx + cmovncq %r9,%r14 + cmovncq %r10,%r15 + cmovncq %r11,%r8 + + decq %rbx + jnz .Loop_ord_sqrx + + movq %rdx,0(%rdi) + movq %r14,8(%rdi) + pxor %xmm1,%xmm1 + movq %r15,16(%rdi) + pxor %xmm2,%xmm2 + movq %r8,24(%rdi) + pxor %xmm3,%xmm3 + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lord_sqrx_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx + + + + +.globl ecp_nistz256_to_mont +.type ecp_nistz256_to_mont,@function +.align 32 +ecp_nistz256_to_mont: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + leaq .LRR(%rip),%rdx + jmp .Lmul_mont +.cfi_endproc +.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont + + + + + + + +.globl ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,@function +.align 32 +ecp_nistz256_mul_mont: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx +.Lmul_mont: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lmul_body: + cmpl $0x80100,%ecx + je .Lmul_montx + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq + jmp .Lmul_mont_done + +.align 32 +.Lmul_montx: + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_mul_montx +.Lmul_mont_done: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lmul_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +.type __ecp_nistz256_mul_montq,@function +.align 32 +__ecp_nistz256_mul_montq: +.cfi_startproc + + + movq %rax,%rbp + mulq %r9 + movq .Lpoly+8(%rip),%r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq .Lpoly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp 
+ shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq + + + + + + + + +.globl ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,@function +.align 32 +ecp_nistz256_sqr_mont: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lsqr_body: + cmpl $0x80100,%ecx + je .Lsqr_montx + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq + jmp .Lsqr_mont_done + +.align 32 +.Lsqr_montx: + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_sqr_montx +.Lsqr_mont_done: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbx +.cfi_restore %rbx + movq 40(%rsp),%rbp +.cfi_restore %rbp + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lsqr_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +.type __ecp_nistz256_sqr_montq,@function +.align 32 +__ecp_nistz256_sqr_montq: +.cfi_startproc + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq 
%rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq %rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq .Lpoly+8(%rip),%rsi + movq .Lpoly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq +.type __ecp_nistz256_mul_montx,@function +.align 32 +__ecp_nistz256_mul_montx: +.cfi_startproc + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + movq $32,%r14 + xorq %r13,%r13 + mulxq %r11,%rbp,%r11 + movq .Lpoly+24(%rip),%r15 + adcq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + adcq %rbp,%r10 + shlxq %r14,%r8,%rbp + adcq %rcx,%r11 + shrxq %r14,%r8,%rcx + adcq $0,%r12 + + + + addq %rbp,%r9 + adcq %rcx,%r10 + + mulxq %r15,%rcx,%rbp + movq 8(%rbx),%rdx + adcq %rcx,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + adcxq %rcx,%r12 + shlxq %r14,%r9,%rcx + adoxq %rbp,%r13 + shrxq %r14,%r9,%rbp + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + + addq %rcx,%r10 + adcq %rbp,%r11 + + mulxq %r15,%rcx,%rbp + movq 16(%rbx),%rdx + adcq %rcx,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + adcxq %rcx,%r13 + shlxq %r14,%r10,%rcx + adoxq %rbp,%r8 + shrxq %r14,%r10,%rbp + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + + addq %rcx,%r11 + adcq %rbp,%r12 + + mulxq %r15,%rcx,%rbp + movq 24(%rbx),%rdx + adcq %rcx,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq 
%rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + adcxq %rcx,%r8 + shlxq %r14,%r11,%rcx + adoxq %rbp,%r9 + shrxq %r14,%r11,%rbp + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + + addq %rcx,%r12 + adcq %rbp,%r13 + + mulxq %r15,%rcx,%rbp + movq %r12,%rbx + movq .Lpoly+8(%rip),%r14 + adcq %rcx,%r8 + movq %r13,%rdx + adcq %rbp,%r9 + adcq $0,%r10 + + + + xorl %eax,%eax + movq %r8,%rcx + sbbq $-1,%r12 + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rbp + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %rbp,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx + +.type __ecp_nistz256_sqr_montx,@function +.align 32 +__ecp_nistz256_sqr_montx: +.cfi_startproc + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + xorl %eax,%eax + adcq %rcx,%r10 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + + mulxq %r8,%rcx,%r14 + movq 0+128(%rsi),%rdx + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + mulxq %rdx,%r8,%rbp + movq 8+128(%rsi),%rdx + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax + movq 16+128(%rsi),%rdx + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 +.byte 0x67 + mulxq %rdx,%rcx,%rbp + movq 24+128(%rsi),%rdx + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + movq $32,%rsi + adoxq %rbp,%r13 +.byte 0x67,0x67 + mulxq %rdx,%rcx,%rax + movq .Lpoly+24(%rip),%rdx + adoxq %rcx,%r14 + shlxq %rsi,%r8,%rcx + adoxq %rax,%r15 + shrxq %rsi,%r8,%rax + movq %rdx,%rbp + + + addq %rcx,%r9 + adcq %rax,%r10 + + mulxq %r8,%rcx,%r8 + adcq %rcx,%r11 + shlxq %rsi,%r9,%rcx + adcq $0,%r8 + shrxq %rsi,%r9,%rax + + + addq %rcx,%r10 + adcq %rax,%r11 + + mulxq %r9,%rcx,%r9 + adcq %rcx,%r8 + shlxq %rsi,%r10,%rcx + adcq $0,%r9 + shrxq %rsi,%r10,%rax + + + addq %rcx,%r11 + adcq %rax,%r8 + + mulxq %r10,%rcx,%r10 + adcq %rcx,%r9 + shlxq %rsi,%r11,%rcx + adcq $0,%r10 + shrxq %rsi,%r11,%rax + + + addq %rcx,%r8 + adcq %rax,%r9 + + mulxq %r11,%rcx,%r11 + adcq %rcx,%r10 + adcq $0,%r11 + + xorq %rdx,%rdx + addq %r8,%r12 + movq .Lpoly+8(%rip),%rsi + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r13,%r9 + adcq $0,%rdx + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%r11 + sbbq %rbp,%r15 + sbbq $0,%rdx + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %r11,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx + + + + + + +.globl ecp_nistz256_from_mont +.type ecp_nistz256_from_mont,@function +.align 32 +ecp_nistz256_from_mont: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 +.Lfrom_body: + + movq 0(%rsi),%rax + movq .Lpoly+24(%rip),%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %rax,%r8 + movq .Lpoly+8(%rip),%r12 + + + + movq %rax,%rcx + shlq $32,%r8 + mulq %r13 + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq 
%rdx,%r8 + mulq %r13 + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %r13 + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %r13 + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + movq %r8,%rcx + adcq %rax,%r10 + movq %r9,%rsi + adcq $0,%rdx + + + + subq $-1,%r8 + movq %r10,%rax + sbbq %r12,%r9 + sbbq $0,%r10 + movq %rdx,%r11 + sbbq %r13,%rdx + sbbq %r13,%r13 + + cmovnzq %rcx,%r8 + cmovnzq %rsi,%r9 + movq %r8,0(%rdi) + cmovnzq %rax,%r10 + movq %r9,8(%rdi) + cmovzq %rdx,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + movq 0(%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + leaq 16(%rsp),%rsp +.cfi_adjust_cfa_offset -16 +.Lfrom_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont + + +.globl ecp_nistz256_scatter_w5 +.type ecp_nistz256_scatter_w5,@function +.align 32 +ecp_nistz256_scatter_w5: +.cfi_startproc + leal -3(%rdx,%rdx,2),%edx + movdqa 0(%rsi),%xmm0 + shll $5,%edx + movdqa 16(%rsi),%xmm1 + movdqa 32(%rsi),%xmm2 + movdqa 48(%rsi),%xmm3 + movdqa 64(%rsi),%xmm4 + movdqa 80(%rsi),%xmm5 + movdqa %xmm0,0(%rdi,%rdx,1) + movdqa %xmm1,16(%rdi,%rdx,1) + movdqa %xmm2,32(%rdi,%rdx,1) + movdqa %xmm3,48(%rdi,%rdx,1) + movdqa %xmm4,64(%rdi,%rdx,1) + movdqa %xmm5,80(%rdi,%rdx,1) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_scatter_w5,.-ecp_nistz256_scatter_w5 + + + +.globl ecp_nistz256_gather_w5 +.type ecp_nistz256_gather_w5,@function +.align 32 +ecp_nistz256_gather_w5: +.cfi_startproc + movl OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz .Lavx2_gather_w5 + movdqa .LOne(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +.Lselect_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz .Lselect_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.LSEH_end_ecp_nistz256_gather_w5: +.size ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5 + + + +.globl ecp_nistz256_scatter_w7 +.type ecp_nistz256_scatter_w7,@function +.align 32 +ecp_nistz256_scatter_w7: +.cfi_startproc + movdqu 0(%rsi),%xmm0 + shll $6,%edx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqa %xmm0,0(%rdi,%rdx,1) + movdqa %xmm1,16(%rdi,%rdx,1) + movdqa %xmm2,32(%rdi,%rdx,1) + movdqa %xmm3,48(%rdi,%rdx,1) + + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_scatter_w7,.-ecp_nistz256_scatter_w7 + + + +.globl ecp_nistz256_gather_w7 +.type ecp_nistz256_gather_w7,@function +.align 32 +ecp_nistz256_gather_w7: +.cfi_startproc + movl OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz .Lavx2_gather_w7 + movdqa .LOne(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor 
%xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +.Lselect_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz .Lselect_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.LSEH_end_ecp_nistz256_gather_w7: +.size ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7 + + +.type ecp_nistz256_avx2_gather_w5,@function +.align 32 +ecp_nistz256_avx2_gather_w5: +.cfi_startproc +.Lavx2_gather_w5: + vzeroupper + vmovdqa .LTwo(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + vpxor %ymm4,%ymm4,%ymm4 + + vmovdqa .LOne(%rip),%ymm5 + vmovdqa .LTwo(%rip),%ymm10 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + movq $8,%rax +.Lselect_loop_avx2_w5: + + vmovdqa 0(%rsi),%ymm6 + vmovdqa 32(%rsi),%ymm7 + vmovdqa 64(%rsi),%ymm8 + + vmovdqa 96(%rsi),%ymm11 + vmovdqa 128(%rsi),%ymm12 + vmovdqa 160(%rsi),%ymm13 + + vpcmpeqd %ymm1,%ymm5,%ymm9 + vpcmpeqd %ymm1,%ymm10,%ymm14 + + vpaddd %ymm0,%ymm5,%ymm5 + vpaddd %ymm0,%ymm10,%ymm10 + leaq 192(%rsi),%rsi + + vpand %ymm9,%ymm6,%ymm6 + vpand %ymm9,%ymm7,%ymm7 + vpand %ymm9,%ymm8,%ymm8 + vpand %ymm14,%ymm11,%ymm11 + vpand %ymm14,%ymm12,%ymm12 + vpand %ymm14,%ymm13,%ymm13 + + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm7,%ymm3,%ymm3 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm11,%ymm2,%ymm2 + vpxor %ymm12,%ymm3,%ymm3 + vpxor %ymm13,%ymm4,%ymm4 + + decq %rax + jnz .Lselect_loop_avx2_w5 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.LSEH_end_ecp_nistz256_avx2_gather_w5: +.size ecp_nistz256_avx2_gather_w5,.-ecp_nistz256_avx2_gather_w5 + + + +.globl ecp_nistz256_avx2_gather_w7 +.type ecp_nistz256_avx2_gather_w7,@function +.align 32 +ecp_nistz256_avx2_gather_w7: +.cfi_startproc +.Lavx2_gather_w7: + vzeroupper + vmovdqa .LThree(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + + vmovdqa .LOne(%rip),%ymm4 + vmovdqa .LTwo(%rip),%ymm8 + vmovdqa .LThree(%rip),%ymm12 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + + movq $21,%rax +.Lselect_loop_avx2_w7: + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vmovdqa 64(%rsi),%ymm9 + vmovdqa 96(%rsi),%ymm10 + + vmovdqa 128(%rsi),%ymm13 + vmovdqa 160(%rsi),%ymm14 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + vpcmpeqd %ymm1,%ymm8,%ymm11 + vpcmpeqd %ymm1,%ymm12,%ymm15 + + vpaddd %ymm0,%ymm4,%ymm4 + vpaddd %ymm0,%ymm8,%ymm8 + vpaddd %ymm0,%ymm12,%ymm12 + leaq 192(%rsi),%rsi + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + vpand %ymm11,%ymm9,%ymm9 + vpand %ymm11,%ymm10,%ymm10 + vpand %ymm15,%ymm13,%ymm13 + vpand %ymm15,%ymm14,%ymm14 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm9,%ymm2,%ymm2 + vpxor %ymm10,%ymm3,%ymm3 + vpxor %ymm13,%ymm2,%ymm2 + vpxor %ymm14,%ymm3,%ymm3 + + decq %rax + jnz .Lselect_loop_avx2_w7 + + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.LSEH_end_ecp_nistz256_avx2_gather_w7: 
+.size ecp_nistz256_avx2_gather_w7,.-ecp_nistz256_avx2_gather_w7 +.type __ecp_nistz256_add_toq,@function +.align 32 +__ecp_nistz256_add_toq: +.cfi_startproc + xorq %r11,%r11 + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq + +.type __ecp_nistz256_sub_fromq,@function +.align 32 +__ecp_nistz256_sub_fromq: +.cfi_startproc + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq + +.type __ecp_nistz256_subq,@function +.align 32 +__ecp_nistz256_subq: +.cfi_startproc + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_subq,.-__ecp_nistz256_subq + +.type __ecp_nistz256_mul_by_2q,@function +.align 32 +__ecp_nistz256_mul_by_2q: +.cfi_startproc + xorq %r11,%r11 + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q +.globl ecp_nistz256_point_double +.type ecp_nistz256_point_double,@function +.align 32 +ecp_nistz256_point_double: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lpoint_doublex + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $160+8,%rsp +.cfi_adjust_cfa_offset 32*5+8 +.Lpoint_doubleq_body: + +.Lpoint_double_shortcutq: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 
64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + leaq 160+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_doubleq_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_point_double,.-ecp_nistz256_point_double +.globl ecp_nistz256_point_add +.type 
ecp_nistz256_point_add,@function +.align 32 +ecp_nistz256_point_add: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lpoint_addx + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $576+8,%rsp +.cfi_adjust_cfa_offset 32*18+8 +.Lpoint_addq_body: + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 
160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + + orq %r8,%r12 + orq %r9,%r12 + + +.byte 0x3e + jnz .Ladd_proceedq + +.Ladd_doubleq: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp +.cfi_adjust_cfa_offset -416 + jmp .Lpoint_double_shortcutq +.cfi_adjust_cfa_offset 416 + +.align 32 +.Ladd_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa 
%xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_doneq: + leaq 576+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_addq_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_point_add,.-ecp_nistz256_point_add +.globl ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,@function +.align 32 +ecp_nistz256_point_add_affine: +.cfi_startproc + movl $0x80100,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $0x80100,%ecx + je .Lpoint_add_affinex + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $480+8,%rsp +.cfi_adjust_cfa_offset 32*15+8 +.Ladd_affineq_body: + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + 
movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) 
+ + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + leaq 480+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Ladd_affineq_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine +.type __ecp_nistz256_add_tox,@function +.align 32 +__ecp_nistz256_add_tox: +.cfi_startproc + xorq %r11,%r11 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox + +.type __ecp_nistz256_sub_fromx,@function +.align 32 +__ecp_nistz256_sub_fromx: +.cfi_startproc + xorq %r11,%r11 + sbbq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq $0,%r11 + + xorq %r10,%r10 + adcq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx + +.type __ecp_nistz256_subx,@function +.align 32 +__ecp_nistz256_subx: +.cfi_startproc + xorq %r11,%r11 + sbbq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq $0,%r11 + + xorq %r9,%r9 + adcq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + + btq $0,%r11 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + cmovcq %rcx,%r8 + cmovcq %r10,%r9 + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_subx,.-__ecp_nistz256_subx + +.type __ecp_nistz256_mul_by_2x,@function +.align 32 +__ecp_nistz256_mul_by_2x: +.cfi_startproc + xorq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x +.type ecp_nistz256_point_doublex,@function +.align 32 +ecp_nistz256_point_doublex: +.cfi_startproc +.Lpoint_doublex: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 
+.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $160+8,%rsp +.cfi_adjust_cfa_offset 32*5+8 +.Lpoint_doublex_body: + +.Lpoint_double_shortcutx: + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-128(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 32(%rbx),%rdx + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-128(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rdx + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 0+32(%rsp),%rdx + movq 8+32(%rsp),%r14 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subx + + movq 32(%rsp),%rdx + leaq 
32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-128(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + leaq 160+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_doublex_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex +.type ecp_nistz256_point_addx,@function +.align 32 +ecp_nistz256_point_addx: +.cfi_startproc +.Lpoint_addx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $576+8,%rsp +.cfi_adjust_cfa_offset 32*18+8 +.Lpoint_addx_body: + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rsi),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + + leaq 64-128(%rsi),%rsi + movq %rdx,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rdx + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 +.byte 102,72,15,110,203 + + leaq 64-128(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 416(%rsp),%rdx + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 512(%rsp),%rdx + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 
8+256(%rsp),%r10 + leaq -128+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 480(%rsp),%rdx + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + + orq %r8,%r12 + orq %r9,%r12 + + +.byte 0x3e + jnz .Ladd_proceedx + +.Ladd_doublex: +.byte 102,72,15,126,206 +.byte 102,72,15,126,199 + addq $416,%rsp +.cfi_adjust_cfa_offset -416 + jmp .Lpoint_double_shortcutx +.cfi_adjust_cfa_offset 416 + +.align 32 +.Ladd_proceedx: + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq -128+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0(%rsp),%rdx + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 160(%rsp),%rdx + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 
16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_donex: + leaq 576+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lpoint_addx_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx +.type ecp_nistz256_point_add_affinex,@function +.align 32 +ecp_nistz256_point_add_affinex: +.cfi_startproc +.Lpoint_add_affinex: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $480+8,%rsp +.cfi_adjust_cfa_offset 32*15+8 +.Ladd_affinex_body: + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm4,%xmm5 + + movdqu 0(%rbx),%xmm0 + pshufd $0xb1,%xmm5,%xmm3 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $0x1e,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + leaq 32(%rsp),%rdi + call 
__ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $0xb1,%xmm3,%xmm4 + movq 0(%rbx),%rdx + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $0x1e,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-128(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+96(%rsp),%rdx + movq 8+96(%rsp),%r14 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq -128+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + xorq %r11,%r11 + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + sbbq $0,%r11 + + cmovcq %rax,%r12 + movq 0(%rsi),%rax + cmovcq %rbp,%r13 + movq 8(%rsi),%rbp + cmovcq %rcx,%r8 + movq 16(%rsi),%rcx + cmovcq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rdx + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq -128+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa 
%xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + leaq 480+56(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbx +.cfi_restore %rbx + movq -8(%rsi),%rbp +.cfi_restore %rbp + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Ladd_affinex_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex diff --git a/contrib/openssl-cmake/asm/crypto/ec/ecp_sm2p256-armv8.S b/contrib/openssl-cmake/asm/crypto/ec/ecp_sm2p256-armv8.S new file mode 100644 index 000000000000..50497f60a699 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/ec/ecp_sm2p256-armv8.S @@ -0,0 +1,836 @@ +#include "arm_arch.h" +.arch armv8-a +.section .rodata + +.align 5 +// The polynomial p +.Lpoly: +.quad 0xffffffffffffffff,0xffffffff00000000,0xffffffffffffffff,0xfffffffeffffffff +// The order of polynomial n +.Lord: +.quad 0x53bbf40939d54123,0x7203df6b21c6052b,0xffffffffffffffff,0xfffffffeffffffff +// (p + 1) / 2 +.Lpoly_div_2: +.quad 0x8000000000000000,0xffffffff80000000,0xffffffffffffffff,0x7fffffff7fffffff +// (n + 1) / 2 +.Lord_div_2: +.quad 0xa9ddfa049ceaa092,0xb901efb590e30295,0xffffffffffffffff,0x7fffffff7fffffff + +.text + +// void bn_rshift1(BN_ULONG *a); +.globl bn_rshift1 +.type bn_rshift1,%function +.align 5 +bn_rshift1: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x0] + ldp x9,x10,[x0,#16] + + // Right shift + extr x7,x8,x7,#1 + extr x8,x9,x8,#1 + extr x9,x10,x9,#1 + lsr x10,x10,#1 + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + + ret +.size bn_rshift1,.-bn_rshift1 + +// void bn_sub(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); +.globl bn_sub +.type bn_sub,%function +.align 5 +bn_sub: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Subtraction + subs x7,x7,x11 + sbcs x8,x8,x12 + sbcs x9,x9,x13 + sbc x10,x10,x14 + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + + ret +.size bn_sub,.-bn_sub + +// void ecp_sm2p256_div_by_2(BN_ULONG *r,const BN_ULONG *a); +.globl ecp_sm2p256_div_by_2 +.type ecp_sm2p256_div_by_2,%function +.align 5 +ecp_sm2p256_div_by_2: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] 
+ + // Save the least significant bit + mov x3,x7 + + // Right shift 1 + extr x7,x8,x7,#1 + extr x8,x9,x8,#1 + extr x9,x10,x9,#1 + lsr x10,x10,#1 + + // Load mod + adrp x2,.Lpoly_div_2 + add x2,x2,#:lo12:.Lpoly_div_2 + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Parity check + tst x3,#1 + csel x11,xzr,x11,eq + csel x12,xzr,x12,eq + csel x13,xzr,x13,eq + csel x14,xzr,x14,eq + + // Add + adds x7,x7,x11 + adcs x8,x8,x12 + adcs x9,x9,x13 + adc x10,x10,x14 + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + ret +.size ecp_sm2p256_div_by_2,.-ecp_sm2p256_div_by_2 + +// void ecp_sm2p256_div_by_2_mod_ord(BN_ULONG *r,const BN_ULONG *a); +.globl ecp_sm2p256_div_by_2_mod_ord +.type ecp_sm2p256_div_by_2_mod_ord,%function +.align 5 +ecp_sm2p256_div_by_2_mod_ord: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + + // Save the least significant bit + mov x3,x7 + + // Right shift 1 + extr x7,x8,x7,#1 + extr x8,x9,x8,#1 + extr x9,x10,x9,#1 + lsr x10,x10,#1 + + // Load mod + adrp x2,.Lord_div_2 + add x2,x2,#:lo12:.Lord_div_2 + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Parity check + tst x3,#1 + csel x11,xzr,x11,eq + csel x12,xzr,x12,eq + csel x13,xzr,x13,eq + csel x14,xzr,x14,eq + + // Add + adds x7,x7,x11 + adcs x8,x8,x12 + adcs x9,x9,x13 + adc x10,x10,x14 + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + ret +.size ecp_sm2p256_div_by_2_mod_ord,.-ecp_sm2p256_div_by_2_mod_ord + +// void ecp_sm2p256_mul_by_3(BN_ULONG *r,const BN_ULONG *a); +.globl ecp_sm2p256_mul_by_3 +.type ecp_sm2p256_mul_by_3,%function +.align 5 +ecp_sm2p256_mul_by_3: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + + // 2*a + adds x7,x7,x7 + adcs x8,x8,x8 + adcs x9,x9,x9 + adcs x10,x10,x10 + adcs x15,xzr,xzr + + mov x3,x7 + mov x4,x8 + mov x5,x9 + mov x6,x10 + + // Sub polynomial + adrp x2,.Lpoly + add x2,x2,#:lo12:.Lpoly + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + subs x7,x7,x11 + sbcs x8,x8,x12 + sbcs x9,x9,x13 + sbcs x10,x10,x14 + sbcs x15,x15,xzr + + csel x7,x7,x3,cs + csel x8,x8,x4,cs + csel x9,x9,x5,cs + csel x10,x10,x6,cs + eor x15,x15,x15 + + // 3*a + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + adds x7,x7,x11 + adcs x8,x8,x12 + adcs x9,x9,x13 + adcs x10,x10,x14 + adcs x15,xzr,xzr + + mov x3,x7 + mov x4,x8 + mov x5,x9 + mov x6,x10 + + // Sub polynomial + adrp x2,.Lpoly + add x2,x2,#:lo12:.Lpoly + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + subs x7,x7,x11 + sbcs x8,x8,x12 + sbcs x9,x9,x13 + sbcs x10,x10,x14 + sbcs x15,x15,xzr + + csel x7,x7,x3,cs + csel x8,x8,x4,cs + csel x9,x9,x5,cs + csel x10,x10,x6,cs + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + + ret +.size ecp_sm2p256_mul_by_3,.-ecp_sm2p256_mul_by_3 + +// void ecp_sm2p256_add(BN_ULONG *r,const BN_ULONG *a,const BN_ULONG *b); +.globl ecp_sm2p256_add +.type ecp_sm2p256_add,%function +.align 5 +ecp_sm2p256_add: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Addition + adds x7,x7,x11 + adcs x8,x8,x12 + adcs x9,x9,x13 + adcs x10,x10,x14 + adc x15,xzr,xzr + + // Load polynomial + adrp x2,.Lpoly + add x2,x2,#:lo12:.Lpoly + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Backup Addition + mov x3,x7 + mov x4,x8 + mov x5,x9 + mov x6,x10 + + // Sub polynomial + subs x3,x3,x11 + sbcs x4,x4,x12 + sbcs x5,x5,x13 + sbcs x6,x6,x14 + sbcs x15,x15,xzr + + // Select based on carry + csel x7,x7,x3,cc + csel x8,x8,x4,cc + csel x9,x9,x5,cc + csel x10,x10,x6,cc + + // Store results + stp x7,x8,[x0] + 
stp x9,x10,[x0,#16] + ret +.size ecp_sm2p256_add,.-ecp_sm2p256_add + +// void ecp_sm2p256_sub(BN_ULONG *r,const BN_ULONG *a,const BN_ULONG *b); +.globl ecp_sm2p256_sub +.type ecp_sm2p256_sub,%function +.align 5 +ecp_sm2p256_sub: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Subtraction + subs x7,x7,x11 + sbcs x8,x8,x12 + sbcs x9,x9,x13 + sbcs x10,x10,x14 + sbc x15,xzr,xzr + + // Load polynomial + adrp x2,.Lpoly + add x2,x2,#:lo12:.Lpoly + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Backup subtraction + mov x3,x7 + mov x4,x8 + mov x5,x9 + mov x6,x10 + + // Add polynomial + adds x3,x3,x11 + adcs x4,x4,x12 + adcs x5,x5,x13 + adcs x6,x6,x14 + tst x15,x15 + + // Select based on carry + csel x7,x7,x3,eq + csel x8,x8,x4,eq + csel x9,x9,x5,eq + csel x10,x10,x6,eq + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + ret +.size ecp_sm2p256_sub,.-ecp_sm2p256_sub + +// void ecp_sm2p256_sub_mod_ord(BN_ULONG *r,const BN_ULONG *a,const BN_ULONG *b); +.globl ecp_sm2p256_sub_mod_ord +.type ecp_sm2p256_sub_mod_ord,%function +.align 5 +ecp_sm2p256_sub_mod_ord: + AARCH64_VALID_CALL_TARGET + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Subtraction + subs x7,x7,x11 + sbcs x8,x8,x12 + sbcs x9,x9,x13 + sbcs x10,x10,x14 + sbc x15,xzr,xzr + + // Load polynomial + adrp x2,.Lord + add x2,x2,#:lo12:.Lord + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + + // Backup subtraction + mov x3,x7 + mov x4,x8 + mov x5,x9 + mov x6,x10 + + // Add polynomial + adds x3,x3,x11 + adcs x4,x4,x12 + adcs x5,x5,x13 + adcs x6,x6,x14 + tst x15,x15 + + // Select based on carry + csel x7,x7,x3,eq + csel x8,x8,x4,eq + csel x9,x9,x5,eq + csel x10,x10,x6,eq + + // Store results + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + ret +.size ecp_sm2p256_sub_mod_ord,.-ecp_sm2p256_sub_mod_ord + +.macro RDC + // a = | s7 | ... | s0 |, where si are 64-bit quantities + // = |a15|a14| ... |a1|a0|, where ai are 32-bit quantities + // | s7 | s6 | s5 | s4 | + // | a15 | a14 | a13 | a12 | a11 | a10 | a9 | a8 | + // | s3 | s2 | s1 | s0 | + // | a7 | a6 | a5 | a4 | a3 | a2 | a1 | a0 | + // ================================================= + // | a8 | a11 | a10 | a9 | a8 | 0 | s4 | (+) + // | a9 | a15 | s6 | a11 | 0 | a10 | a9 | (+) + // | a10 | 0 | a14 | a13 | a12 | 0 | s5 | (+) + // | a11 | 0 | s7 | a13 | 0 | a12 | a11 | (+) + // | a12 | 0 | s7 | a13 | 0 | s6 | (+) + // | a12 | 0 | 0 | a15 | a14 | 0 | a14 | a13 | (+) + // | a13 | 0 | 0 | 0 | a15 | 0 | a14 | a13 | (+) + // | a13 | 0 | 0 | 0 | 0 | 0 | s7 | (+) + // | a14 | 0 | 0 | 0 | 0 | 0 | s7 | (+) + // | a14 | 0 | 0 | 0 | 0 | 0 | 0 | a15 | (+) + // | a15 | 0 | 0 | 0 | 0 | 0 | 0 | a15 | (+) + // | a15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | (+) + // | s7 | 0 | 0 | 0 | 0 | 0 | 0 | (+) + // | 0 | 0 | 0 | 0 | 0 | a8 | 0 | 0 | (-) + // | 0 | 0 | 0 | 0 | 0 | a9 | 0 | 0 | (-) + // | 0 | 0 | 0 | 0 | 0 | a13 | 0 | 0 | (-) + // | 0 | 0 | 0 | 0 | 0 | a14 | 0 | 0 | (-) + // | U[7]| U[6]| U[5]| U[4]| U[3]| U[2]| U[1]| U[0]| + // | V[3] | V[2] | V[1] | V[0] | + + // 1. 64-bit addition + // t2=s6+s7+s7 + adds x5,x13,x14 + adcs x4,xzr,xzr + adds x5,x5,x14 + adcs x4,x4,xzr + // t3=s4+s5+t2 + adds x6,x11,x5 + adcs x15,x4,xzr + adds x6,x6,x12 + adcs x15,x15,xzr + // sum + adds x7,x7,x6 + adcs x8,x8,x15 + adcs x9,x9,x5 + adcs x10,x10,x14 + adcs x3,xzr,xzr + adds x10,x10,x4 + adcs x3,x3,xzr + + stp x7,x8,[sp,#32] + stp x9,x10,[sp,#48] + + // 2. 
64-bit to 32-bit spread + mov x4,#0xffffffff + mov x7,x11 + mov x8,x12 + mov x9,x13 + mov x10,x14 + and x7,x7,x4 // a8 + and x8,x8,x4 // a10 + and x9,x9,x4 // a12 + and x10,x10,x4 // a14 + lsr x11,x11,#32 // a9 + lsr x12,x12,#32 // a11 + lsr x13,x13,#32 // a13 + lsr x14,x14,#32 // a15 + + // 3. 32-bit addition + add x4,x10,x9 // t1 <- a12 + a14 + add x5,x14,x13 // t2 <- a13 + a15 + add x6,x7,x11 // t3 <- a8 + a9 + add x15,x10,x8 // t4 <- a10 + a14 + add x14,x14,x12 // a15 <- a11 + a15 + add x9,x5,x4 // a12 <- a12 + a13 + a14 + a15 + add x8,x8,x9 // a10 <- a10 + a12 + a13 + a14 + a15 + add x8,x8,x9 // a10 <- a10 + 2*(a12 + a13 + a14 + a15) + add x8,x8,x6 // a10 <- a8 + a9 + a10 + 2*(a12 + a13 + a14 + a15) + add x8,x8,x12 // a10 <- a8 + a9 + a10 + a11 + 2*(a12 + a13 + a14 + a15) + add x9,x9,x13 // a12 <- a12 + 2*a13 + a14 + a15 + add x9,x9,x12 // a12 <- a11 + a12 + 2*a13 + a14 + a15 + add x9,x9,x7 // a12 <- a8 + a11 + a12 + 2*a13 + a14 + a15 + add x6,x6,x10 // t3 <- a8 + a9 + a14 + add x6,x6,x13 // t3 <- a8 + a9 + a13 + a14 + add x11,x11,x5 // a9 <- a9 + a13 + a15 + add x12,x12,x11 // a11 <- a9 + a11 + a13 + a15 + add x12,x12,x5 // a11 <- a9 + a11 + 2*(a13 + a15) + add x4,x4,x15 // t1 <- a10 + a12 + 2*a14 + + // U[0] s5 a9 + a11 + 2*(a13 + a15) + // U[1] t1 a10 + a12 + 2*a14 + // U[2] -t3 a8 + a9 + a13 + a14 + // U[3] s2 a8 + a11 + a12 + 2*a13 + a14 + a15 + // U[4] s4 a9 + a13 + a15 + // U[5] t4 a10 + a14 + // U[6] s7 a11 + a15 + // U[7] s1 a8 + a9 + a10 + a11 + 2*(a12 + a13 + a14 + a15) + + // 4. 32-bit to 64-bit + lsl x7,x4,#32 + extr x4,x9,x4,#32 + extr x9,x15,x9,#32 + extr x15,x8,x15,#32 + lsr x8,x8,#32 + + // 5. 64-bit addition + adds x12,x12,x7 + adcs x4,x4,xzr + adcs x11,x11,x9 + adcs x14,x14,x15 + adcs x3,x3,x8 + + // V[0] s5 + // V[1] t1 + // V[2] s4 + // V[3] s7 + // carry t0 + // sub t3 + + // 5. Process s0-s3 + ldp x7,x8,[sp,#32] + ldp x9,x10,[sp,#48] + // add with V0-V3 + adds x7,x7,x12 + adcs x8,x8,x4 + adcs x9,x9,x11 + adcs x10,x10,x14 + adcs x3,x3,xzr + // sub with t3 + subs x8,x8,x6 + sbcs x9,x9,xzr + sbcs x10,x10,xzr + sbcs x3,x3,xzr + + // 6. MOD + // First Mod + lsl x4,x3,#32 + subs x5,x4,x3 + + adds x7,x7,x3 + adcs x8,x8,x5 + adcs x9,x9,xzr + adcs x10,x10,x4 + + // Last Mod + // return y - p if y > p else y + mov x11,x7 + mov x12,x8 + mov x13,x9 + mov x14,x10 + + adrp x3,.Lpoly + add x3,x3,#:lo12:.Lpoly + ldp x4,x5,[x3] + ldp x6,x15,[x3,#16] + + adcs x16,xzr,xzr + + subs x7,x7,x4 + sbcs x8,x8,x5 + sbcs x9,x9,x6 + sbcs x10,x10,x15 + sbcs x16,x16,xzr + + csel x7,x7,x11,cs + csel x8,x8,x12,cs + csel x9,x9,x13,cs + csel x10,x10,x14,cs + +.endm + +// void ecp_sm2p256_mul(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b); +.globl ecp_sm2p256_mul +.type ecp_sm2p256_mul,%function +.align 5 +ecp_sm2p256_mul: + AARCH64_SIGN_LINK_REGISTER + // Store scalar registers + stp x29,x30,[sp,#-80]! 
+ add x29,sp,#0 + stp x16,x17,[sp,#16] + stp x19,x20,[sp,#64] + + // Load inputs + ldp x7,x8,[x1] + ldp x9,x10,[x1,#16] + ldp x11,x12,[x2] + ldp x13,x14,[x2,#16] + +// ### multiplication ### + // ======================== + // s3 s2 s1 s0 + // * s7 s6 s5 s4 + // ------------------------ + // + s0 s0 s0 s0 + // * * * * + // s7 s6 s5 s4 + // s1 s1 s1 s1 + // * * * * + // s7 s6 s5 s4 + // s2 s2 s2 s2 + // * * * * + // s7 s6 s5 s4 + // s3 s3 s3 s3 + // * * * * + // s7 s6 s5 s4 + // ------------------------ + // s7 s6 s5 s4 s3 s2 s1 s0 + // ======================== + +// ### s0*s4 ### + mul x16,x7,x11 + umulh x5,x7,x11 + +// ### s1*s4 + s0*s5 ### + mul x3,x8,x11 + umulh x4,x8,x11 + adds x5,x5,x3 + adcs x6,x4,xzr + + mul x3,x7,x12 + umulh x4,x7,x12 + adds x5,x5,x3 + adcs x6,x6,x4 + adcs x15,xzr,xzr + +// ### s2*s4 + s1*s5 + s0*s6 ### + mul x3,x9,x11 + umulh x4,x9,x11 + adds x6,x6,x3 + adcs x15,x15,x4 + + mul x3,x8,x12 + umulh x4,x8,x12 + adds x6,x6,x3 + adcs x15,x15,x4 + adcs x17,xzr,xzr + + mul x3,x7,x13 + umulh x4,x7,x13 + adds x6,x6,x3 + adcs x15,x15,x4 + adcs x17,x17,xzr + +// ### s3*s4 + s2*s5 + s1*s6 + s0*s7 ### + mul x3,x10,x11 + umulh x4,x10,x11 + adds x15,x15,x3 + adcs x17,x17,x4 + adcs x19,xzr,xzr + + mul x3,x9,x12 + umulh x4,x9,x12 + adds x15,x15,x3 + adcs x17,x17,x4 + adcs x19,x19,xzr + + mul x3,x8,x13 + umulh x4,x8,x13 + adds x15,x15,x3 + adcs x17,x17,x4 + adcs x19,x19,xzr + + mul x3,x7,x14 + umulh x4,x7,x14 + adds x15,x15,x3 + adcs x17,x17,x4 + adcs x19,x19,xzr + +// ### s3*s5 + s2*s6 + s1*s7 ### + mul x3,x10,x12 + umulh x4,x10,x12 + adds x17,x17,x3 + adcs x19,x19,x4 + adcs x20,xzr,xzr + + mul x3,x9,x13 + umulh x4,x9,x13 + adds x17,x17,x3 + adcs x19,x19,x4 + adcs x20,x20,xzr + + mul x3,x8,x14 + umulh x4,x8,x14 + adds x11,x17,x3 + adcs x19,x19,x4 + adcs x20,x20,xzr + +// ### s3*s6 + s2*s7 ### + mul x3,x10,x13 + umulh x4,x10,x13 + adds x19,x19,x3 + adcs x20,x20,x4 + adcs x17,xzr,xzr + + mul x3,x9,x14 + umulh x4,x9,x14 + adds x12,x19,x3 + adcs x20,x20,x4 + adcs x17,x17,xzr + +// ### s3*s7 ### + mul x3,x10,x14 + umulh x4,x10,x14 + adds x13,x20,x3 + adcs x14,x17,x4 + + mov x7,x16 + mov x8,x5 + mov x9,x6 + mov x10,x15 + + // result of mul: s7 s6 s5 s4 s3 s2 s1 s0 + +// ### Reduction ### + RDC + + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + + // Restore scalar registers + ldp x16,x17,[sp,#16] + ldp x19,x20,[sp,#64] + ldp x29,x30,[sp],#80 + + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_sm2p256_mul,.-ecp_sm2p256_mul + +// void ecp_sm2p256_sqr(BN_ULONG *r, const BN_ULONG *a); +.globl ecp_sm2p256_sqr +.type ecp_sm2p256_sqr,%function +.align 5 + +ecp_sm2p256_sqr: + AARCH64_SIGN_LINK_REGISTER + // Store scalar registers + stp x29,x30,[sp,#-80]! 
+ add x29,sp,#0 + stp x16,x17,[sp,#16] + stp x19,x20,[sp,#64] + + // Load inputs + ldp x11,x12,[x1] + ldp x13,x14,[x1,#16] + +// ### square ### + // ======================== + // s7 s6 s5 s4 + // * s7 s6 s5 s4 + // ------------------------ + // + s4 s4 s4 s4 + // * * * * + // s7 s6 s5 s4 + // s5 s5 s5 s5 + // * * * * + // s7 s6 s5 s4 + // s6 s6 s6 s6 + // * * * * + // s7 s6 s5 s4 + // s7 s7 s7 s7 + // * * * * + // s7 s6 s5 s4 + // ------------------------ + // s7 s6 s5 s4 s3 s2 s1 s0 + // ======================== + +// ### s4*s5 ### + mul x8,x11,x12 + umulh x9,x11,x12 + +// ### s4*s6 ### + mul x3,x13,x11 + umulh x10,x13,x11 + adds x9,x9,x3 + adcs x10,x10,xzr + +// ### s4*s7 + s5*s6 ### + mul x3,x14,x11 + umulh x4,x14,x11 + adds x10,x10,x3 + adcs x7,x4,xzr + + mul x3,x13,x12 + umulh x4,x13,x12 + adds x10,x10,x3 + adcs x7,x7,x4 + adcs x5,xzr,xzr + +// ### s5*s7 ### + mul x3,x14,x12 + umulh x4,x14,x12 + adds x7,x7,x3 + adcs x5,x5,x4 + +// ### s6*s7 ### + mul x3,x14,x13 + umulh x4,x14,x13 + adds x5,x5,x3 + adcs x6,x4,xzr + +// ### 2*(t3,t2,s0,s3,s2,s1) ### + adds x8,x8,x8 + adcs x9,x9,x9 + adcs x10,x10,x10 + adcs x7,x7,x7 + adcs x5,x5,x5 + adcs x6,x6,x6 + adcs x15,xzr,xzr + +// ### s4*s4 ### + mul x16,x11,x11 + umulh x17,x11,x11 + +// ### s5*s5 ### + mul x11,x12,x12 + umulh x12,x12,x12 + +// ### s6*s6 ### + mul x3,x13,x13 + umulh x4,x13,x13 + +// ### s7*s7 ### + mul x19,x14,x14 + umulh x20,x14,x14 + + adds x8,x8,x17 + adcs x9,x9,x11 + adcs x10,x10,x12 + adcs x7,x7,x3 + adcs x5,x5,x4 + adcs x6,x6,x19 + adcs x15,x15,x20 + + mov x11,x7 + mov x7,x16 + mov x12,x5 + mov x13,x6 + mov x14,x15 + + // result of mul: s7 s6 s5 s4 s3 s2 s1 s0 + +// ### Reduction ### + RDC + + stp x7,x8,[x0] + stp x9,x10,[x0,#16] + + // Restore scalar registers + ldp x16,x17,[sp,#16] + ldp x19,x20,[sp,#64] + ldp x29,x30,[sp],#80 + + AARCH64_VALIDATE_LINK_REGISTER + ret +.size ecp_sm2p256_sqr,.-ecp_sm2p256_sqr diff --git a/contrib/openssl-cmake/asm/crypto/ec/x25519-x86_64.s b/contrib/openssl-cmake/asm/crypto/ec/x25519-x86_64.s new file mode 100644 index 000000000000..8fd319c83c88 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/ec/x25519-x86_64.s @@ -0,0 +1,802 @@ +.text + +.globl x25519_fe51_mul +.type x25519_fe51_mul,@function +.align 32 +x25519_fe51_mul: +.cfi_startproc + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 40 +.Lfe51_mul_body: + + movq 0(%rsi),%rax + movq 0(%rdx),%r11 + movq 8(%rdx),%r12 + movq 16(%rdx),%r13 + movq 24(%rdx),%rbp + movq 32(%rdx),%r14 + + movq %rdi,32(%rsp) + movq %rax,%rdi + mulq %r11 + movq %r11,0(%rsp) + movq %rax,%rbx + movq %rdi,%rax + movq %rdx,%rcx + mulq %r12 + movq %r12,8(%rsp) + movq %rax,%r8 + movq %rdi,%rax + leaq (%r14,%r14,8),%r15 + movq %rdx,%r9 + mulq %r13 + movq %r13,16(%rsp) + movq %rax,%r10 + movq %rdi,%rax + leaq (%r14,%r15,2),%rdi + movq %rdx,%r11 + mulq %rbp + movq %rax,%r12 + movq 0(%rsi),%rax + movq %rdx,%r13 + mulq %r14 + movq %rax,%r14 + movq 8(%rsi),%rax + movq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 16(%rsi),%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + movq 24(%rsi),%rax + adcq %rdx,%r9 + mulq %rdi + addq %rax,%r10 + movq 32(%rsi),%rax + adcq %rdx,%r11 + mulq %rdi + imulq $19,%rbp,%rdi + addq 
%rax,%r12 + movq 8(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + movq 16(%rsp),%rbp + addq %rax,%r14 + movq 16(%rsi),%rax + adcq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 24(%rsi),%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + movq 32(%rsi),%rax + adcq %rdx,%r9 + mulq %rdi + imulq $19,%rbp,%rdi + addq %rax,%r10 + movq 8(%rsi),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq 16(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + movq 8(%rsp),%rbp + addq %rax,%r14 + movq 24(%rsi),%rax + adcq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 32(%rsi),%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + movq 8(%rsi),%rax + adcq %rdx,%r9 + mulq %rbp + imulq $19,%rbp,%rdi + addq %rax,%r10 + movq 16(%rsi),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq 24(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + movq 0(%rsp),%rbp + addq %rax,%r14 + movq 32(%rsi),%rax + adcq %rdx,%r15 + + mulq %rdi + addq %rax,%rbx + movq 8(%rsi),%rax + adcq %rdx,%rcx + mulq %rbp + addq %rax,%r8 + movq 16(%rsi),%rax + adcq %rdx,%r9 + mulq %rbp + addq %rax,%r10 + movq 24(%rsi),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq 32(%rsi),%rax + adcq %rdx,%r13 + mulq %rbp + addq %rax,%r14 + adcq %rdx,%r15 + + movq 32(%rsp),%rdi + jmp .Lreduce51 +.Lfe51_mul_epilogue: +.cfi_endproc +.size x25519_fe51_mul,.-x25519_fe51_mul + +.globl x25519_fe51_sqr +.type x25519_fe51_sqr,@function +.align 32 +x25519_fe51_sqr: +.cfi_startproc + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 40 +.Lfe51_sqr_body: + + movq 0(%rsi),%rax + movq 16(%rsi),%r15 + movq 32(%rsi),%rbp + + movq %rdi,32(%rsp) + leaq (%rax,%rax,1),%r14 + mulq %rax + movq %rax,%rbx + movq 8(%rsi),%rax + movq %rdx,%rcx + mulq %r14 + movq %rax,%r8 + movq %r15,%rax + movq %r15,0(%rsp) + movq %rdx,%r9 + mulq %r14 + movq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%r11 + imulq $19,%rbp,%rdi + mulq %r14 + movq %rax,%r12 + movq %rbp,%rax + movq %rdx,%r13 + mulq %r14 + movq %rax,%r14 + movq %rbp,%rax + movq %rdx,%r15 + + mulq %rdi + addq %rax,%r12 + movq 8(%rsi),%rax + adcq %rdx,%r13 + + movq 24(%rsi),%rsi + leaq (%rax,%rax,1),%rbp + mulq %rax + addq %rax,%r10 + movq 0(%rsp),%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r12 + movq %rbp,%rax + adcq %rdx,%r13 + mulq %rsi + addq %rax,%r14 + movq %rbp,%rax + adcq %rdx,%r15 + imulq $19,%rsi,%rbp + mulq %rdi + addq %rax,%rbx + leaq (%rsi,%rsi,1),%rax + adcq %rdx,%rcx + + mulq %rdi + addq %rax,%r10 + movq %rsi,%rax + adcq %rdx,%r11 + mulq %rbp + addq %rax,%r8 + movq 0(%rsp),%rax + adcq %rdx,%r9 + + leaq (%rax,%rax,1),%rsi + mulq %rax + addq %rax,%r14 + movq %rbp,%rax + adcq %rdx,%r15 + mulq %rsi + addq %rax,%rbx + movq %rsi,%rax + adcq %rdx,%rcx + mulq %rdi + addq %rax,%r8 + adcq %rdx,%r9 + + movq 32(%rsp),%rdi + jmp .Lreduce51 + +.align 32 +.Lreduce51: + movq $0x7ffffffffffff,%rbp + + movq %r10,%rdx + shrq $51,%r10 + shlq $13,%r11 + andq %rbp,%rdx + orq %r10,%r11 + addq %r11,%r12 + adcq $0,%r13 + + movq %rbx,%rax + shrq $51,%rbx + shlq $13,%rcx + andq %rbp,%rax + orq %rbx,%rcx + addq %rcx,%r8 + adcq $0,%r9 + + movq %r12,%rbx + shrq $51,%r12 + shlq $13,%r13 + andq %rbp,%rbx + orq %r12,%r13 + addq %r13,%r14 + adcq $0,%r15 + + movq %r8,%rcx + shrq $51,%r8 + shlq $13,%r9 + andq 
%rbp,%rcx + orq %r8,%r9 + addq %r9,%rdx + + movq %r14,%r10 + shrq $51,%r14 + shlq $13,%r15 + andq %rbp,%r10 + orq %r14,%r15 + + leaq (%r15,%r15,8),%r14 + leaq (%r15,%r14,2),%r15 + addq %r15,%rax + + movq %rdx,%r8 + andq %rbp,%rdx + shrq $51,%r8 + addq %r8,%rbx + + movq %rax,%r9 + andq %rbp,%rax + shrq $51,%r9 + addq %r9,%rcx + + movq %rax,0(%rdi) + movq %rcx,8(%rdi) + movq %rdx,16(%rdi) + movq %rbx,24(%rdi) + movq %r10,32(%rdi) + + movq 40(%rsp),%r15 +.cfi_restore %r15 + movq 48(%rsp),%r14 +.cfi_restore %r14 + movq 56(%rsp),%r13 +.cfi_restore %r13 + movq 64(%rsp),%r12 +.cfi_restore %r12 + movq 72(%rsp),%rbx +.cfi_restore %rbx + movq 80(%rsp),%rbp +.cfi_restore %rbp + leaq 88(%rsp),%rsp +.cfi_adjust_cfa_offset 88 +.Lfe51_sqr_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe51_sqr,.-x25519_fe51_sqr + +.globl x25519_fe51_mul121666 +.type x25519_fe51_mul121666,@function +.align 32 +x25519_fe51_mul121666: +.cfi_startproc + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + leaq -40(%rsp),%rsp +.cfi_adjust_cfa_offset 40 +.Lfe51_mul121666_body: + movl $121666,%eax + + mulq 0(%rsi) + movq %rax,%rbx + movl $121666,%eax + movq %rdx,%rcx + mulq 8(%rsi) + movq %rax,%r8 + movl $121666,%eax + movq %rdx,%r9 + mulq 16(%rsi) + movq %rax,%r10 + movl $121666,%eax + movq %rdx,%r11 + mulq 24(%rsi) + movq %rax,%r12 + movl $121666,%eax + movq %rdx,%r13 + mulq 32(%rsi) + movq %rax,%r14 + movq %rdx,%r15 + + jmp .Lreduce51 +.Lfe51_mul121666_epilogue: +.cfi_endproc +.size x25519_fe51_mul121666,.-x25519_fe51_mul121666 + +.globl x25519_fe64_eligible +.type x25519_fe64_eligible,@function +.align 32 +x25519_fe64_eligible: +.cfi_startproc + movl OPENSSL_ia32cap_P+8(%rip),%ecx + xorl %eax,%eax + andl $0x80100,%ecx + cmpl $0x80100,%ecx + cmovel %ecx,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe64_eligible,.-x25519_fe64_eligible + +.globl x25519_fe64_mul +.type x25519_fe64_mul,@function +.align 32 +x25519_fe64_mul: +.cfi_startproc + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 +.cfi_offset %rdi,-64 + leaq -16(%rsp),%rsp +.cfi_adjust_cfa_offset 16 +.Lfe64_mul_body: + + movq %rdx,%rax + movq 0(%rdx),%rbp + movq 0(%rsi),%rdx + movq 8(%rax),%rcx + movq 16(%rax),%r14 + movq 24(%rax),%r15 + + mulxq %rbp,%r8,%rax + xorl %edi,%edi + mulxq %rcx,%r9,%rbx + adcxq %rax,%r9 + mulxq %r14,%r10,%rax + adcxq %rbx,%r10 + mulxq %r15,%r11,%r12 + movq 8(%rsi),%rdx + adcxq %rax,%r11 + movq %r14,(%rsp) + adcxq %rdi,%r12 + + mulxq %rbp,%rax,%rbx + adoxq %rax,%r9 + adcxq %rbx,%r10 + mulxq %rcx,%rax,%rbx + adoxq %rax,%r10 + adcxq %rbx,%r11 + mulxq %r14,%rax,%rbx + adoxq %rax,%r11 + adcxq %rbx,%r12 + mulxq %r15,%rax,%r13 + movq 16(%rsi),%rdx + adoxq %rax,%r12 + adcxq %rdi,%r13 + adoxq %rdi,%r13 + + mulxq %rbp,%rax,%rbx + adcxq %rax,%r10 + adoxq %rbx,%r11 + mulxq %rcx,%rax,%rbx + adcxq %rax,%r11 + adoxq %rbx,%r12 + mulxq %r14,%rax,%rbx + adcxq %rax,%r12 + adoxq %rbx,%r13 + 
mulxq %r15,%rax,%r14 + movq 24(%rsi),%rdx + adcxq %rax,%r13 + adoxq %rdi,%r14 + adcxq %rdi,%r14 + + mulxq %rbp,%rax,%rbx + adoxq %rax,%r11 + adcxq %rbx,%r12 + mulxq %rcx,%rax,%rbx + adoxq %rax,%r12 + adcxq %rbx,%r13 + mulxq (%rsp),%rax,%rbx + adoxq %rax,%r13 + adcxq %rbx,%r14 + mulxq %r15,%rax,%r15 + movl $38,%edx + adoxq %rax,%r14 + adcxq %rdi,%r15 + adoxq %rdi,%r15 + + jmp .Lreduce64 +.Lfe64_mul_epilogue: +.cfi_endproc +.size x25519_fe64_mul,.-x25519_fe64_mul + +.globl x25519_fe64_sqr +.type x25519_fe64_sqr,@function +.align 32 +x25519_fe64_sqr: +.cfi_startproc + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + pushq %rdi +.cfi_adjust_cfa_offset 8 +.cfi_offset %rdi,-64 + leaq -16(%rsp),%rsp +.cfi_adjust_cfa_offset 16 +.Lfe64_sqr_body: + + movq 0(%rsi),%rdx + movq 8(%rsi),%rcx + movq 16(%rsi),%rbp + movq 24(%rsi),%rsi + + + mulxq %rdx,%r8,%r15 + mulxq %rcx,%r9,%rax + xorl %edi,%edi + mulxq %rbp,%r10,%rbx + adcxq %rax,%r10 + mulxq %rsi,%r11,%r12 + movq %rcx,%rdx + adcxq %rbx,%r11 + adcxq %rdi,%r12 + + + mulxq %rbp,%rax,%rbx + adoxq %rax,%r11 + adcxq %rbx,%r12 + mulxq %rsi,%rax,%r13 + movq %rbp,%rdx + adoxq %rax,%r12 + adcxq %rdi,%r13 + + + mulxq %rsi,%rax,%r14 + movq %rcx,%rdx + adoxq %rax,%r13 + adcxq %rdi,%r14 + adoxq %rdi,%r14 + + adcxq %r9,%r9 + adoxq %r15,%r9 + adcxq %r10,%r10 + mulxq %rdx,%rax,%rbx + movq %rbp,%rdx + adcxq %r11,%r11 + adoxq %rax,%r10 + adcxq %r12,%r12 + adoxq %rbx,%r11 + mulxq %rdx,%rax,%rbx + movq %rsi,%rdx + adcxq %r13,%r13 + adoxq %rax,%r12 + adcxq %r14,%r14 + adoxq %rbx,%r13 + mulxq %rdx,%rax,%r15 + movl $38,%edx + adoxq %rax,%r14 + adcxq %rdi,%r15 + adoxq %rdi,%r15 + jmp .Lreduce64 + +.align 32 +.Lreduce64: + mulxq %r12,%rax,%rbx + adcxq %rax,%r8 + adoxq %rbx,%r9 + mulxq %r13,%rax,%rbx + adcxq %rax,%r9 + adoxq %rbx,%r10 + mulxq %r14,%rax,%rbx + adcxq %rax,%r10 + adoxq %rbx,%r11 + mulxq %r15,%rax,%r12 + adcxq %rax,%r11 + adoxq %rdi,%r12 + adcxq %rdi,%r12 + + movq 16(%rsp),%rdi + imulq %rdx,%r12 + + addq %r12,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + sbbq %rax,%rax + andq $38,%rax + + addq %rax,%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r8,0(%rdi) + + movq 24(%rsp),%r15 +.cfi_restore %r15 + movq 32(%rsp),%r14 +.cfi_restore %r14 + movq 40(%rsp),%r13 +.cfi_restore %r13 + movq 48(%rsp),%r12 +.cfi_restore %r12 + movq 56(%rsp),%rbx +.cfi_restore %rbx + movq 64(%rsp),%rbp +.cfi_restore %rbp + leaq 72(%rsp),%rsp +.cfi_adjust_cfa_offset 88 +.Lfe64_sqr_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe64_sqr,.-x25519_fe64_sqr + +.globl x25519_fe64_mul121666 +.type x25519_fe64_mul121666,@function +.align 32 +x25519_fe64_mul121666: +.Lfe64_mul121666_body: +.cfi_startproc + movl $121666,%edx + mulxq 0(%rsi),%r8,%rcx + mulxq 8(%rsi),%r9,%rax + addq %rcx,%r9 + mulxq 16(%rsi),%r10,%rcx + adcq %rax,%r10 + mulxq 24(%rsi),%r11,%rax + adcq %rcx,%r11 + adcq $0,%rax + + imulq $38,%rax,%rax + + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%r11 + + sbbq %rax,%rax + andq $38,%rax + + addq %rax,%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r8,0(%rdi) + +.Lfe64_mul121666_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe64_mul121666,.-x25519_fe64_mul121666 + +.globl x25519_fe64_add +.type 
x25519_fe64_add,@function +.align 32 +x25519_fe64_add: +.Lfe64_add_body: +.cfi_startproc + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + + sbbq %rax,%rax + andq $38,%rax + + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + movq %r9,8(%rdi) + adcq $0,%r11 + movq %r10,16(%rdi) + sbbq %rax,%rax + movq %r11,24(%rdi) + andq $38,%rax + + addq %rax,%r8 + movq %r8,0(%rdi) + +.Lfe64_add_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe64_add,.-x25519_fe64_add + +.globl x25519_fe64_sub +.type x25519_fe64_sub,@function +.align 32 +x25519_fe64_sub: +.Lfe64_sub_body: +.cfi_startproc + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + + sbbq %rax,%rax + andq $38,%rax + + subq %rax,%r8 + sbbq $0,%r9 + sbbq $0,%r10 + movq %r9,8(%rdi) + sbbq $0,%r11 + movq %r10,16(%rdi) + sbbq %rax,%rax + movq %r11,24(%rdi) + andq $38,%rax + + subq %rax,%r8 + movq %r8,0(%rdi) + +.Lfe64_sub_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe64_sub,.-x25519_fe64_sub + +.globl x25519_fe64_tobytes +.type x25519_fe64_tobytes,@function +.align 32 +x25519_fe64_tobytes: +.Lfe64_to_body: +.cfi_startproc + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + + + leaq (%r11,%r11,1),%rax + sarq $63,%r11 + shrq $1,%rax + andq $19,%r11 + addq $19,%r11 + + addq %r11,%r8 + adcq $0,%r9 + adcq $0,%r10 + adcq $0,%rax + + leaq (%rax,%rax,1),%r11 + sarq $63,%rax + shrq $1,%r11 + notq %rax + andq $19,%rax + + subq %rax,%r8 + sbbq $0,%r9 + sbbq $0,%r10 + sbbq $0,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + +.Lfe64_to_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size x25519_fe64_tobytes,.-x25519_fe64_tobytes +.byte 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101,115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/contrib/openssl-cmake/asm/crypto/loongarch64cpuid.S b/contrib/openssl-cmake/asm/crypto/loongarch64cpuid.S new file mode 100644 index 000000000000..b8ba7d3e831b --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/loongarch64cpuid.S @@ -0,0 +1,69 @@ +################################################################################ +# int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len) +################################################################################ +.text +.balign 16 +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +CRYPTO_memcmp: + li.d $r12,0 + beqz $r6,2f # len == 0 +1: + ld.bu $r13,$r4,0 + ld.bu $r14,$r5,0 + addi.d $r4,$r4,1 + addi.d $r5,$r5,1 + addi.d $r6,$r6,-1 + xor $r13,$r13,$r14 + or $r12,$r12,$r13 + blt $r0,$r6,1b +2: + move $r4,$r12 + jr $r1 +################################################################################ +# void OPENSSL_cleanse(void *ptr, size_t len) +################################################################################ +.text +.balign 16 +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +OPENSSL_cleanse: + beqz $r5,2f # len == 0, return + srli.d $r12,$r5,4 + bnez $r12,3f # len > 15 + +1: # Store <= 15 individual bytes + st.b $r0,$r4,0 + addi.d $r4,$r4,1 + addi.d $r5,$r5,-1 + bnez $r5,1b +2: + jr $r1 + +3: # Store individual bytes until we are aligned + andi $r12,$r4,0x7 + beqz $r12,4f + st.b $r0,$r4,0 + addi.d $r4,$r4,1 + 
addi.d $r5,$r5,-1 + b 3b + +4: # Store aligned dwords + li.d $r13,8 +4: + st.d $r0,$r4,0 + addi.d $r4,$r4,8 + addi.d $r5,$r5,-8 + bge $r5,$r13,4b # if len>=8 loop + bnez $r5,1b # if len<8 and len != 0, store remaining bytes + jr $r1 +################################################################################ +# uint32_t OPENSSL_rdtsc(void) +################################################################################ +.text +.balign 16 +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +OPENSSL_rdtsc: + rdtimel.w $r4,$r0 + jr $r1 diff --git a/contrib/openssl-cmake/asm/crypto/md5/asm/md5-aarch64.S b/contrib/openssl-cmake/asm/crypto/md5/asm/md5-aarch64.S new file mode 100644 index 000000000000..b6f8777c9fd0 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/md5/asm/md5-aarch64.S @@ -0,0 +1,677 @@ +#include "arm_arch.h" + +.text +.globl ossl_md5_block_asm_data_order +.type ossl_md5_block_asm_data_order,@function +ossl_md5_block_asm_data_order: + AARCH64_VALID_CALL_TARGET + // Save all callee-saved registers + stp x19,x20,[sp,#-80]! + stp x21,x22,[sp,#16] + stp x23,x24,[sp,#32] + stp x25,x26,[sp,#48] + stp x27,x28,[sp,#64] + + ldp w10, w11, [x0, #0] // .Load MD5 state->A and state->B + ldp w12, w13, [x0, #8] // .Load MD5 state->C and state->D +.align 5 +ossl_md5_blocks_loop: + eor x17, x12, x13 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + and x16, x17, x11 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + ldp w15, w20, [x1] // .Load 2 words of input data0 M[0],M[1] + ldp w3, w21, [x1, #8] // .Load 2 words of input data0 M[2],M[3] +#ifdef __AARCH64EB__ + rev w15, w15 + rev w20, w20 + rev w3, w3 + rev w21, w21 +#endif + eor x14, x16, x13 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x9, #0xa478 // .Load lower half of constant 0xd76aa478 + movk x9, #0xd76a, lsl #16 // .Load upper half of constant 0xd76aa478 + add w8, w10, w15 // Add dest value + add w7, w8, w9 // Add constant 0xd76aa478 + add w6, w7, w14 // Add aux function result + ror w6, w6, #25 // Rotate left s=7 bits + eor x5, x11, x12 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w4, w11, w6 // Add X parameter round 1 A=FF(A, B, C, D, 0xd76aa478, s=7, M[0]) + and x8, x5, x4 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x17, x8, x12 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x16, #0xb756 // .Load lower half of constant 0xe8c7b756 + movk x16, #0xe8c7, lsl #16 // .Load upper half of constant 0xe8c7b756 + add w9, w13, w20 // Add dest value + add w7, w9, w16 // Add constant 0xe8c7b756 + add w14, w7, w17 // Add aux function result + ror w14, w14, #20 // Rotate left s=12 bits + eor x6, x4, x11 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w5, w4, w14 // Add X parameter round 1 D=FF(D, A, B, C, 0xe8c7b756, s=12, M[1]) + and x8, x6, x5 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x9, x8, x11 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x16, #0x70db // .Load lower half of constant 0x242070db + movk x16, #0x2420, lsl #16 // .Load upper half of constant 0x242070db + add w7, w12, w3 // Add dest value + add w17, w7, w16 // Add constant 0x242070db + add w14, w17, w9 // Add aux function result + ror w14, w14, #15 // Rotate left s=17 bits + eor x6, x5, x4 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w8, w5, w14 // Add X parameter round 1 C=FF(C, D, A, B, 0x242070db, s=17, M[2]) + and x7, x6, x8 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x16, x7, x4 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) 
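+	// (Illustrative note; "T[i]" below is shorthand for the per-step constant
+	//  named in these comments, e.g. 0xc1bdceee.) Each round-1 step computes
+	//  A = B + ROL32(A + F(B,C,D) + M[k] + T[i], s), with
+	//  F(x,y,z) = ((y ^ z) & x) ^ z, i.e. (x & y) | (~x & z); the
+	//  "ror #(32-s)" instructions implement the left rotation by s.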
+ movz x9, #0xceee // .Load lower half of constant 0xc1bdceee + movk x9, #0xc1bd, lsl #16 // .Load upper half of constant 0xc1bdceee + add w14, w11, w21 // Add dest value + add w6, w14, w9 // Add constant 0xc1bdceee + add w7, w6, w16 // Add aux function result + ror w7, w7, #10 // Rotate left s=22 bits + eor x17, x8, x5 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w9, w8, w7 // Add X parameter round 1 B=FF(B, C, D, A, 0xc1bdceee, s=22, M[3]) + ldp w14, w22, [x1, #16] // .Load 2 words of input data0 M[4],M[5] + ldp w7, w23, [x1, #24] // .Load 2 words of input data0 M[6],M[7] +#ifdef __AARCH64EB__ + rev w14, w14 + rev w22, w22 + rev w7, w7 + rev w23, w23 +#endif + and x16, x17, x9 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x6, x16, x5 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x16, #0xfaf // .Load lower half of constant 0xf57c0faf + movk x16, #0xf57c, lsl #16 // .Load upper half of constant 0xf57c0faf + add w17, w4, w14 // Add dest value + add w16, w17, w16 // Add constant 0xf57c0faf + add w4, w16, w6 // Add aux function result + ror w4, w4, #25 // Rotate left s=7 bits + eor x16, x9, x8 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w17, w9, w4 // Add X parameter round 1 A=FF(A, B, C, D, 0xf57c0faf, s=7, M[4]) + and x16, x16, x17 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x6, x16, x8 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x4, #0xc62a // .Load lower half of constant 0x4787c62a + movk x4, #0x4787, lsl #16 // .Load upper half of constant 0x4787c62a + add w16, w5, w22 // Add dest value + add w16, w16, w4 // Add constant 0x4787c62a + add w5, w16, w6 // Add aux function result + ror w5, w5, #20 // Rotate left s=12 bits + eor x4, x17, x9 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w19, w17, w5 // Add X parameter round 1 D=FF(D, A, B, C, 0x4787c62a, s=12, M[5]) + and x6, x4, x19 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x5, x6, x9 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x4, #0x4613 // .Load lower half of constant 0xa8304613 + movk x4, #0xa830, lsl #16 // .Load upper half of constant 0xa8304613 + add w6, w8, w7 // Add dest value + add w8, w6, w4 // Add constant 0xa8304613 + add w4, w8, w5 // Add aux function result + ror w4, w4, #15 // Rotate left s=17 bits + eor x6, x19, x17 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w8, w19, w4 // Add X parameter round 1 C=FF(C, D, A, B, 0xa8304613, s=17, M[6]) + and x5, x6, x8 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x4, x5, x17 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x6, #0x9501 // .Load lower half of constant 0xfd469501 + movk x6, #0xfd46, lsl #16 // .Load upper half of constant 0xfd469501 + add w9, w9, w23 // Add dest value + add w5, w9, w6 // Add constant 0xfd469501 + add w9, w5, w4 // Add aux function result + ror w9, w9, #10 // Rotate left s=22 bits + eor x6, x8, x19 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w4, w8, w9 // Add X parameter round 1 B=FF(B, C, D, A, 0xfd469501, s=22, M[7]) + ldp w5, w24, [x1, #32] // .Load 2 words of input data0 M[8],M[9] + ldp w16, w25, [x1, #40] // .Load 2 words of input data0 M[10],M[11] +#ifdef __AARCH64EB__ + rev w5, w5 + rev w24, w24 + rev w16, w16 + rev w25, w25 +#endif + and x9, x6, x4 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x6, x9, x19 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x9, #0x98d8 // .Load lower half of constant 0x698098d8 + movk x9, #0x6980, lsl #16 // 
.Load upper half of constant 0x698098d8 + add w17, w17, w5 // Add dest value + add w9, w17, w9 // Add constant 0x698098d8 + add w17, w9, w6 // Add aux function result + ror w17, w17, #25 // Rotate left s=7 bits + eor x9, x4, x8 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w6, w4, w17 // Add X parameter round 1 A=FF(A, B, C, D, 0x698098d8, s=7, M[8]) + and x17, x9, x6 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x9, x17, x8 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x17, #0xf7af // .Load lower half of constant 0x8b44f7af + movk x17, #0x8b44, lsl #16 // .Load upper half of constant 0x8b44f7af + add w19, w19, w24 // Add dest value + add w17, w19, w17 // Add constant 0x8b44f7af + add w19, w17, w9 // Add aux function result + ror w19, w19, #20 // Rotate left s=12 bits + eor x9, x6, x4 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w17, w6, w19 // Add X parameter round 1 D=FF(D, A, B, C, 0x8b44f7af, s=12, M[9]) + and x9, x9, x17 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x9, x9, x4 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x11, #0x5bb1 // .Load lower half of constant 0xffff5bb1 + movk x11, #0xffff, lsl #16 // .Load upper half of constant 0xffff5bb1 + add w8, w8, w16 // Add dest value + add w8, w8, w11 // Add constant 0xffff5bb1 + add w8, w8, w9 // Add aux function result + ror w8, w8, #15 // Rotate left s=17 bits + eor x9, x17, x6 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w8, w17, w8 // Add X parameter round 1 C=FF(C, D, A, B, 0xffff5bb1, s=17, M[10]) + and x9, x9, x8 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x9, x9, x6 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x11, #0xd7be // .Load lower half of constant 0x895cd7be + movk x11, #0x895c, lsl #16 // .Load upper half of constant 0x895cd7be + add w4, w4, w25 // Add dest value + add w4, w4, w11 // Add constant 0x895cd7be + add w9, w4, w9 // Add aux function result + ror w9, w9, #10 // Rotate left s=22 bits + eor x4, x8, x17 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w9, w8, w9 // Add X parameter round 1 B=FF(B, C, D, A, 0x895cd7be, s=22, M[11]) + ldp w11, w26, [x1, #48] // .Load 2 words of input data0 M[12],M[13] + ldp w12, w27, [x1, #56] // .Load 2 words of input data0 M[14],M[15] +#ifdef __AARCH64EB__ + rev w11, w11 + rev w26, w26 + rev w12, w12 + rev w27, w27 +#endif + and x4, x4, x9 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x4, x4, x17 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x19, #0x1122 // .Load lower half of constant 0x6b901122 + movk x19, #0x6b90, lsl #16 // .Load upper half of constant 0x6b901122 + add w6, w6, w11 // Add dest value + add w6, w6, w19 // Add constant 0x6b901122 + add w4, w6, w4 // Add aux function result + ror w4, w4, #25 // Rotate left s=7 bits + eor x6, x9, x8 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w4, w9, w4 // Add X parameter round 1 A=FF(A, B, C, D, 0x6b901122, s=7, M[12]) + and x6, x6, x4 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x6, x6, x8 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x19, #0x7193 // .Load lower half of constant 0xfd987193 + movk x19, #0xfd98, lsl #16 // .Load upper half of constant 0xfd987193 + add w17, w17, w26 // Add dest value + add w17, w17, w19 // Add constant 0xfd987193 + add w17, w17, w6 // Add aux function result + ror w17, w17, #20 // Rotate left s=12 bits + eor x6, x4, x9 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w17, w4, w17 // 
Add X parameter round 1 D=FF(D, A, B, C, 0xfd987193, s=12, M[13]) + and x6, x6, x17 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x6, x6, x9 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x13, #0x438e // .Load lower half of constant 0xa679438e + movk x13, #0xa679, lsl #16 // .Load upper half of constant 0xa679438e + add w8, w8, w12 // Add dest value + add w8, w8, w13 // Add constant 0xa679438e + add w8, w8, w6 // Add aux function result + ror w8, w8, #15 // Rotate left s=17 bits + eor x6, x17, x4 // Begin aux function round 1 F(x,y,z)=(((y^z)&x)^z) + add w8, w17, w8 // Add X parameter round 1 C=FF(C, D, A, B, 0xa679438e, s=17, M[14]) + and x6, x6, x8 // Continue aux function round 1 F(x,y,z)=(((y^z)&x)^z) + eor x6, x6, x4 // End aux function round 1 F(x,y,z)=(((y^z)&x)^z) + movz x13, #0x821 // .Load lower half of constant 0x49b40821 + movk x13, #0x49b4, lsl #16 // .Load upper half of constant 0x49b40821 + add w9, w9, w27 // Add dest value + add w9, w9, w13 // Add constant 0x49b40821 + add w9, w9, w6 // Add aux function result + ror w9, w9, #10 // Rotate left s=22 bits + bic x6, x8, x17 // Aux function round 2 (~z & y) + add w9, w8, w9 // Add X parameter round 1 B=FF(B, C, D, A, 0x49b40821, s=22, M[15]) + movz x13, #0x2562 // .Load lower half of constant 0xf61e2562 + movk x13, #0xf61e, lsl #16 // .Load upper half of constant 0xf61e2562 + add w4, w4, w20 // Add dest value + add w4, w4, w13 // Add constant 0xf61e2562 + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) + ror w4, w4, #27 // Rotate left s=5 bits + bic x6, x9, x8 // Aux function round 2 (~z & y) + add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xf61e2562, s=5, M[1]) + movz x13, #0xb340 // .Load lower half of constant 0xc040b340 + movk x13, #0xc040, lsl #16 // .Load upper half of constant 0xc040b340 + add w17, w17, w7 // Add dest value + add w17, w17, w13 // Add constant 0xc040b340 + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) + ror w17, w17, #23 // Rotate left s=9 bits + bic x6, x4, x9 // Aux function round 2 (~z & y) + add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc040b340, s=9, M[6]) + movz x13, #0x5a51 // .Load lower half of constant 0x265e5a51 + movk x13, #0x265e, lsl #16 // .Load upper half of constant 0x265e5a51 + add w8, w8, w25 // Add dest value + add w8, w8, w13 // Add constant 0x265e5a51 + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) + ror w8, w8, #18 // Rotate left s=14 bits + bic x6, x17, x4 // Aux function round 2 (~z & y) + add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x265e5a51, s=14, M[11]) + movz x13, #0xc7aa // .Load lower half of constant 0xe9b6c7aa + movk x13, #0xe9b6, lsl #16 // .Load upper half of constant 0xe9b6c7aa + add w9, w9, w15 // Add dest value + add w9, w9, w13 // Add constant 0xe9b6c7aa + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) + ror w9, w9, #12 // Rotate left s=20 bits + bic x6, x8, x17 // Aux function round 2 (~z & y) + add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe9b6c7aa, s=20, M[0]) + movz x13, #0x105d // .Load lower half of constant 0xd62f105d + movk x13, #0xd62f, lsl #16 // .Load upper half of constant 0xd62f105d + add w4, w4, w22 // Add dest value + add w4, w4, w13 // Add constant 0xd62f105d + and x13, x9, x17 // Aux function 
round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) + ror w4, w4, #27 // Rotate left s=5 bits + bic x6, x9, x8 // Aux function round 2 (~z & y) + add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xd62f105d, s=5, M[5]) + movz x13, #0x1453 // .Load lower half of constant 0x2441453 + movk x13, #0x244, lsl #16 // .Load upper half of constant 0x2441453 + add w17, w17, w16 // Add dest value + add w17, w17, w13 // Add constant 0x2441453 + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) + ror w17, w17, #23 // Rotate left s=9 bits + bic x6, x4, x9 // Aux function round 2 (~z & y) + add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0x2441453, s=9, M[10]) + movz x13, #0xe681 // .Load lower half of constant 0xd8a1e681 + movk x13, #0xd8a1, lsl #16 // .Load upper half of constant 0xd8a1e681 + add w8, w8, w27 // Add dest value + add w8, w8, w13 // Add constant 0xd8a1e681 + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) + ror w8, w8, #18 // Rotate left s=14 bits + bic x6, x17, x4 // Aux function round 2 (~z & y) + add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xd8a1e681, s=14, M[15]) + movz x13, #0xfbc8 // .Load lower half of constant 0xe7d3fbc8 + movk x13, #0xe7d3, lsl #16 // .Load upper half of constant 0xe7d3fbc8 + add w9, w9, w14 // Add dest value + add w9, w9, w13 // Add constant 0xe7d3fbc8 + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) + ror w9, w9, #12 // Rotate left s=20 bits + bic x6, x8, x17 // Aux function round 2 (~z & y) + add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0xe7d3fbc8, s=20, M[4]) + movz x13, #0xcde6 // .Load lower half of constant 0x21e1cde6 + movk x13, #0x21e1, lsl #16 // .Load upper half of constant 0x21e1cde6 + add w4, w4, w24 // Add dest value + add w4, w4, w13 // Add constant 0x21e1cde6 + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) + ror w4, w4, #27 // Rotate left s=5 bits + bic x6, x9, x8 // Aux function round 2 (~z & y) + add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0x21e1cde6, s=5, M[9]) + movz x13, #0x7d6 // .Load lower half of constant 0xc33707d6 + movk x13, #0xc337, lsl #16 // .Load upper half of constant 0xc33707d6 + add w17, w17, w12 // Add dest value + add w17, w17, w13 // Add constant 0xc33707d6 + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) + ror w17, w17, #23 // Rotate left s=9 bits + bic x6, x4, x9 // Aux function round 2 (~z & y) + add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xc33707d6, s=9, M[14]) + movz x13, #0xd87 // .Load lower half of constant 0xf4d50d87 + movk x13, #0xf4d5, lsl #16 // .Load upper half of constant 0xf4d50d87 + add w8, w8, w21 // Add dest value + add w8, w8, w13 // Add constant 0xf4d50d87 + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) + ror w8, w8, #18 // Rotate left s=14 bits + bic x6, x17, x4 // Aux function round 2 (~z & y) + add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0xf4d50d87, s=14, M[3]) + movz x13, #0x14ed // .Load lower half of constant 0x455a14ed + movk x13, #0x455a, lsl #16 // .Load upper half of constant 0x455a14ed + add w9, w9, w5 // Add dest value + add w9, w9, w13 // Add constant 0x455a14ed 
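+	// (Illustrative note.) Round-2 steps use G(x,y,z) = (x & z) | (y & ~z).
+	// The two terms are bitwise disjoint (they disagree on z), so the code can
+	// add (~z & y) and (x & z) separately instead of OR-ing them together.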
+ and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) + ror w9, w9, #12 // Rotate left s=20 bits + bic x6, x8, x17 // Aux function round 2 (~z & y) + add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0x455a14ed, s=20, M[8]) + movz x13, #0xe905 // .Load lower half of constant 0xa9e3e905 + movk x13, #0xa9e3, lsl #16 // .Load upper half of constant 0xa9e3e905 + add w4, w4, w26 // Add dest value + add w4, w4, w13 // Add constant 0xa9e3e905 + and x13, x9, x17 // Aux function round 2 (x & z) + add w4, w4, w6 // Add (~z & y) + add w4, w4, w13 // Add (x & z) + ror w4, w4, #27 // Rotate left s=5 bits + bic x6, x9, x8 // Aux function round 2 (~z & y) + add w4, w9, w4 // Add X parameter round 2 A=GG(A, B, C, D, 0xa9e3e905, s=5, M[13]) + movz x13, #0xa3f8 // .Load lower half of constant 0xfcefa3f8 + movk x13, #0xfcef, lsl #16 // .Load upper half of constant 0xfcefa3f8 + add w17, w17, w3 // Add dest value + add w17, w17, w13 // Add constant 0xfcefa3f8 + and x13, x4, x8 // Aux function round 2 (x & z) + add w17, w17, w6 // Add (~z & y) + add w17, w17, w13 // Add (x & z) + ror w17, w17, #23 // Rotate left s=9 bits + bic x6, x4, x9 // Aux function round 2 (~z & y) + add w17, w4, w17 // Add X parameter round 2 D=GG(D, A, B, C, 0xfcefa3f8, s=9, M[2]) + movz x13, #0x2d9 // .Load lower half of constant 0x676f02d9 + movk x13, #0x676f, lsl #16 // .Load upper half of constant 0x676f02d9 + add w8, w8, w23 // Add dest value + add w8, w8, w13 // Add constant 0x676f02d9 + and x13, x17, x9 // Aux function round 2 (x & z) + add w8, w8, w6 // Add (~z & y) + add w8, w8, w13 // Add (x & z) + ror w8, w8, #18 // Rotate left s=14 bits + bic x6, x17, x4 // Aux function round 2 (~z & y) + add w8, w17, w8 // Add X parameter round 2 C=GG(C, D, A, B, 0x676f02d9, s=14, M[7]) + movz x13, #0x4c8a // .Load lower half of constant 0x8d2a4c8a + movk x13, #0x8d2a, lsl #16 // .Load upper half of constant 0x8d2a4c8a + add w9, w9, w11 // Add dest value + add w9, w9, w13 // Add constant 0x8d2a4c8a + and x13, x8, x4 // Aux function round 2 (x & z) + add w9, w9, w6 // Add (~z & y) + add w9, w9, w13 // Add (x & z) + eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w9, w9, #12 // Rotate left s=20 bits + movz x10, #0x3942 // .Load lower half of constant 0xfffa3942 + add w9, w8, w9 // Add X parameter round 2 B=GG(B, C, D, A, 0x8d2a4c8a, s=20, M[12]) + movk x10, #0xfffa, lsl #16 // .Load upper half of constant 0xfffa3942 + add w4, w4, w22 // Add dest value + eor x6, x6, x9 // End aux function round 3 H(x,y,z)=(x^y^z) + add w4, w4, w10 // Add constant 0xfffa3942 + add w4, w4, w6 // Add aux function result + ror w4, w4, #28 // Rotate left s=4 bits + eor x6, x9, x8 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x10, #0xf681 // .Load lower half of constant 0x8771f681 + add w4, w9, w4 // Add X parameter round 3 A=HH(A, B, C, D, 0xfffa3942, s=4, M[5]) + movk x10, #0x8771, lsl #16 // .Load upper half of constant 0x8771f681 + add w17, w17, w5 // Add dest value + eor x6, x6, x4 // End aux function round 3 H(x,y,z)=(x^y^z) + add w17, w17, w10 // Add constant 0x8771f681 + add w17, w17, w6 // Add aux function result + eor x6, x4, x9 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w17, w17, #21 // Rotate left s=11 bits + movz x13, #0x6122 // .Load lower half of constant 0x6d9d6122 + add w17, w4, w17 // Add X parameter round 3 D=HH(D, A, B, C, 0x8771f681, s=11, M[8]) + movk x13, #0x6d9d, lsl #16 // .Load upper half of constant 0x6d9d6122 + add w8, w8, w25 // 
Add dest value + eor x6, x6, x17 // End aux function round 3 H(x,y,z)=(x^y^z) + add w8, w8, w13 // Add constant 0x6d9d6122 + add w8, w8, w6 // Add aux function result + ror w8, w8, #16 // Rotate left s=16 bits + eor x6, x17, x4 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x13, #0x380c // .Load lower half of constant 0xfde5380c + add w8, w17, w8 // Add X parameter round 3 C=HH(C, D, A, B, 0x6d9d6122, s=16, M[11]) + movk x13, #0xfde5, lsl #16 // .Load upper half of constant 0xfde5380c + add w9, w9, w12 // Add dest value + eor x6, x6, x8 // End aux function round 3 H(x,y,z)=(x^y^z) + add w9, w9, w13 // Add constant 0xfde5380c + add w9, w9, w6 // Add aux function result + eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w9, w9, #9 // Rotate left s=23 bits + movz x10, #0xea44 // .Load lower half of constant 0xa4beea44 + add w9, w8, w9 // Add X parameter round 3 B=HH(B, C, D, A, 0xfde5380c, s=23, M[14]) + movk x10, #0xa4be, lsl #16 // .Load upper half of constant 0xa4beea44 + add w4, w4, w20 // Add dest value + eor x6, x6, x9 // End aux function round 3 H(x,y,z)=(x^y^z) + add w4, w4, w10 // Add constant 0xa4beea44 + add w4, w4, w6 // Add aux function result + ror w4, w4, #28 // Rotate left s=4 bits + eor x6, x9, x8 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x10, #0xcfa9 // .Load lower half of constant 0x4bdecfa9 + add w4, w9, w4 // Add X parameter round 3 A=HH(A, B, C, D, 0xa4beea44, s=4, M[1]) + movk x10, #0x4bde, lsl #16 // .Load upper half of constant 0x4bdecfa9 + add w17, w17, w14 // Add dest value + eor x6, x6, x4 // End aux function round 3 H(x,y,z)=(x^y^z) + add w17, w17, w10 // Add constant 0x4bdecfa9 + add w17, w17, w6 // Add aux function result + eor x6, x4, x9 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w17, w17, #21 // Rotate left s=11 bits + movz x13, #0x4b60 // .Load lower half of constant 0xf6bb4b60 + add w17, w4, w17 // Add X parameter round 3 D=HH(D, A, B, C, 0x4bdecfa9, s=11, M[4]) + movk x13, #0xf6bb, lsl #16 // .Load upper half of constant 0xf6bb4b60 + add w8, w8, w23 // Add dest value + eor x6, x6, x17 // End aux function round 3 H(x,y,z)=(x^y^z) + add w8, w8, w13 // Add constant 0xf6bb4b60 + add w8, w8, w6 // Add aux function result + ror w8, w8, #16 // Rotate left s=16 bits + eor x6, x17, x4 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x13, #0xbc70 // .Load lower half of constant 0xbebfbc70 + add w8, w17, w8 // Add X parameter round 3 C=HH(C, D, A, B, 0xf6bb4b60, s=16, M[7]) + movk x13, #0xbebf, lsl #16 // .Load upper half of constant 0xbebfbc70 + add w9, w9, w16 // Add dest value + eor x6, x6, x8 // End aux function round 3 H(x,y,z)=(x^y^z) + add w9, w9, w13 // Add constant 0xbebfbc70 + add w9, w9, w6 // Add aux function result + eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w9, w9, #9 // Rotate left s=23 bits + movz x10, #0x7ec6 // .Load lower half of constant 0x289b7ec6 + add w9, w8, w9 // Add X parameter round 3 B=HH(B, C, D, A, 0xbebfbc70, s=23, M[10]) + movk x10, #0x289b, lsl #16 // .Load upper half of constant 0x289b7ec6 + add w4, w4, w26 // Add dest value + eor x6, x6, x9 // End aux function round 3 H(x,y,z)=(x^y^z) + add w4, w4, w10 // Add constant 0x289b7ec6 + add w4, w4, w6 // Add aux function result + ror w4, w4, #28 // Rotate left s=4 bits + eor x6, x9, x8 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x10, #0x27fa // .Load lower half of constant 0xeaa127fa + add w4, w9, w4 // Add X parameter round 3 A=HH(A, B, C, D, 0x289b7ec6, s=4, M[13]) + movk x10, #0xeaa1, lsl #16 // 
.Load upper half of constant 0xeaa127fa + add w17, w17, w15 // Add dest value + eor x6, x6, x4 // End aux function round 3 H(x,y,z)=(x^y^z) + add w17, w17, w10 // Add constant 0xeaa127fa + add w17, w17, w6 // Add aux function result + eor x6, x4, x9 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w17, w17, #21 // Rotate left s=11 bits + movz x13, #0x3085 // .Load lower half of constant 0xd4ef3085 + add w17, w4, w17 // Add X parameter round 3 D=HH(D, A, B, C, 0xeaa127fa, s=11, M[0]) + movk x13, #0xd4ef, lsl #16 // .Load upper half of constant 0xd4ef3085 + add w8, w8, w21 // Add dest value + eor x6, x6, x17 // End aux function round 3 H(x,y,z)=(x^y^z) + add w8, w8, w13 // Add constant 0xd4ef3085 + add w8, w8, w6 // Add aux function result + ror w8, w8, #16 // Rotate left s=16 bits + eor x6, x17, x4 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x13, #0x1d05 // .Load lower half of constant 0x4881d05 + add w8, w17, w8 // Add X parameter round 3 C=HH(C, D, A, B, 0xd4ef3085, s=16, M[3]) + movk x13, #0x488, lsl #16 // .Load upper half of constant 0x4881d05 + add w9, w9, w7 // Add dest value + eor x6, x6, x8 // End aux function round 3 H(x,y,z)=(x^y^z) + add w9, w9, w13 // Add constant 0x4881d05 + add w9, w9, w6 // Add aux function result + eor x6, x8, x17 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w9, w9, #9 // Rotate left s=23 bits + movz x10, #0xd039 // .Load lower half of constant 0xd9d4d039 + add w9, w8, w9 // Add X parameter round 3 B=HH(B, C, D, A, 0x4881d05, s=23, M[6]) + movk x10, #0xd9d4, lsl #16 // .Load upper half of constant 0xd9d4d039 + add w4, w4, w24 // Add dest value + eor x6, x6, x9 // End aux function round 3 H(x,y,z)=(x^y^z) + add w4, w4, w10 // Add constant 0xd9d4d039 + add w4, w4, w6 // Add aux function result + ror w4, w4, #28 // Rotate left s=4 bits + eor x6, x9, x8 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x10, #0x99e5 // .Load lower half of constant 0xe6db99e5 + add w4, w9, w4 // Add X parameter round 3 A=HH(A, B, C, D, 0xd9d4d039, s=4, M[9]) + movk x10, #0xe6db, lsl #16 // .Load upper half of constant 0xe6db99e5 + add w17, w17, w11 // Add dest value + eor x6, x6, x4 // End aux function round 3 H(x,y,z)=(x^y^z) + add w17, w17, w10 // Add constant 0xe6db99e5 + add w17, w17, w6 // Add aux function result + eor x6, x4, x9 // Begin aux function round 3 H(x,y,z)=(x^y^z) + ror w17, w17, #21 // Rotate left s=11 bits + movz x13, #0x7cf8 // .Load lower half of constant 0x1fa27cf8 + add w17, w4, w17 // Add X parameter round 3 D=HH(D, A, B, C, 0xe6db99e5, s=11, M[12]) + movk x13, #0x1fa2, lsl #16 // .Load upper half of constant 0x1fa27cf8 + add w8, w8, w27 // Add dest value + eor x6, x6, x17 // End aux function round 3 H(x,y,z)=(x^y^z) + add w8, w8, w13 // Add constant 0x1fa27cf8 + add w8, w8, w6 // Add aux function result + ror w8, w8, #16 // Rotate left s=16 bits + eor x6, x17, x4 // Begin aux function round 3 H(x,y,z)=(x^y^z) + movz x13, #0x5665 // .Load lower half of constant 0xc4ac5665 + add w8, w17, w8 // Add X parameter round 3 C=HH(C, D, A, B, 0x1fa27cf8, s=16, M[15]) + movk x13, #0xc4ac, lsl #16 // .Load upper half of constant 0xc4ac5665 + add w9, w9, w3 // Add dest value + eor x6, x6, x8 // End aux function round 3 H(x,y,z)=(x^y^z) + add w9, w9, w13 // Add constant 0xc4ac5665 + add w9, w9, w6 // Add aux function result + ror w9, w9, #9 // Rotate left s=23 bits + movz x6, #0x2244 // .Load lower half of constant 0xf4292244 + movk x6, #0xf429, lsl #16 // .Load upper half of constant 0xf4292244 + add w9, w8, w9 // Add X parameter round 3 
B=HH(B, C, D, A, 0xc4ac5665, s=23, M[2]) + add w4, w4, w15 // Add dest value + orn x13, x9, x17 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w4, w4, w6 // Add constant 0xf4292244 + eor x6, x8, x13 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w4, w4, w6 // Add aux function result + ror w4, w4, #26 // Rotate left s=6 bits + movz x6, #0xff97 // .Load lower half of constant 0x432aff97 + movk x6, #0x432a, lsl #16 // .Load upper half of constant 0x432aff97 + add w4, w9, w4 // Add X parameter round 4 A=II(A, B, C, D, 0xf4292244, s=6, M[0]) + orn x10, x4, x8 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w17, w17, w23 // Add dest value + eor x10, x9, x10 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w17, w17, w6 // Add constant 0x432aff97 + add w6, w17, w10 // Add aux function result + ror w6, w6, #22 // Rotate left s=10 bits + movz x17, #0x23a7 // .Load lower half of constant 0xab9423a7 + movk x17, #0xab94, lsl #16 // .Load upper half of constant 0xab9423a7 + add w6, w4, w6 // Add X parameter round 4 D=II(D, A, B, C, 0x432aff97, s=10, M[7]) + add w8, w8, w12 // Add dest value + orn x10, x6, x9 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w8, w8, w17 // Add constant 0xab9423a7 + eor x17, x4, x10 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w8, w8, w17 // Add aux function result + ror w8, w8, #17 // Rotate left s=15 bits + movz x17, #0xa039 // .Load lower half of constant 0xfc93a039 + movk x17, #0xfc93, lsl #16 // .Load upper half of constant 0xfc93a039 + add w8, w6, w8 // Add X parameter round 4 C=II(C, D, A, B, 0xab9423a7, s=15, M[14]) + orn x13, x8, x4 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w9, w9, w22 // Add dest value + eor x13, x6, x13 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w9, w9, w17 // Add constant 0xfc93a039 + add w17, w9, w13 // Add aux function result + ror w17, w17, #11 // Rotate left s=21 bits + movz x9, #0x59c3 // .Load lower half of constant 0x655b59c3 + movk x9, #0x655b, lsl #16 // .Load upper half of constant 0x655b59c3 + add w17, w8, w17 // Add X parameter round 4 B=II(B, C, D, A, 0xfc93a039, s=21, M[5]) + add w4, w4, w11 // Add dest value + orn x13, x17, x6 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w9, w4, w9 // Add constant 0x655b59c3 + eor x4, x8, x13 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w9, w9, w4 // Add aux function result + ror w9, w9, #26 // Rotate left s=6 bits + movz x4, #0xcc92 // .Load lower half of constant 0x8f0ccc92 + movk x4, #0x8f0c, lsl #16 // .Load upper half of constant 0x8f0ccc92 + add w9, w17, w9 // Add X parameter round 4 A=II(A, B, C, D, 0x655b59c3, s=6, M[12]) + orn x10, x9, x8 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w6, w6, w21 // Add dest value + eor x10, x17, x10 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w4, w6, w4 // Add constant 0x8f0ccc92 + add w6, w4, w10 // Add aux function result + ror w6, w6, #22 // Rotate left s=10 bits + movz x4, #0xf47d // .Load lower half of constant 0xffeff47d + movk x4, #0xffef, lsl #16 // .Load upper half of constant 0xffeff47d + add w6, w9, w6 // Add X parameter round 4 D=II(D, A, B, C, 0x8f0ccc92, s=10, M[3]) + add w8, w8, w16 // Add dest value + orn x10, x6, x17 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w8, w8, w4 // Add constant 0xffeff47d + eor x4, x9, x10 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w8, w8, w4 // Add aux function result + ror w8, w8, #17 // Rotate left s=15 bits + movz x4, #0x5dd1 // .Load lower half of constant 0x85845dd1 + movk 
x4, #0x8584, lsl #16 // .Load upper half of constant 0x85845dd1 + add w8, w6, w8 // Add X parameter round 4 C=II(C, D, A, B, 0xffeff47d, s=15, M[10]) + orn x10, x8, x9 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w15, w17, w20 // Add dest value + eor x17, x6, x10 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w15, w15, w4 // Add constant 0x85845dd1 + add w4, w15, w17 // Add aux function result + ror w4, w4, #11 // Rotate left s=21 bits + movz x15, #0x7e4f // .Load lower half of constant 0x6fa87e4f + movk x15, #0x6fa8, lsl #16 // .Load upper half of constant 0x6fa87e4f + add w17, w8, w4 // Add X parameter round 4 B=II(B, C, D, A, 0x85845dd1, s=21, M[1]) + add w4, w9, w5 // Add dest value + orn x9, x17, x6 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w15, w4, w15 // Add constant 0x6fa87e4f + eor x4, x8, x9 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w9, w15, w4 // Add aux function result + ror w9, w9, #26 // Rotate left s=6 bits + movz x15, #0xe6e0 // .Load lower half of constant 0xfe2ce6e0 + movk x15, #0xfe2c, lsl #16 // .Load upper half of constant 0xfe2ce6e0 + add w4, w17, w9 // Add X parameter round 4 A=II(A, B, C, D, 0x6fa87e4f, s=6, M[8]) + orn x9, x4, x8 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w6, w6, w27 // Add dest value + eor x9, x17, x9 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w15, w6, w15 // Add constant 0xfe2ce6e0 + add w6, w15, w9 // Add aux function result + ror w6, w6, #22 // Rotate left s=10 bits + movz x9, #0x4314 // .Load lower half of constant 0xa3014314 + movk x9, #0xa301, lsl #16 // .Load upper half of constant 0xa3014314 + add w15, w4, w6 // Add X parameter round 4 D=II(D, A, B, C, 0xfe2ce6e0, s=10, M[15]) + add w6, w8, w7 // Add dest value + orn x7, x15, x17 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w8, w6, w9 // Add constant 0xa3014314 + eor x9, x4, x7 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w6, w8, w9 // Add aux function result + ror w6, w6, #17 // Rotate left s=15 bits + movz x7, #0x11a1 // .Load lower half of constant 0x4e0811a1 + movk x7, #0x4e08, lsl #16 // .Load upper half of constant 0x4e0811a1 + add w8, w15, w6 // Add X parameter round 4 C=II(C, D, A, B, 0xa3014314, s=15, M[6]) + orn x9, x8, x4 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w6, w17, w26 // Add dest value + eor x17, x15, x9 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w9, w6, w7 // Add constant 0x4e0811a1 + add w7, w9, w17 // Add aux function result + ror w7, w7, #11 // Rotate left s=21 bits + movz x6, #0x7e82 // .Load lower half of constant 0xf7537e82 + movk x6, #0xf753, lsl #16 // .Load upper half of constant 0xf7537e82 + add w9, w8, w7 // Add X parameter round 4 B=II(B, C, D, A, 0x4e0811a1, s=21, M[13]) + add w17, w4, w14 // Add dest value + orn x7, x9, x15 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w14, w17, w6 // Add constant 0xf7537e82 + eor x4, x8, x7 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w17, w14, w4 // Add aux function result + ror w17, w17, #26 // Rotate left s=6 bits + movz x6, #0xf235 // .Load lower half of constant 0xbd3af235 + movk x6, #0xbd3a, lsl #16 // .Load upper half of constant 0xbd3af235 + add w7, w9, w17 // Add X parameter round 4 A=II(A, B, C, D, 0xf7537e82, s=6, M[4]) + orn x14, x7, x8 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w4, w15, w25 // Add dest value + eor x17, x9, x14 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w15, w4, w6 // Add constant 0xbd3af235 + add w16, w15, w17 // Add aux function result + 
ror w16, w16, #22 // Rotate left s=10 bits + movz x14, #0xd2bb // .Load lower half of constant 0x2ad7d2bb + movk x14, #0x2ad7, lsl #16 // .Load upper half of constant 0x2ad7d2bb + add w4, w7, w16 // Add X parameter round 4 D=II(D, A, B, C, 0xbd3af235, s=10, M[11]) + add w6, w8, w3 // Add dest value + orn x15, x4, x9 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w17, w6, w14 // Add constant 0x2ad7d2bb + eor x16, x7, x15 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w8, w17, w16 // Add aux function result + ror w8, w8, #17 // Rotate left s=15 bits + movz x3, #0xd391 // .Load lower half of constant 0xeb86d391 + movk x3, #0xeb86, lsl #16 // .Load upper half of constant 0xeb86d391 + add w14, w4, w8 // Add X parameter round 4 C=II(C, D, A, B, 0x2ad7d2bb, s=15, M[2]) + orn x6, x14, x7 // Begin aux function round 4 I(x,y,z)=((~z|x)^y) + add w15, w9, w24 // Add dest value + eor x17, x4, x6 // End aux function round 4 I(x,y,z)=((~z|x)^y) + add w16, w15, w3 // Add constant 0xeb86d391 + add w8, w16, w17 // Add aux function result + ror w8, w8, #11 // Rotate left s=21 bits + ldp w6, w15, [x0] // Reload MD5 state->A and state->B + ldp w5, w9, [x0, #8] // Reload MD5 state->C and state->D + add w3, w14, w8 // Add X parameter round 4 B=II(B, C, D, A, 0xeb86d391, s=21, M[9]) + add w13, w4, w9 // Add result of MD5 rounds to state->D + add w12, w14, w5 // Add result of MD5 rounds to state->C + add w10, w7, w6 // Add result of MD5 rounds to state->A + add w11, w3, w15 // Add result of MD5 rounds to state->B + stp w12, w13, [x0, #8] // Store MD5 states C,D + stp w10, w11, [x0] // Store MD5 states A,B + add x1, x1, #64 // Increment data pointer + subs w2, w2, #1 // Decrement block counter + b.ne ossl_md5_blocks_loop + + ldp x21,x22,[sp,#16] + ldp x23,x24,[sp,#32] + ldp x25,x26,[sp,#48] + ldp x27,x28,[sp,#64] + ldp x19,x20,[sp],#80 + ret + diff --git a/contrib/openssl-cmake/asm/crypto/md5/md5-x86_64.s b/contrib/openssl-cmake/asm/crypto/md5/md5-x86_64.s new file mode 100644 index 000000000000..fb4c26760fdf --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/md5/md5-x86_64.s @@ -0,0 +1,683 @@ +.text +.align 16 + +.globl ossl_md5_block_asm_data_order +.type ossl_md5_block_asm_data_order,@function +ossl_md5_block_asm_data_order: +.cfi_startproc + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-16 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-40 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-48 +.Lprologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je .Lend + + +.Lloop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + movl 4(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + movl 8(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + movl 12(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + movl 16(%rsi),%r10d + xorl 
%eax,%r11d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + movl 20(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + movl 24(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + movl 28(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + movl 32(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + movl 36(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + movl 40(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + movl 44(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + movl 48(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + movl 52(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + movl 56(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + movl 60(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + movl 4(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + andl %ebx,%r12d + leal -165796510(%rax,%r10,1),%eax + andl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%eax + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + andl %eax,%r12d + leal -1069501632(%rdx,%r10,1),%edx + andl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%edx + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + andl %edx,%r12d + leal 643717713(%rcx,%r10,1),%ecx + andl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ecx + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + andl %ecx,%r12d + leal -373897302(%rbx,%r10,1),%ebx + andl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%ebx + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + andl %ebx,%r12d + leal -701558691(%rax,%r10,1),%eax + andl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%eax + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + andl %eax,%r12d + leal 38016083(%rdx,%r10,1),%edx + andl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%edx + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl 
%r11d + andl %edx,%r12d + leal -660478335(%rcx,%r10,1),%ecx + andl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ecx + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + andl %ecx,%r12d + leal -405537848(%rbx,%r10,1),%ebx + andl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%ebx + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + andl %ebx,%r12d + leal 568446438(%rax,%r10,1),%eax + andl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%eax + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + andl %eax,%r12d + leal -1019803690(%rdx,%r10,1),%edx + andl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%edx + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + andl %edx,%r12d + leal -187363961(%rcx,%r10,1),%ecx + andl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ecx + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + andl %ecx,%r12d + leal 1163531501(%rbx,%r10,1),%ebx + andl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%ebx + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + andl %ebx,%r12d + leal -1444681467(%rax,%r10,1),%eax + andl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%eax + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + andl %eax,%r12d + leal -51403784(%rdx,%r10,1),%edx + andl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%edx + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + andl %edx,%r12d + leal 1735328473(%rcx,%r10,1),%ecx + andl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ecx + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + andl %ecx,%r12d + leal -1926607734(%rbx,%r10,1),%ebx + andl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%ebx + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + xorl %edx,%r11d + movl 32(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%eax + movl %ebx,%r11d + roll $4,%eax + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + xorl %ecx,%r11d + movl 44(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + xorl %ebx,%r11d + movl 56(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ecx + movl %edx,%r11d + roll $16,%ecx + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + xorl %eax,%r11d + movl 4(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + xorl %edx,%r11d + movl 16(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%eax + movl %ebx,%r11d + roll $4,%eax + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + xorl %ecx,%r11d + movl 28(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + xorl %ebx,%r11d + movl 40(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ecx + movl %edx,%r11d + roll $16,%ecx + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + xorl %eax,%r11d + movl 52(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + xorl %edx,%r11d + movl 0(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%eax + movl %ebx,%r11d 
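+# (Illustrative note; T[i] is shorthand for the per-step 32-bit constant.)
+# Round-3 steps use H(x,y,z) = x ^ y ^ z:
+#	a = b + rol32(a + H(b,c,d) + M[k] + T[i], s)
+# The leal above adds the message word in %r10d and T[i] to the accumulator
+# in a single instruction, while %r11d carries the running x^y^z value.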
+ roll $4,%eax + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + xorl %ecx,%r11d + movl 12(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + xorl %ebx,%r11d + movl 24(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ecx + movl %edx,%r11d + roll $16,%ecx + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + xorl %eax,%r11d + movl 36(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + xorl %edx,%r11d + movl 48(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%eax + movl %ebx,%r11d + roll $4,%eax + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + xorl %ecx,%r11d + movl 60(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + xorl %ebx,%r11d + movl 8(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ecx + movl %edx,%r11d + roll $16,%ecx + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + xorl %eax,%r11d + movl 0(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl $0xffffffff,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + movl 28(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%eax + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + movl 56(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%edx + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + movl 20(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ecx + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + movl 48(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ebx + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + movl 12(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%eax + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + movl 40(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%edx + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + movl 4(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ecx + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + movl 32(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ebx + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + movl 60(%rsi),%r10d + xorl %ecx,%r11d + addl %r11d,%eax + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + movl 24(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%edx + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + movl 52(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ecx + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + movl 16(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ebx + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + movl 44(%rsi),%r10d + xorl %ecx,%r11d + addl 
%r11d,%eax + movl $0xffffffff,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + movl 8(%rsi),%r10d + xorl %ebx,%r11d + addl %r11d,%edx + movl $0xffffffff,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + movl 36(%rsi),%r10d + xorl %eax,%r11d + addl %r11d,%ecx + movl $0xffffffff,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + movl 0(%rsi),%r10d + xorl %edx,%r11d + addl %r11d,%ebx + movl $0xffffffff,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb .Lloop + + +.Lend: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r12 +.cfi_restore %r12 + movq 24(%rsp),%rbx +.cfi_restore %rbx + movq 32(%rsp),%rbp +.cfi_restore %rbp + addq $40,%rsp +.cfi_adjust_cfa_offset -40 +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_md5_block_asm_data_order,.-ossl_md5_block_asm_data_order diff --git a/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-avx512.s b/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-avx512.s new file mode 100644 index 000000000000..b239d9fc2ac8 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-avx512.s @@ -0,0 +1,136110 @@ + +.globl ossl_vaes_vpclmulqdq_capable +.type ossl_vaes_vpclmulqdq_capable,@function +.align 32 +ossl_vaes_vpclmulqdq_capable: + movq OPENSSL_ia32cap_P+8(%rip),%rcx + + movq $6600291188736,%rdx + xorl %eax,%eax + andq %rdx,%rcx + cmpq %rdx,%rcx + cmoveq %rcx,%rax + .byte 0xf3,0xc3 +.size ossl_vaes_vpclmulqdq_capable, .-ossl_vaes_vpclmulqdq_capable +.text +.globl ossl_aes_gcm_init_avx512 +.type ossl_aes_gcm_init_avx512,@function +.align 32 +ossl_aes_gcm_init_avx512: +.cfi_startproc +.byte 243,15,30,250 + vpxorq %xmm16,%xmm16,%xmm16 + + + movl 240(%rdi),%eax + cmpl $9,%eax + je .Laes_128_0 + cmpl $11,%eax + je .Laes_192_0 + cmpl $13,%eax + je .Laes_256_0 + jmp .Lexit_aes_0 +.align 32 +.Laes_128_0: + vpxorq 0(%rdi),%xmm16,%xmm16 + + vaesenc 16(%rdi),%xmm16,%xmm16 + + vaesenc 32(%rdi),%xmm16,%xmm16 + + vaesenc 48(%rdi),%xmm16,%xmm16 + + vaesenc 64(%rdi),%xmm16,%xmm16 + + vaesenc 80(%rdi),%xmm16,%xmm16 + + vaesenc 96(%rdi),%xmm16,%xmm16 + + vaesenc 112(%rdi),%xmm16,%xmm16 + + vaesenc 128(%rdi),%xmm16,%xmm16 + + vaesenc 144(%rdi),%xmm16,%xmm16 + + vaesenclast 160(%rdi),%xmm16,%xmm16 + jmp .Lexit_aes_0 +.align 32 +.Laes_192_0: + vpxorq 0(%rdi),%xmm16,%xmm16 + + vaesenc 16(%rdi),%xmm16,%xmm16 + + vaesenc 32(%rdi),%xmm16,%xmm16 + + vaesenc 48(%rdi),%xmm16,%xmm16 + + vaesenc 64(%rdi),%xmm16,%xmm16 + + vaesenc 80(%rdi),%xmm16,%xmm16 + + vaesenc 96(%rdi),%xmm16,%xmm16 + + vaesenc 112(%rdi),%xmm16,%xmm16 + + vaesenc 128(%rdi),%xmm16,%xmm16 + + vaesenc 144(%rdi),%xmm16,%xmm16 + + vaesenc 160(%rdi),%xmm16,%xmm16 + + vaesenc 176(%rdi),%xmm16,%xmm16 + + vaesenclast 192(%rdi),%xmm16,%xmm16 + jmp .Lexit_aes_0 +.align 32 +.Laes_256_0: + vpxorq 0(%rdi),%xmm16,%xmm16 + + vaesenc 16(%rdi),%xmm16,%xmm16 + + vaesenc 32(%rdi),%xmm16,%xmm16 + + vaesenc 48(%rdi),%xmm16,%xmm16 + + vaesenc 64(%rdi),%xmm16,%xmm16 + + vaesenc 80(%rdi),%xmm16,%xmm16 + + vaesenc 96(%rdi),%xmm16,%xmm16 + + vaesenc 112(%rdi),%xmm16,%xmm16 + + vaesenc 128(%rdi),%xmm16,%xmm16 + + vaesenc 144(%rdi),%xmm16,%xmm16 + + vaesenc 160(%rdi),%xmm16,%xmm16 + + vaesenc 
176(%rdi),%xmm16,%xmm16 + + vaesenc 192(%rdi),%xmm16,%xmm16 + + vaesenc 208(%rdi),%xmm16,%xmm16 + + vaesenclast 224(%rdi),%xmm16,%xmm16 + jmp .Lexit_aes_0 +.Lexit_aes_0: + + vpshufb SHUF_MASK(%rip),%xmm16,%xmm16 + + vmovdqa64 %xmm16,%xmm2 + vpsllq $1,%xmm16,%xmm16 + vpsrlq $63,%xmm2,%xmm2 + vmovdqa %xmm2,%xmm1 + vpslldq $8,%xmm2,%xmm2 + vpsrldq $8,%xmm1,%xmm1 + vporq %xmm2,%xmm16,%xmm16 + + vpshufd $36,%xmm1,%xmm2 + vpcmpeqd TWOONE(%rip),%xmm2,%xmm2 + vpand POLY(%rip),%xmm2,%xmm2 + vpxorq %xmm2,%xmm16,%xmm16 + + vmovdqu64 %xmm16,336(%rsi) + vshufi32x4 $0x00,%ymm16,%ymm16,%ymm4 + vmovdqa %ymm4,%ymm3 + + vpclmulqdq $0x11,%ymm4,%ymm3,%ymm0 + vpclmulqdq $0x00,%ymm4,%ymm3,%ymm1 + vpclmulqdq $0x01,%ymm4,%ymm3,%ymm2 + vpclmulqdq $0x10,%ymm4,%ymm3,%ymm3 + vpxorq %ymm2,%ymm3,%ymm3 + + vpsrldq $8,%ymm3,%ymm2 + vpslldq $8,%ymm3,%ymm3 + vpxorq %ymm2,%ymm0,%ymm0 + vpxorq %ymm1,%ymm3,%ymm3 + + + + vmovdqu64 POLY2(%rip),%ymm2 + + vpclmulqdq $0x01,%ymm3,%ymm2,%ymm1 + vpslldq $8,%ymm1,%ymm1 + vpxorq %ymm1,%ymm3,%ymm3 + + + + vpclmulqdq $0x00,%ymm3,%ymm2,%ymm1 + vpsrldq $4,%ymm1,%ymm1 + vpclmulqdq $0x10,%ymm3,%ymm2,%ymm3 + vpslldq $4,%ymm3,%ymm3 + + vpternlogq $0x96,%ymm1,%ymm0,%ymm3 + + vmovdqu64 %xmm3,320(%rsi) + vinserti64x2 $1,%xmm16,%ymm3,%ymm4 + vmovdqa64 %ymm4,%ymm5 + + vpclmulqdq $0x11,%ymm3,%ymm4,%ymm0 + vpclmulqdq $0x00,%ymm3,%ymm4,%ymm1 + vpclmulqdq $0x01,%ymm3,%ymm4,%ymm2 + vpclmulqdq $0x10,%ymm3,%ymm4,%ymm4 + vpxorq %ymm2,%ymm4,%ymm4 + + vpsrldq $8,%ymm4,%ymm2 + vpslldq $8,%ymm4,%ymm4 + vpxorq %ymm2,%ymm0,%ymm0 + vpxorq %ymm1,%ymm4,%ymm4 + + + + vmovdqu64 POLY2(%rip),%ymm2 + + vpclmulqdq $0x01,%ymm4,%ymm2,%ymm1 + vpslldq $8,%ymm1,%ymm1 + vpxorq %ymm1,%ymm4,%ymm4 + + + + vpclmulqdq $0x00,%ymm4,%ymm2,%ymm1 + vpsrldq $4,%ymm1,%ymm1 + vpclmulqdq $0x10,%ymm4,%ymm2,%ymm4 + vpslldq $4,%ymm4,%ymm4 + + vpternlogq $0x96,%ymm1,%ymm0,%ymm4 + + vmovdqu64 %ymm4,288(%rsi) + + vinserti64x4 $1,%ymm5,%zmm4,%zmm4 + + + vshufi64x2 $0x00,%zmm4,%zmm4,%zmm3 + vmovdqa64 %zmm4,%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm0 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm1 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm2 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm2,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm2 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm2,%zmm0,%zmm0 + vpxorq %zmm1,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm2 + + vpclmulqdq $0x01,%zmm4,%zmm2,%zmm1 + vpslldq $8,%zmm1,%zmm1 + vpxorq %zmm1,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm2,%zmm1 + vpsrldq $4,%zmm1,%zmm1 + vpclmulqdq $0x10,%zmm4,%zmm2,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm1,%zmm0,%zmm4 + + vmovdqu64 %zmm4,224(%rsi) + vshufi64x2 $0x00,%zmm4,%zmm4,%zmm3 + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm0 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm1 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm2 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm2,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm2 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm2,%zmm0,%zmm0 + vpxorq %zmm1,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm2 + + vpclmulqdq $0x01,%zmm5,%zmm2,%zmm1 + vpslldq $8,%zmm1,%zmm1 + vpxorq %zmm1,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm2,%zmm1 + vpsrldq $4,%zmm1,%zmm1 + vpclmulqdq $0x10,%zmm5,%zmm2,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm1,%zmm0,%zmm5 + + vmovdqu64 %zmm5,160(%rsi) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm0 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm1 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm2 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm2,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm2 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm2,%zmm0,%zmm0 + vpxorq %zmm1,%zmm4,%zmm4 + + + + vmovdqu64 
POLY2(%rip),%zmm2 + + vpclmulqdq $0x01,%zmm4,%zmm2,%zmm1 + vpslldq $8,%zmm1,%zmm1 + vpxorq %zmm1,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm2,%zmm1 + vpsrldq $4,%zmm1,%zmm1 + vpclmulqdq $0x10,%zmm4,%zmm2,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm1,%zmm0,%zmm4 + + vmovdqu64 %zmm4,96(%rsi) + vzeroupper +.Labort_init: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_aes_gcm_init_avx512, .-ossl_aes_gcm_init_avx512 +.globl ossl_aes_gcm_setiv_avx512 +.type ossl_aes_gcm_setiv_avx512,@function +.align 32 +ossl_aes_gcm_setiv_avx512: +.cfi_startproc +.Lsetiv_seh_begin: +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 +.Lsetiv_seh_push_rbx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 +.Lsetiv_seh_push_rbp: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 +.Lsetiv_seh_push_r12: + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 +.Lsetiv_seh_push_r13: + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 +.Lsetiv_seh_push_r14: + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lsetiv_seh_push_r15: + + + + + + + + + + + leaq 0(%rsp),%rbp +.cfi_def_cfa_register %rbp +.Lsetiv_seh_setfp: + +.Lsetiv_seh_prolog_end: + subq $820,%rsp + andq $(-64),%rsp + cmpq $12,%rcx + je iv_len_12_init_IV + vpxor %xmm2,%xmm2,%xmm2 + movq %rdx,%r10 + movq %rcx,%r11 + orq %r11,%r11 + jz .L_CALC_AAD_done_1 + + xorq %rbx,%rbx + vmovdqa64 SHUF_MASK(%rip),%zmm16 + +.L_get_AAD_loop48x16_1: + cmpq $768,%r11 + jl .L_exit_AAD_loop48x16_1 + vmovdqu64 0(%r10),%zmm11 + vmovdqu64 64(%r10),%zmm3 + vmovdqu64 128(%r10),%zmm4 + vmovdqu64 192(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + testq %rbx,%rbx + jnz .L_skip_hkeys_precomputation_2 + + vmovdqu64 288(%rsi),%zmm1 + vmovdqu64 %zmm1,704(%rsp) + + vmovdqu64 224(%rsi),%zmm9 + vmovdqu64 %zmm9,640(%rsp) + + + vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 + + vmovdqu64 160(%rsi),%zmm10 + vmovdqu64 %zmm10,576(%rsp) + + vmovdqu64 96(%rsi),%zmm12 + vmovdqu64 %zmm12,512(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,448(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,384(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq 
%zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,320(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,256(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,192(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,128(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,64(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + 
vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,0(%rsp) +.L_skip_hkeys_precomputation_2: + movq $1,%rbx + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 0(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 64(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpxorq %zmm17,%zmm10,%zmm7 + vpxorq %zmm13,%zmm1,%zmm6 + vpxorq %zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 128(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 192(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 256(%r10),%zmm11 + vmovdqu64 320(%r10),%zmm3 + vmovdqu64 384(%r10),%zmm4 + vmovdqu64 448(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vmovdqu64 256(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 320(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 384(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 448(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 512(%r10),%zmm11 + vmovdqu64 576(%r10),%zmm3 + vmovdqu64 640(%r10),%zmm4 + vmovdqu64 704(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vmovdqu64 512(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 576(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 640(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 704(%rsp),%zmm19 + 
vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + + vpsrldq $8,%zmm7,%zmm1 + vpslldq $8,%zmm7,%zmm9 + vpxorq %zmm1,%zmm6,%zmm6 + vpxorq %zmm9,%zmm8,%zmm8 + vextracti64x4 $1,%zmm6,%ymm1 + vpxorq %ymm1,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm1 + vpxorq %xmm1,%xmm6,%xmm6 + vextracti64x4 $1,%zmm8,%ymm9 + vpxorq %ymm9,%ymm8,%ymm8 + vextracti32x4 $1,%ymm8,%xmm9 + vpxorq %xmm9,%xmm8,%xmm8 + vmovdqa64 POLY2(%rip),%xmm10 + + + vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 + vpslldq $8,%xmm1,%xmm1 + vpxorq %xmm1,%xmm8,%xmm1 + + + vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 + vpsrldq $4,%xmm9,%xmm9 + vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm6,%xmm9,%xmm2 + + subq $768,%r11 + je .L_CALC_AAD_done_1 + + addq $768,%r10 + jmp .L_get_AAD_loop48x16_1 + +.L_exit_AAD_loop48x16_1: + + cmpq $512,%r11 + jl .L_less_than_32x16_1 + + vmovdqu64 0(%r10),%zmm11 + vmovdqu64 64(%r10),%zmm3 + vmovdqu64 128(%r10),%zmm4 + vmovdqu64 192(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + testq %rbx,%rbx + jnz .L_skip_hkeys_precomputation_3 + + vmovdqu64 288(%rsi),%zmm1 + vmovdqu64 %zmm1,704(%rsp) + + vmovdqu64 224(%rsi),%zmm9 + vmovdqu64 %zmm9,640(%rsp) + + + vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 + + vmovdqu64 160(%rsi),%zmm10 + vmovdqu64 %zmm10,576(%rsp) + + vmovdqu64 96(%rsi),%zmm12 + vmovdqu64 %zmm12,512(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,448(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,384(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + 
vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,320(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,256(%rsp) +.L_skip_hkeys_precomputation_3: + movq $1,%rbx + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 256(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 320(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpxorq %zmm17,%zmm10,%zmm7 + vpxorq %zmm13,%zmm1,%zmm6 + vpxorq %zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 384(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 448(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 256(%r10),%zmm11 + vmovdqu64 320(%r10),%zmm3 + vmovdqu64 384(%r10),%zmm4 + vmovdqu64 448(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vmovdqu64 512(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 576(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 640(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 704(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + + vpsrldq $8,%zmm7,%zmm1 + vpslldq $8,%zmm7,%zmm9 + vpxorq %zmm1,%zmm6,%zmm6 + vpxorq %zmm9,%zmm8,%zmm8 + vextracti64x4 $1,%zmm6,%ymm1 + vpxorq %ymm1,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm1 + vpxorq %xmm1,%xmm6,%xmm6 + vextracti64x4 $1,%zmm8,%ymm9 + vpxorq %ymm9,%ymm8,%ymm8 + vextracti32x4 $1,%ymm8,%xmm9 + vpxorq %xmm9,%xmm8,%xmm8 + vmovdqa64 POLY2(%rip),%xmm10 + + + vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 + vpslldq $8,%xmm1,%xmm1 + vpxorq 
%xmm1,%xmm8,%xmm1 + + + vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 + vpsrldq $4,%xmm9,%xmm9 + vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm6,%xmm9,%xmm2 + + subq $512,%r11 + je .L_CALC_AAD_done_1 + + addq $512,%r10 + jmp .L_less_than_16x16_1 + +.L_less_than_32x16_1: + cmpq $256,%r11 + jl .L_less_than_16x16_1 + + vmovdqu64 0(%r10),%zmm11 + vmovdqu64 64(%r10),%zmm3 + vmovdqu64 128(%r10),%zmm4 + vmovdqu64 192(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 96(%rsi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 160(%rsi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpxorq %zmm17,%zmm10,%zmm7 + vpxorq %zmm13,%zmm1,%zmm6 + vpxorq %zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 224(%rsi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 288(%rsi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + + vpsrldq $8,%zmm7,%zmm1 + vpslldq $8,%zmm7,%zmm9 + vpxorq %zmm1,%zmm6,%zmm6 + vpxorq %zmm9,%zmm8,%zmm8 + vextracti64x4 $1,%zmm6,%ymm1 + vpxorq %ymm1,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm1 + vpxorq %xmm1,%xmm6,%xmm6 + vextracti64x4 $1,%zmm8,%ymm9 + vpxorq %ymm9,%ymm8,%ymm8 + vextracti32x4 $1,%ymm8,%xmm9 + vpxorq %xmm9,%xmm8,%xmm8 + vmovdqa64 POLY2(%rip),%xmm10 + + + vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 + vpslldq $8,%xmm1,%xmm1 + vpxorq %xmm1,%xmm8,%xmm1 + + + vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 + vpsrldq $4,%xmm9,%xmm9 + vpclmulqdq $0x10,%xmm1,%xmm10,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm6,%xmm9,%xmm2 + + subq $256,%r11 + je .L_CALC_AAD_done_1 + + addq $256,%r10 + +.L_less_than_16x16_1: + + leaq byte64_len_to_mask_table(%rip),%r12 + leaq (%r12,%r11,8),%r12 + + + addl $15,%r11d + shrl $4,%r11d + cmpl $2,%r11d + jb .L_AAD_blocks_1_1 + je .L_AAD_blocks_2_1 + cmpl $4,%r11d + jb .L_AAD_blocks_3_1 + je .L_AAD_blocks_4_1 + cmpl $6,%r11d + jb .L_AAD_blocks_5_1 + je .L_AAD_blocks_6_1 + cmpl $8,%r11d + jb .L_AAD_blocks_7_1 + je .L_AAD_blocks_8_1 + cmpl $10,%r11d + jb .L_AAD_blocks_9_1 + je .L_AAD_blocks_10_1 + cmpl $12,%r11d + jb .L_AAD_blocks_11_1 + je .L_AAD_blocks_12_1 + cmpl $14,%r11d + jb .L_AAD_blocks_13_1 + je .L_AAD_blocks_14_1 + cmpl $15,%r11d + je .L_AAD_blocks_15_1 +.L_AAD_blocks_16_1: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%zmm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 96(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 160(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq 
$0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 224(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm9,%zmm11,%zmm1 + vpternlogq $0x96,%zmm10,%zmm3,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm12,%zmm11,%zmm7 + vpternlogq $0x96,%zmm13,%zmm3,%zmm8 + vmovdqu64 288(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm5,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm5,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm5,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm5,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_15_1: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%zmm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 112(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 176(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 240(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + vmovdqu64 304(%rsi),%ymm15 + vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm5,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm5,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm5,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm5,%zmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_14_1: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 
64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%ymm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %ymm16,%ymm5,%ymm5 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 128(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 192(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 256(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + vmovdqu64 320(%rsi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm5,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm5,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm5,%ymm1 + vpclmulqdq $0x00,%ymm15,%ymm5,%ymm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_13_1: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%xmm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %xmm16,%xmm5,%xmm5 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 144(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 208(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 272(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + vmovdqu64 336(%rsi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm5,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm5,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm5,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 
+ vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_12_1: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 160(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 224(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 288(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_11_1: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 176(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 240(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + vmovdqu64 304(%rsi),%ymm15 + vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq 
$0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_10_1: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%ymm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %ymm16,%ymm4,%ymm4 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 192(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 256(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + vmovdqu64 320(%rsi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm4,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm4,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm4,%ymm1 + vpclmulqdq $0x00,%ymm15,%ymm4,%ymm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_9_1: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%xmm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %xmm16,%xmm4,%xmm4 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 208(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 272(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + vmovdqu64 336(%rsi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm4,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm4,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm4,%xmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + 
vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_8_1: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 224(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 288(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_7_1: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 240(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + vmovdqu64 304(%rsi),%ymm15 + vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_6_1: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%ymm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %ymm16,%ymm3,%ymm3 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 256(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + vmovdqu64 320(%rsi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm3,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm3,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm3,%ymm1 + vpclmulqdq 
$0x00,%ymm15,%ymm3,%ymm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_5_1: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%xmm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %xmm16,%xmm3,%xmm3 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 272(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + vmovdqu64 336(%rsi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm3,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm3,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm3,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm3,%xmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_4_1: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 288(%rsi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_3_1: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 304(%rsi),%ymm15 + vinserti64x2 $2,336(%rsi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vpclmulqdq 
$0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_2_1: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%ymm11{%k1}{z} + vpshufb %ymm16,%ymm11,%ymm11 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 320(%rsi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm11,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm11,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm11,%ymm1 + vpclmulqdq $0x00,%ymm15,%ymm11,%ymm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + + jmp .L_CALC_AAD_done_1 +.L_AAD_blocks_1_1: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%xmm11{%k1}{z} + vpshufb %xmm16,%xmm11,%xmm11 + vpxorq %zmm2,%zmm11,%zmm11 + vmovdqu64 336(%rsi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm11,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm11,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm11,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm11,%xmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm2 + vpslldq $4,%xmm2,%xmm2 + vpternlogq $0x96,%xmm1,%xmm8,%xmm2 + +.L_CALC_AAD_done_1: + movq %rcx,%r10 + shlq $3,%r10 + vmovq %r10,%xmm3 + + + vpxorq %xmm2,%xmm3,%xmm2 + + vmovdqu64 336(%rsi),%xmm1 + + vpclmulqdq $0x11,%xmm1,%xmm2,%xmm11 + vpclmulqdq $0x00,%xmm1,%xmm2,%xmm3 + vpclmulqdq $0x01,%xmm1,%xmm2,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm2,%xmm2 + vpxorq %xmm4,%xmm2,%xmm2 + + vpsrldq $8,%xmm2,%xmm4 + vpslldq $8,%xmm2,%xmm2 + vpxorq %xmm4,%xmm11,%xmm11 + vpxorq %xmm3,%xmm2,%xmm2 + + + + vmovdqu64 POLY2(%rip),%xmm4 + + vpclmulqdq $0x01,%xmm2,%xmm4,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm2,%xmm2 + + + + vpclmulqdq $0x00,%xmm2,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm2,%xmm4,%xmm2 + vpslldq $4,%xmm2,%xmm2 + + vpternlogq $0x96,%xmm3,%xmm11,%xmm2 + + vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 + jmp skip_iv_len_12_init_IV 
+iv_len_12_init_IV: + + vmovdqu8 ONEf(%rip),%xmm2 + movq %rdx,%r11 + movl $0x0000000000000fff,%r10d + kmovq %r10,%k1 + vmovdqu8 (%r11),%xmm2{%k1} +skip_iv_len_12_init_IV: + vmovdqu %xmm2,%xmm1 + + + movl 240(%rdi),%r10d + cmpl $9,%r10d + je .Laes_128_4 + cmpl $11,%r10d + je .Laes_192_4 + cmpl $13,%r10d + je .Laes_256_4 + jmp .Lexit_aes_4 +.align 32 +.Laes_128_4: + vpxorq 0(%rdi),%xmm1,%xmm1 + + vaesenc 16(%rdi),%xmm1,%xmm1 + + vaesenc 32(%rdi),%xmm1,%xmm1 + + vaesenc 48(%rdi),%xmm1,%xmm1 + + vaesenc 64(%rdi),%xmm1,%xmm1 + + vaesenc 80(%rdi),%xmm1,%xmm1 + + vaesenc 96(%rdi),%xmm1,%xmm1 + + vaesenc 112(%rdi),%xmm1,%xmm1 + + vaesenc 128(%rdi),%xmm1,%xmm1 + + vaesenc 144(%rdi),%xmm1,%xmm1 + + vaesenclast 160(%rdi),%xmm1,%xmm1 + jmp .Lexit_aes_4 +.align 32 +.Laes_192_4: + vpxorq 0(%rdi),%xmm1,%xmm1 + + vaesenc 16(%rdi),%xmm1,%xmm1 + + vaesenc 32(%rdi),%xmm1,%xmm1 + + vaesenc 48(%rdi),%xmm1,%xmm1 + + vaesenc 64(%rdi),%xmm1,%xmm1 + + vaesenc 80(%rdi),%xmm1,%xmm1 + + vaesenc 96(%rdi),%xmm1,%xmm1 + + vaesenc 112(%rdi),%xmm1,%xmm1 + + vaesenc 128(%rdi),%xmm1,%xmm1 + + vaesenc 144(%rdi),%xmm1,%xmm1 + + vaesenc 160(%rdi),%xmm1,%xmm1 + + vaesenc 176(%rdi),%xmm1,%xmm1 + + vaesenclast 192(%rdi),%xmm1,%xmm1 + jmp .Lexit_aes_4 +.align 32 +.Laes_256_4: + vpxorq 0(%rdi),%xmm1,%xmm1 + + vaesenc 16(%rdi),%xmm1,%xmm1 + + vaesenc 32(%rdi),%xmm1,%xmm1 + + vaesenc 48(%rdi),%xmm1,%xmm1 + + vaesenc 64(%rdi),%xmm1,%xmm1 + + vaesenc 80(%rdi),%xmm1,%xmm1 + + vaesenc 96(%rdi),%xmm1,%xmm1 + + vaesenc 112(%rdi),%xmm1,%xmm1 + + vaesenc 128(%rdi),%xmm1,%xmm1 + + vaesenc 144(%rdi),%xmm1,%xmm1 + + vaesenc 160(%rdi),%xmm1,%xmm1 + + vaesenc 176(%rdi),%xmm1,%xmm1 + + vaesenc 192(%rdi),%xmm1,%xmm1 + + vaesenc 208(%rdi),%xmm1,%xmm1 + + vaesenclast 224(%rdi),%xmm1,%xmm1 + jmp .Lexit_aes_4 +.Lexit_aes_4: + + vmovdqu %xmm1,32(%rsi) + + + vpshufb SHUF_MASK(%rip),%xmm2,%xmm2 + vmovdqu %xmm2,0(%rsi) + cmpq $256,%rcx + jbe .Lskip_hkeys_cleanup_5 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %zmm0,0(%rsp) + vmovdqa64 %zmm0,64(%rsp) + vmovdqa64 %zmm0,128(%rsp) + vmovdqa64 %zmm0,192(%rsp) + vmovdqa64 %zmm0,256(%rsp) + vmovdqa64 %zmm0,320(%rsp) + vmovdqa64 %zmm0,384(%rsp) + vmovdqa64 %zmm0,448(%rsp) + vmovdqa64 %zmm0,512(%rsp) + vmovdqa64 %zmm0,576(%rsp) + vmovdqa64 %zmm0,640(%rsp) + vmovdqa64 %zmm0,704(%rsp) +.Lskip_hkeys_cleanup_5: + vzeroupper + leaq (%rbp),%rsp +.cfi_def_cfa_register %rsp + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx +.Labort_setiv: + .byte 0xf3,0xc3 +.Lsetiv_seh_end: +.cfi_endproc +.size ossl_aes_gcm_setiv_avx512, .-ossl_aes_gcm_setiv_avx512 +.globl ossl_aes_gcm_update_aad_avx512 +.type ossl_aes_gcm_update_aad_avx512,@function +.align 32 +ossl_aes_gcm_update_aad_avx512: +.cfi_startproc +.Lghash_seh_begin: +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 +.Lghash_seh_push_rbx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 +.Lghash_seh_push_rbp: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 +.Lghash_seh_push_r12: + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 +.Lghash_seh_push_r13: + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 +.Lghash_seh_push_r14: + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lghash_seh_push_r15: + + + + + + 
+ + + + + leaq 0(%rsp),%rbp +.cfi_def_cfa_register %rbp +.Lghash_seh_setfp: + +.Lghash_seh_prolog_end: + subq $820,%rsp + andq $(-64),%rsp + vmovdqu64 64(%rdi),%xmm14 + movq %rsi,%r10 + movq %rdx,%r11 + orq %r11,%r11 + jz .L_CALC_AAD_done_6 + + xorq %rbx,%rbx + vmovdqa64 SHUF_MASK(%rip),%zmm16 + +.L_get_AAD_loop48x16_6: + cmpq $768,%r11 + jl .L_exit_AAD_loop48x16_6 + vmovdqu64 0(%r10),%zmm11 + vmovdqu64 64(%r10),%zmm3 + vmovdqu64 128(%r10),%zmm4 + vmovdqu64 192(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + testq %rbx,%rbx + jnz .L_skip_hkeys_precomputation_7 + + vmovdqu64 288(%rdi),%zmm1 + vmovdqu64 %zmm1,704(%rsp) + + vmovdqu64 224(%rdi),%zmm9 + vmovdqu64 %zmm9,640(%rsp) + + + vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 + + vmovdqu64 160(%rdi),%zmm10 + vmovdqu64 %zmm10,576(%rsp) + + vmovdqu64 96(%rdi),%zmm12 + vmovdqu64 %zmm12,512(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,448(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,384(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,320(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,256(%rsp) + + vpclmulqdq 
$0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,192(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,128(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,64(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,0(%rsp) +.L_skip_hkeys_precomputation_7: + movq $1,%rbx + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 0(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 64(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpxorq %zmm17,%zmm10,%zmm7 + vpxorq %zmm13,%zmm1,%zmm6 + vpxorq %zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 128(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 192(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq 
$0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 256(%r10),%zmm11 + vmovdqu64 320(%r10),%zmm3 + vmovdqu64 384(%r10),%zmm4 + vmovdqu64 448(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vmovdqu64 256(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 320(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 384(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 448(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 512(%r10),%zmm11 + vmovdqu64 576(%r10),%zmm3 + vmovdqu64 640(%r10),%zmm4 + vmovdqu64 704(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vmovdqu64 512(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 576(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 640(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 704(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + + vpsrldq $8,%zmm7,%zmm1 + vpslldq $8,%zmm7,%zmm9 + vpxorq %zmm1,%zmm6,%zmm6 + vpxorq %zmm9,%zmm8,%zmm8 + vextracti64x4 $1,%zmm6,%ymm1 + vpxorq %ymm1,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm1 + vpxorq %xmm1,%xmm6,%xmm6 + vextracti64x4 $1,%zmm8,%ymm9 + vpxorq %ymm9,%ymm8,%ymm8 + vextracti32x4 $1,%ymm8,%xmm9 + vpxorq %xmm9,%xmm8,%xmm8 + vmovdqa64 POLY2(%rip),%xmm10 + + + vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 + vpslldq $8,%xmm1,%xmm1 + vpxorq %xmm1,%xmm8,%xmm1 + + + vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 + vpsrldq $4,%xmm9,%xmm9 + vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm6,%xmm9,%xmm14 + + subq $768,%r11 + je .L_CALC_AAD_done_6 + + addq $768,%r10 + jmp .L_get_AAD_loop48x16_6 + +.L_exit_AAD_loop48x16_6: + + cmpq $512,%r11 + jl .L_less_than_32x16_6 + + vmovdqu64 0(%r10),%zmm11 + vmovdqu64 
64(%r10),%zmm3 + vmovdqu64 128(%r10),%zmm4 + vmovdqu64 192(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + testq %rbx,%rbx + jnz .L_skip_hkeys_precomputation_8 + + vmovdqu64 288(%rdi),%zmm1 + vmovdqu64 %zmm1,704(%rsp) + + vmovdqu64 224(%rdi),%zmm9 + vmovdqu64 %zmm9,640(%rsp) + + + vshufi64x2 $0x00,%zmm9,%zmm9,%zmm9 + + vmovdqu64 160(%rdi),%zmm10 + vmovdqu64 %zmm10,576(%rsp) + + vmovdqu64 96(%rdi),%zmm12 + vmovdqu64 %zmm12,512(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,448(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,384(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm10,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm10,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm10,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm10,%zmm10 + vpxorq %zmm17,%zmm10,%zmm10 + + vpsrldq $8,%zmm10,%zmm17 + vpslldq $8,%zmm10,%zmm10 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm10,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm10,%zmm10 + + + + vpclmulqdq $0x00,%zmm10,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm10,%zmm17,%zmm10 + vpslldq $4,%zmm10,%zmm10 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm10 + + vmovdqu64 %zmm10,320(%rsp) + + vpclmulqdq $0x11,%zmm9,%zmm12,%zmm13 + vpclmulqdq $0x00,%zmm9,%zmm12,%zmm15 + vpclmulqdq $0x01,%zmm9,%zmm12,%zmm17 + vpclmulqdq $0x10,%zmm9,%zmm12,%zmm12 + vpxorq %zmm17,%zmm12,%zmm12 + + vpsrldq $8,%zmm12,%zmm17 + vpslldq $8,%zmm12,%zmm12 + vpxorq %zmm17,%zmm13,%zmm13 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vmovdqu64 POLY2(%rip),%zmm17 + + vpclmulqdq $0x01,%zmm12,%zmm17,%zmm15 + vpslldq $8,%zmm15,%zmm15 + vpxorq %zmm15,%zmm12,%zmm12 + + + + vpclmulqdq $0x00,%zmm12,%zmm17,%zmm15 + vpsrldq $4,%zmm15,%zmm15 + vpclmulqdq $0x10,%zmm12,%zmm17,%zmm12 + vpslldq $4,%zmm12,%zmm12 + + vpternlogq $0x96,%zmm15,%zmm13,%zmm12 + + vmovdqu64 %zmm12,256(%rsp) +.L_skip_hkeys_precomputation_8: + movq $1,%rbx + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 256(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 320(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + 
vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpxorq %zmm17,%zmm10,%zmm7 + vpxorq %zmm13,%zmm1,%zmm6 + vpxorq %zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 384(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 448(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 256(%r10),%zmm11 + vmovdqu64 320(%r10),%zmm3 + vmovdqu64 384(%r10),%zmm4 + vmovdqu64 448(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vmovdqu64 512(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 576(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 640(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 704(%rsp),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + + vpsrldq $8,%zmm7,%zmm1 + vpslldq $8,%zmm7,%zmm9 + vpxorq %zmm1,%zmm6,%zmm6 + vpxorq %zmm9,%zmm8,%zmm8 + vextracti64x4 $1,%zmm6,%ymm1 + vpxorq %ymm1,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm1 + vpxorq %xmm1,%xmm6,%xmm6 + vextracti64x4 $1,%zmm8,%ymm9 + vpxorq %ymm9,%ymm8,%ymm8 + vextracti32x4 $1,%ymm8,%xmm9 + vpxorq %xmm9,%xmm8,%xmm8 + vmovdqa64 POLY2(%rip),%xmm10 + + + vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 + vpslldq $8,%xmm1,%xmm1 + vpxorq %xmm1,%xmm8,%xmm1 + + + vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 + vpsrldq $4,%xmm9,%xmm9 + vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm6,%xmm9,%xmm14 + + subq $512,%r11 + je .L_CALC_AAD_done_6 + + addq $512,%r10 + jmp .L_less_than_16x16_6 + +.L_less_than_32x16_6: + cmpq $256,%r11 + jl .L_less_than_16x16_6 + + vmovdqu64 0(%r10),%zmm11 + vmovdqu64 64(%r10),%zmm3 + vmovdqu64 128(%r10),%zmm4 + vmovdqu64 192(%r10),%zmm5 + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 96(%rdi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm11,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm11,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm11,%zmm12 + vmovdqu64 160(%rdi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm3,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm3,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm3,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm3,%zmm18 + vpxorq %zmm17,%zmm10,%zmm7 + vpxorq %zmm13,%zmm1,%zmm6 + vpxorq %zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + vmovdqu64 224(%rdi),%zmm19 + vpclmulqdq 
$0x11,%zmm19,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm19,%zmm4,%zmm9 + vpclmulqdq $0x01,%zmm19,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm19,%zmm4,%zmm12 + vmovdqu64 288(%rdi),%zmm19 + vpclmulqdq $0x11,%zmm19,%zmm5,%zmm13 + vpclmulqdq $0x00,%zmm19,%zmm5,%zmm15 + vpclmulqdq $0x01,%zmm19,%zmm5,%zmm17 + vpclmulqdq $0x10,%zmm19,%zmm5,%zmm18 + + vpternlogq $0x96,%zmm17,%zmm10,%zmm7 + vpternlogq $0x96,%zmm13,%zmm1,%zmm6 + vpternlogq $0x96,%zmm15,%zmm9,%zmm8 + vpternlogq $0x96,%zmm18,%zmm12,%zmm7 + + vpsrldq $8,%zmm7,%zmm1 + vpslldq $8,%zmm7,%zmm9 + vpxorq %zmm1,%zmm6,%zmm6 + vpxorq %zmm9,%zmm8,%zmm8 + vextracti64x4 $1,%zmm6,%ymm1 + vpxorq %ymm1,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm1 + vpxorq %xmm1,%xmm6,%xmm6 + vextracti64x4 $1,%zmm8,%ymm9 + vpxorq %ymm9,%ymm8,%ymm8 + vextracti32x4 $1,%ymm8,%xmm9 + vpxorq %xmm9,%xmm8,%xmm8 + vmovdqa64 POLY2(%rip),%xmm10 + + + vpclmulqdq $0x01,%xmm8,%xmm10,%xmm1 + vpslldq $8,%xmm1,%xmm1 + vpxorq %xmm1,%xmm8,%xmm1 + + + vpclmulqdq $0x00,%xmm1,%xmm10,%xmm9 + vpsrldq $4,%xmm9,%xmm9 + vpclmulqdq $0x10,%xmm1,%xmm10,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm6,%xmm9,%xmm14 + + subq $256,%r11 + je .L_CALC_AAD_done_6 + + addq $256,%r10 + +.L_less_than_16x16_6: + + leaq byte64_len_to_mask_table(%rip),%r12 + leaq (%r12,%r11,8),%r12 + + + addl $15,%r11d + shrl $4,%r11d + cmpl $2,%r11d + jb .L_AAD_blocks_1_6 + je .L_AAD_blocks_2_6 + cmpl $4,%r11d + jb .L_AAD_blocks_3_6 + je .L_AAD_blocks_4_6 + cmpl $6,%r11d + jb .L_AAD_blocks_5_6 + je .L_AAD_blocks_6_6 + cmpl $8,%r11d + jb .L_AAD_blocks_7_6 + je .L_AAD_blocks_8_6 + cmpl $10,%r11d + jb .L_AAD_blocks_9_6 + je .L_AAD_blocks_10_6 + cmpl $12,%r11d + jb .L_AAD_blocks_11_6 + je .L_AAD_blocks_12_6 + cmpl $14,%r11d + jb .L_AAD_blocks_13_6 + je .L_AAD_blocks_14_6 + cmpl $15,%r11d + je .L_AAD_blocks_15_6 +.L_AAD_blocks_16_6: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%zmm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 96(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 160(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 224(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm9,%zmm11,%zmm1 + vpternlogq $0x96,%zmm10,%zmm3,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm12,%zmm11,%zmm7 + vpternlogq $0x96,%zmm13,%zmm3,%zmm8 + vmovdqu64 288(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm5,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm5,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm5,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm5,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq 
$0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_15_6: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%zmm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %zmm16,%zmm5,%zmm5 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 112(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 176(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 240(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + vmovdqu64 304(%rdi),%ymm15 + vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm5,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm5,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm5,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm5,%zmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_14_6: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%ymm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %ymm16,%ymm5,%ymm5 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 128(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 192(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 256(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + vmovdqu64 320(%rdi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm5,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm5,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm5,%ymm1 + vpclmulqdq $0x00,%ymm15,%ymm5,%ymm6 + + vpxorq 
%zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_13_6: + subq $1536,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4 + vmovdqu8 192(%r10),%xmm5{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpshufb %xmm16,%xmm5,%xmm5 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 144(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 208(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 272(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + vmovdqu64 336(%rdi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm5,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm5,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm5,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_12_6: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 160(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 224(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vmovdqu64 288(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm11 + vpclmulqdq 
$0x00,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm1,%zmm11,%zmm9 + vpternlogq $0x96,%zmm6,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm11 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm3 + vpternlogq $0x96,%zmm7,%zmm11,%zmm12 + vpternlogq $0x96,%zmm8,%zmm3,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_11_6: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%zmm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %zmm16,%zmm4,%zmm4 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 176(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 240(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + vmovdqu64 304(%rdi),%ymm15 + vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm4,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm4,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm4,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm4,%zmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_10_6: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%ymm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %ymm16,%ymm4,%ymm4 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 192(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 256(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + vmovdqu64 320(%rdi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm4,%ymm7 
+ vpclmulqdq $0x10,%ymm15,%ymm4,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm4,%ymm1 + vpclmulqdq $0x00,%ymm15,%ymm4,%ymm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_9_6: + subq $1024,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3 + vmovdqu8 128(%r10),%xmm4{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpshufb %xmm16,%xmm4,%xmm4 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 208(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 272(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + vmovdqu64 336(%rdi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm4,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm4,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm4,%xmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_8_6: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 224(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vmovdqu64 288(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm13 + vpxorq %zmm9,%zmm1,%zmm9 + vpxorq %zmm10,%zmm6,%zmm10 + vpxorq %zmm12,%zmm7,%zmm12 + vpxorq %zmm13,%zmm8,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 
+ vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_7_6: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%zmm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %zmm16,%zmm3,%zmm3 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 240(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + vmovdqu64 304(%rdi),%ymm15 + vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm3,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm3,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm3,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm3,%zmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_6_6: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%ymm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %ymm16,%ymm3,%ymm3 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 256(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + vmovdqu64 320(%rdi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm3,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm3,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm3,%ymm1 + vpclmulqdq $0x00,%ymm15,%ymm3,%ymm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_5_6: + subq $512,%r12 + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11 + vmovdqu8 64(%r10),%xmm3{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpshufb %xmm16,%xmm3,%xmm3 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 
272(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + vmovdqu64 336(%rdi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm3,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm3,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm3,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm3,%xmm6 + + vpxorq %zmm12,%zmm7,%zmm7 + vpxorq %zmm13,%zmm8,%zmm8 + vpxorq %zmm9,%zmm1,%zmm1 + vpxorq %zmm10,%zmm6,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_4_6: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 288(%rdi),%zmm15 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm9 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm10 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm12 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm13 + + vpxorq %zmm13,%zmm12,%zmm12 + vpsrldq $8,%zmm12,%zmm7 + vpslldq $8,%zmm12,%zmm8 + vpxorq %zmm7,%zmm9,%zmm1 + vpxorq %zmm8,%zmm10,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_3_6: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%zmm11{%k1}{z} + vpshufb %zmm16,%zmm11,%zmm11 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 304(%rdi),%ymm15 + vinserti64x2 $2,336(%rdi),%zmm15,%zmm15 + vpclmulqdq $0x01,%zmm15,%zmm11,%zmm7 + vpclmulqdq $0x10,%zmm15,%zmm11,%zmm8 + vpclmulqdq $0x11,%zmm15,%zmm11,%zmm1 + vpclmulqdq $0x00,%zmm15,%zmm11,%zmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_2_6: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%ymm11{%k1}{z} + vpshufb %ymm16,%ymm11,%ymm11 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 320(%rdi),%ymm15 + vpclmulqdq $0x01,%ymm15,%ymm11,%ymm7 + vpclmulqdq $0x10,%ymm15,%ymm11,%ymm8 + vpclmulqdq $0x11,%ymm15,%ymm11,%ymm1 + vpclmulqdq 
$0x00,%ymm15,%ymm11,%ymm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + + jmp .L_CALC_AAD_done_6 +.L_AAD_blocks_1_6: + kmovq (%r12),%k1 + vmovdqu8 0(%r10),%xmm11{%k1}{z} + vpshufb %xmm16,%xmm11,%xmm11 + vpxorq %zmm14,%zmm11,%zmm11 + vmovdqu64 336(%rdi),%xmm15 + vpclmulqdq $0x01,%xmm15,%xmm11,%xmm7 + vpclmulqdq $0x10,%xmm15,%xmm11,%xmm8 + vpclmulqdq $0x11,%xmm15,%xmm11,%xmm1 + vpclmulqdq $0x00,%xmm15,%xmm11,%xmm6 + + vpxorq %zmm8,%zmm7,%zmm7 + vpsrldq $8,%zmm7,%zmm12 + vpslldq $8,%zmm7,%zmm13 + vpxorq %zmm12,%zmm1,%zmm1 + vpxorq %zmm13,%zmm6,%zmm6 + vextracti64x4 $1,%zmm1,%ymm12 + vpxorq %ymm12,%ymm1,%ymm1 + vextracti32x4 $1,%ymm1,%xmm12 + vpxorq %xmm12,%xmm1,%xmm1 + vextracti64x4 $1,%zmm6,%ymm13 + vpxorq %ymm13,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm13 + vpxorq %xmm13,%xmm6,%xmm6 + vmovdqa64 POLY2(%rip),%xmm15 + + + vpclmulqdq $0x01,%xmm6,%xmm15,%xmm7 + vpslldq $8,%xmm7,%xmm7 + vpxorq %xmm7,%xmm6,%xmm7 + + + vpclmulqdq $0x00,%xmm7,%xmm15,%xmm8 + vpsrldq $4,%xmm8,%xmm8 + vpclmulqdq $0x10,%xmm7,%xmm15,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm1,%xmm8,%xmm14 + +.L_CALC_AAD_done_6: + vmovdqu64 %xmm14,64(%rdi) + cmpq $256,%rdx + jbe .Lskip_hkeys_cleanup_9 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %zmm0,0(%rsp) + vmovdqa64 %zmm0,64(%rsp) + vmovdqa64 %zmm0,128(%rsp) + vmovdqa64 %zmm0,192(%rsp) + vmovdqa64 %zmm0,256(%rsp) + vmovdqa64 %zmm0,320(%rsp) + vmovdqa64 %zmm0,384(%rsp) + vmovdqa64 %zmm0,448(%rsp) + vmovdqa64 %zmm0,512(%rsp) + vmovdqa64 %zmm0,576(%rsp) + vmovdqa64 %zmm0,640(%rsp) + vmovdqa64 %zmm0,704(%rsp) +.Lskip_hkeys_cleanup_9: + vzeroupper + leaq (%rbp),%rsp +.cfi_def_cfa_register %rsp + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx +.Lexit_update_aad: + .byte 0xf3,0xc3 +.Lghash_seh_end: +.cfi_endproc +.size ossl_aes_gcm_update_aad_avx512, .-ossl_aes_gcm_update_aad_avx512 +.globl ossl_aes_gcm_encrypt_avx512 +.type ossl_aes_gcm_encrypt_avx512,@function +.align 32 +ossl_aes_gcm_encrypt_avx512: +.cfi_startproc +.Lencrypt_seh_begin: +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 +.Lencrypt_seh_push_rbx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 +.Lencrypt_seh_push_rbp: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 +.Lencrypt_seh_push_r12: + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 +.Lencrypt_seh_push_r13: + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 +.Lencrypt_seh_push_r14: + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lencrypt_seh_push_r15: + + + + + + + + + + + leaq 0(%rsp),%rbp +.cfi_def_cfa_register %rbp +.Lencrypt_seh_setfp: + 
+.Lencrypt_seh_prolog_end: + subq $1588,%rsp + andq $(-64),%rsp + + + movl 240(%rdi),%eax + cmpl $9,%eax + je .Laes_gcm_encrypt_128_avx512 + cmpl $11,%eax + je .Laes_gcm_encrypt_192_avx512 + cmpl $13,%eax + je .Laes_gcm_encrypt_256_avx512 + xorl %eax,%eax + jmp .Lexit_gcm_encrypt +.align 32 +.Laes_gcm_encrypt_128_avx512: + orq %r8,%r8 + je .L_enc_dec_done_10 + xorq %r14,%r14 + vmovdqu64 64(%rsi),%xmm14 + + movq (%rdx),%r11 + orq %r11,%r11 + je .L_partial_block_done_11 + movl $16,%r10d + leaq byte_len_to_mask_table(%rip),%r12 + cmpq %r10,%r8 + cmovcq %r8,%r10 + kmovw (%r12,%r10,2),%k1 + vmovdqu8 (%rcx),%xmm0{%k1}{z} + + vmovdqu64 16(%rsi),%xmm3 + vmovdqu64 336(%rsi),%xmm4 + + + + leaq SHIFT_MASK(%rip),%r12 + addq %r11,%r12 + vmovdqu64 (%r12),%xmm5 + vpshufb %xmm5,%xmm3,%xmm3 + vpxorq %xmm0,%xmm3,%xmm3 + + + leaq (%r8,%r11,1),%r13 + subq $16,%r13 + jge .L_no_extra_mask_11 + subq %r13,%r12 +.L_no_extra_mask_11: + + + + vmovdqu64 16(%r12),%xmm0 + vpand %xmm0,%xmm3,%xmm3 + vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 + vpshufb %xmm5,%xmm3,%xmm3 + vpxorq %xmm3,%xmm14,%xmm14 + cmpq $0,%r13 + jl .L_partial_incomplete_11 + + vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 + vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 + vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 + vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 + vpxorq %xmm11,%xmm14,%xmm14 + + vpsrldq $8,%xmm14,%xmm11 + vpslldq $8,%xmm14,%xmm14 + vpxorq %xmm11,%xmm7,%xmm7 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vmovdqu64 POLY2(%rip),%xmm11 + + vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 + vpslldq $8,%xmm10,%xmm10 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 + vpsrldq $4,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 + vpslldq $4,%xmm14,%xmm14 + + vpternlogq $0x96,%xmm10,%xmm7,%xmm14 + + movq $0,(%rdx) + + movq %r11,%r12 + movq $16,%r11 + subq %r12,%r11 + jmp .L_enc_dec_done_11 + +.L_partial_incomplete_11: + addq %r8,(%rdx) + movq %r8,%r11 + +.L_enc_dec_done_11: + + + leaq byte_len_to_mask_table(%rip),%r12 + kmovw (%r12,%r11,2),%k1 + vmovdqu64 %xmm14,64(%rsi) + + vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 + vpshufb %xmm5,%xmm3,%xmm3 + movq %r9,%r12 + vmovdqu8 %xmm3,(%r12){%k1} +.L_partial_block_done_11: + vmovdqu64 0(%rsi),%xmm2 + subq %r11,%r8 + je .L_enc_dec_done_10 + cmpq $256,%r8 + jbe .L_message_below_equal_16_blocks_10 + + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vmovdqa64 ddq_addbe_4444(%rip),%zmm27 + vmovdqa64 ddq_addbe_1234(%rip),%zmm28 + + + + + + + vmovd %xmm2,%r15d + andl $255,%r15d + + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpshufb %zmm29,%zmm2,%zmm2 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_12 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_12 +.L_next_16_overflow_12: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_12: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 0(%rcx,%r11,1),%zmm0 + vmovdqu8 64(%rcx,%r11,1),%zmm3 + vmovdqu8 128(%rcx,%r11,1),%zmm4 + vmovdqu8 192(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc 
%zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,0(%r10,%r11,1) + vmovdqu8 %zmm10,64(%r10,%r11,1) + vmovdqu8 %zmm11,128(%r10,%r11,1) + vmovdqu8 %zmm12,192(%r10,%r11,1) + + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 + vmovdqa64 %zmm7,768(%rsp) + vmovdqa64 %zmm10,832(%rsp) + vmovdqa64 %zmm11,896(%rsp) + vmovdqa64 %zmm12,960(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_13 + + vmovdqu64 288(%rsi),%zmm0 + vmovdqu64 %zmm0,704(%rsp) + + vmovdqu64 224(%rsi),%zmm3 + vmovdqu64 %zmm3,640(%rsp) + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 160(%rsi),%zmm4 + vmovdqu64 %zmm4,576(%rsp) + + vmovdqu64 96(%rsi),%zmm5 + vmovdqu64 %zmm5,512(%rsp) +.L_skip_hkeys_precomputation_13: + cmpq $512,%r8 + jb .L_message_below_32_blocks_10 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_14 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_14 +.L_next_16_overflow_14: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_14: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 256(%rcx,%r11,1),%zmm0 + vmovdqu8 320(%rcx,%r11,1),%zmm3 + vmovdqu8 384(%rcx,%r11,1),%zmm4 + vmovdqu8 448(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 
48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,256(%r10,%r11,1) + vmovdqu8 %zmm10,320(%r10,%r11,1) + vmovdqu8 %zmm11,384(%r10,%r11,1) + vmovdqu8 %zmm12,448(%r10,%r11,1) + + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 + vmovdqa64 %zmm7,1024(%rsp) + vmovdqa64 %zmm10,1088(%rsp) + vmovdqa64 %zmm11,1152(%rsp) + vmovdqa64 %zmm12,1216(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_15 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq 
%zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,192(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,128(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,64(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,0(%rsp) +.L_skip_hkeys_precomputation_15: + movq $1,%r14 + addq $512,%r11 + subq $512,%r8 + + cmpq $768,%r8 + jb .L_no_more_big_nblocks_10 +.L_encrypt_big_nblocks_10: + cmpb $240,%r15b + jae .L_16_blocks_overflow_16 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd 
%zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_16 +.L_16_blocks_overflow_16: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_16: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq 
%zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_17 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_17 +.L_16_blocks_overflow_17: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_17: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq 
$0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_18 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_18 +.L_16_blocks_overflow_18: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_18: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 512(%rcx,%r11,1),%zmm17 + vmovdqu8 576(%rcx,%r11,1),%zmm19 + vmovdqu8 
640(%rcx,%r11,1),%zmm20 + vmovdqu8 704(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + + + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpternlogq $0x96,%zmm15,%zmm12,%zmm6 + vpxorq %zmm24,%zmm6,%zmm6 + vpternlogq $0x96,%zmm10,%zmm13,%zmm7 + vpxorq %zmm25,%zmm7,%zmm7 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vextracti64x4 $1,%zmm6,%ymm12 + vpxorq %ymm12,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm12 + vpxorq %xmm12,%xmm6,%xmm6 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm6 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,512(%r10,%r11,1) + vmovdqu8 %zmm3,576(%r10,%r11,1) + vmovdqu8 %zmm4,640(%r10,%r11,1) + vmovdqu8 %zmm5,704(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1024(%rsp) + vmovdqa64 %zmm3,1088(%rsp) + vmovdqa64 %zmm4,1152(%rsp) + vmovdqa64 %zmm5,1216(%rsp) + vmovdqa64 %zmm6,%zmm14 + + addq $768,%r11 + subq $768,%r8 + cmpq $768,%r8 + jae .L_encrypt_big_nblocks_10 + +.L_no_more_big_nblocks_10: + + cmpq $512,%r8 + jae .L_encrypt_32_blocks_10 + + cmpq $256,%r8 + jae .L_encrypt_16_blocks_10 +.L_encrypt_0_blocks_ghash_32_10: + movl %r8d,%r10d + andl $~15,%r10d + movl $256,%ebx + subl %r10d,%ebx + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq 
$0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + addl $256,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_19 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_19 + jb .L_last_num_blocks_is_7_1_19 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_19 + jb .L_last_num_blocks_is_11_9_19 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_19 + ja .L_last_num_blocks_is_16_19 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_19 + jmp .L_last_num_blocks_is_13_19 + +.L_last_num_blocks_is_11_9_19: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_19 + ja .L_last_num_blocks_is_11_19 + jmp .L_last_num_blocks_is_9_19 + +.L_last_num_blocks_is_7_1_19: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_19 + jb .L_last_num_blocks_is_3_1_19 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_19 + je .L_last_num_blocks_is_6_19 + jmp .L_last_num_blocks_is_5_19 + +.L_last_num_blocks_is_3_1_19: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_19 + je .L_last_num_blocks_is_2_19 +.L_last_num_blocks_is_1_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_20 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_20 + +.L_16_blocks_overflow_20: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_20: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 
+ vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_21 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_21 +.L_small_initial_partial_block_21: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_21 +.L_small_initial_compute_done_21: +.L_after_reduction_21: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_2_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_22 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_22 + +.L_16_blocks_overflow_22: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_22: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 
192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_23 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_23 +.L_small_initial_partial_block_23: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_23: + + orq %r8,%r8 + je .L_after_reduction_23 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_23: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_3_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_24 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_24 + +.L_16_blocks_overflow_24: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_24: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_25 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 
$1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_25 +.L_small_initial_partial_block_25: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_25: + + orq %r8,%r8 + je .L_after_reduction_25 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_25: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_4_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_26 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_26 + +.L_16_blocks_overflow_26: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_26: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_27 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_27 +.L_small_initial_partial_block_27: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_27: + + orq %r8,%r8 + je .L_after_reduction_27 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_27: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_5_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_28 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_28 + +.L_16_blocks_overflow_28: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_28: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 
1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_29 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 
$1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_29 +.L_small_initial_partial_block_29: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_29: + + orq %r8,%r8 + je .L_after_reduction_29 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_29: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_6_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_30 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_30 + +.L_16_blocks_overflow_30: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_30: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + 
vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_31 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_31 +.L_small_initial_partial_block_31: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_31: + + orq %r8,%r8 + je .L_after_reduction_31 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_31: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_7_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_32 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_32 + +.L_16_blocks_overflow_32: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_32: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 
%zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_33 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_33 +.L_small_initial_partial_block_33: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_33: + + orq %r8,%r8 + je .L_after_reduction_33 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_33: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_8_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_34 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_34 + +.L_16_blocks_overflow_34: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_34: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 
0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_35 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_35 +.L_small_initial_partial_block_35: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_35: + + orq %r8,%r8 + je .L_after_reduction_35 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_35: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_9_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_36 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_36 + +.L_16_blocks_overflow_36: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_36: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + 
vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_37 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + 
vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_37 +.L_small_initial_partial_block_37: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_37: + + orq %r8,%r8 + je .L_after_reduction_37 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_37: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_10_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_38 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_38 + +.L_16_blocks_overflow_38: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_38: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 
+ vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_39 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_39 +.L_small_initial_partial_block_39: + + + + 
+ + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_39: + + orq %r8,%r8 + je .L_after_reduction_39 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_39: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_11_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_40 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_40 + +.L_16_blocks_overflow_40: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_40: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_41 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_41 
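+/* Last block is partial: store the residual byte count and the partial-block state, GHASH only the completed blocks, then reduce modulo the GCM polynomial (POLY2). */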
+.L_small_initial_partial_block_41: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_41: + + orq %r8,%r8 + je .L_after_reduction_41 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_41: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_12_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_42 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_42 + +.L_16_blocks_overflow_42: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_42: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 
+ vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_43 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_43 
+.L_small_initial_partial_block_43: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_43: + + orq %r8,%r8 + je .L_after_reduction_43 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_43: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_13_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_44 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_44 + +.L_16_blocks_overflow_44: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_44: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + 
vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_45 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq 
$0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_45 +.L_small_initial_partial_block_45: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_45: + + orq %r8,%r8 + je .L_after_reduction_45 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_45: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_14_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_46 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_46 + +.L_16_blocks_overflow_46: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_46: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq 
%zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_47 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq 
$0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_47 +.L_small_initial_partial_block_47: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_47: + + orq %r8,%r8 + je .L_after_reduction_47 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_47: + jmp .L_last_blocks_done_19 
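+/* 15-block tail path: encrypt the remaining counter blocks with AES-CTR (masked load/store for the partial last 64-byte chunk) and fold the byte-swapped result into the GHASH accumulator. */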
+.L_last_num_blocks_is_15_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_48 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_48 + +.L_16_blocks_overflow_48: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_48: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_49 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_49 +.L_small_initial_partial_block_49: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq 
$0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_49: + + orq %r8,%r8 + je .L_after_reduction_49 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_49: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_16_19: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_50 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_50 + +.L_16_blocks_overflow_50: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_50: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 
64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_51: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + 
vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_51: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_51: + jmp .L_last_blocks_done_19 +.L_last_num_blocks_is_0_19: + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_19: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_10 +.L_encrypt_32_blocks_10: + cmpb $240,%r15b + jae .L_16_blocks_overflow_52 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_52 +.L_16_blocks_overflow_52: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_52: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_53 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_53 +.L_16_blocks_overflow_53: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_53: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + 
+ + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq 
$0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + + subq $512,%r8 + addq $512,%r11 + movl %r8d,%r10d + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_54 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_54 + jb .L_last_num_blocks_is_7_1_54 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_54 + jb .L_last_num_blocks_is_11_9_54 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_54 + ja .L_last_num_blocks_is_16_54 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_54 + jmp .L_last_num_blocks_is_13_54 + +.L_last_num_blocks_is_11_9_54: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_54 + ja .L_last_num_blocks_is_11_54 + jmp .L_last_num_blocks_is_9_54 + +.L_last_num_blocks_is_7_1_54: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_54 + jb .L_last_num_blocks_is_3_1_54 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_54 + je .L_last_num_blocks_is_6_54 + jmp .L_last_num_blocks_is_5_54 + +.L_last_num_blocks_is_3_1_54: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_54 + je .L_last_num_blocks_is_2_54 +.L_last_num_blocks_is_1_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_55 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_55 + +.L_16_blocks_overflow_55: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_55: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 
+ vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_56 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_56 +.L_small_initial_partial_block_56: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq 
%xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_56 +.L_small_initial_compute_done_56: +.L_after_reduction_56: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_2_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_57 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_57 + +.L_16_blocks_overflow_57: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_57: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_58 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + 
vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_58 +.L_small_initial_partial_block_58: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_58: + + orq %r8,%r8 + je .L_after_reduction_58 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_58: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_3_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_59 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_59 + +.L_16_blocks_overflow_59: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_59: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq 
%zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_60 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_60 +.L_small_initial_partial_block_60: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_60: + + orq %r8,%r8 + je .L_after_reduction_60 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_60: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_4_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_61 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_61 + +.L_16_blocks_overflow_61: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_61: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_62 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_62 +.L_small_initial_partial_block_62: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + 
vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_62: + + orq %r8,%r8 + je .L_after_reduction_62 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_62: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_5_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_63 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_63 + +.L_16_blocks_overflow_63: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_63: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + 
vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_64 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_64 +.L_small_initial_partial_block_64: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_64: + + orq %r8,%r8 + je .L_after_reduction_64 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_64: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_6_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_65 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_65 + +.L_16_blocks_overflow_65: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_65: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 
832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_66 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_66 +.L_small_initial_partial_block_66: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_66: + + orq %r8,%r8 + je .L_after_reduction_66 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_66: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_7_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_67 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_67 + +.L_16_blocks_overflow_67: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_67: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_68 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_68 +.L_small_initial_partial_block_68: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_68: + + orq %r8,%r8 + je .L_after_reduction_68 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_68: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_8_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_69 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_69 + +.L_16_blocks_overflow_69: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_69: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + 
vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_70 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_70 +.L_small_initial_partial_block_70: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_70: + + orq %r8,%r8 + je .L_after_reduction_70 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_70: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_9_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_71 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_71 + +.L_16_blocks_overflow_71: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb 
%xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_71: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_72 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_72 +.L_small_initial_partial_block_72: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_72: + + orq %r8,%r8 + je .L_after_reduction_72 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_72: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_10_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_73 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_73 + +.L_16_blocks_overflow_73: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_73: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + 
vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_74 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq 
$0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_74 +.L_small_initial_partial_block_74: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_74: + + orq %r8,%r8 + je .L_after_reduction_74 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_74: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_11_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_75 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_75 + +.L_16_blocks_overflow_75: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_75: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 
64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_76 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 
$2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_76 +.L_small_initial_partial_block_76: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_76: + + orq %r8,%r8 + je .L_after_reduction_76 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_76: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_12_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_77 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_77 + +.L_16_blocks_overflow_77: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_77: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 
16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_78 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + 
vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_78 +.L_small_initial_partial_block_78: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_78: + + orq %r8,%r8 + je .L_after_reduction_78 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_78: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_13_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_79 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_79 + +.L_16_blocks_overflow_79: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_79: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + 
vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_80 + + + + + + subq 
$16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_80 +.L_small_initial_partial_block_80: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_80: + + orq %r8,%r8 + je .L_after_reduction_80 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_80: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_14_54: + leaq 
byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_81 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_81 + +.L_16_blocks_overflow_81: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_81: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + 
vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_82 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_82 +.L_small_initial_partial_block_82: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + 
vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_82: + + orq %r8,%r8 + je .L_after_reduction_82 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_82: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_15_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_83 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_83 + +.L_16_blocks_overflow_83: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_83: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_84 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_84 +.L_small_initial_partial_block_84: + + + + + + + + + movq 
%r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_84: + + orq %r8,%r8 + je .L_after_reduction_84 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_84: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_16_54: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_85 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_85 + +.L_16_blocks_overflow_85: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_85: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 
192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_86: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq 
$0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_86: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_86: + jmp .L_last_blocks_done_54 +.L_last_num_blocks_is_0_54: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_54: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_10 +.L_encrypt_16_blocks_10: + cmpb $240,%r15b + jae .L_16_blocks_overflow_87 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_87 +.L_16_blocks_overflow_87: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_87: + 
vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 
%zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 256(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 320(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 384(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 448(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_88 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_88 + jb .L_last_num_blocks_is_7_1_88 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_88 + jb .L_last_num_blocks_is_11_9_88 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_88 + ja .L_last_num_blocks_is_16_88 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_88 + jmp .L_last_num_blocks_is_13_88 + +.L_last_num_blocks_is_11_9_88: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_88 + ja .L_last_num_blocks_is_11_88 + jmp .L_last_num_blocks_is_9_88 + +.L_last_num_blocks_is_7_1_88: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_88 + jb .L_last_num_blocks_is_3_1_88 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_88 + je .L_last_num_blocks_is_6_88 + jmp .L_last_num_blocks_is_5_88 + +.L_last_num_blocks_is_3_1_88: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_88 + je .L_last_num_blocks_is_2_88 +.L_last_num_blocks_is_1_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_89 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_89 + +.L_16_blocks_overflow_89: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_89: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %xmm31,%xmm0,%xmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_90 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_90 +.L_small_initial_partial_block_90: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_90 +.L_small_initial_compute_done_90: +.L_after_reduction_90: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_2_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_91 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_91 + +.L_16_blocks_overflow_91: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_91: + + + + + vbroadcastf64x2 
0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %ymm31,%ymm0,%ymm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_92 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_92 +.L_small_initial_partial_block_92: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_92: + + orq %r8,%r8 + je .L_after_reduction_92 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_92: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_3_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_93 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_93 + +.L_16_blocks_overflow_93: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_93: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_94 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_94 +.L_small_initial_partial_block_94: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_94: + + orq %r8,%r8 + je .L_after_reduction_94 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_94: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_4_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl 
$252,%r15d + jae .L_16_blocks_overflow_95 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_95 + +.L_16_blocks_overflow_95: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_95: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_96 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq 
%zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_96 +.L_small_initial_partial_block_96: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_96: + + orq %r8,%r8 + je .L_after_reduction_96 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_96: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_5_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_97 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_97 + +.L_16_blocks_overflow_97: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_97: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_98 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_98 +.L_small_initial_partial_block_98: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq 
%zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_98: + + orq %r8,%r8 + je .L_after_reduction_98 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_98: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_6_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_99 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_99 + +.L_16_blocks_overflow_99: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_99: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 
+ vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_100 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_100 +.L_small_initial_partial_block_100: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq 
%ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_100: + + orq %r8,%r8 + je .L_after_reduction_100 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_100: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_7_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_101 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_101 + +.L_16_blocks_overflow_101: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_101: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + 
vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_102 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_102 +.L_small_initial_partial_block_102: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_102: + + orq %r8,%r8 + je .L_after_reduction_102 
+ vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_102: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_8_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_103 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_103 + +.L_16_blocks_overflow_103: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_103: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + 
vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_104 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_104 +.L_small_initial_partial_block_104: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_104: + + orq %r8,%r8 + je .L_after_reduction_104 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_104: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_9_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_105 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_105 + +.L_16_blocks_overflow_105: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_105: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + 
vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_106 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_106 +.L_small_initial_partial_block_106: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_106: + + orq %r8,%r8 + je .L_after_reduction_106 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_106: + jmp .L_last_blocks_done_88 
+.L_last_num_blocks_is_10_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_107 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_107 + +.L_16_blocks_overflow_107: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_107: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + 
vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_108 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_108 +.L_small_initial_partial_block_108: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + 
vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_108: + + orq %r8,%r8 + je .L_after_reduction_108 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_108: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_11_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_109 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_109 + +.L_16_blocks_overflow_109: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_109: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + 
vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_110 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_110 +.L_small_initial_partial_block_110: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_110: + + orq %r8,%r8 + je .L_after_reduction_110 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_110: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_12_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_111 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_111 + +.L_16_blocks_overflow_111: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_111: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + 
vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_112 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_112 +.L_small_initial_partial_block_112: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_112: + + orq %r8,%r8 + je .L_after_reduction_112 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_112: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_13_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_113 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_113 + +.L_16_blocks_overflow_113: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_113: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + 
vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_114 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_114 +.L_small_initial_partial_block_114: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_114: + + orq %r8,%r8 + je .L_after_reduction_114 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_114: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_14_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_115 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + 
jmp .L_16_blocks_ok_115 + +.L_16_blocks_overflow_115: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_115: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 
$1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_116 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_116 +.L_small_initial_partial_block_116: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + 
vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_116: + + orq %r8,%r8 + je .L_after_reduction_116 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_116: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_15_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_117 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_117 + +.L_16_blocks_overflow_117: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_117: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_118 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 
304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_118 +.L_small_initial_partial_block_118: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_118: + + orq %r8,%r8 + je .L_after_reduction_118 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_118: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_16_88: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_119 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_119 + +.L_16_blocks_overflow_119: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb 
%zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_119: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq 
%xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_120: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_120: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_120: + jmp .L_last_blocks_done_88 +.L_last_num_blocks_is_0_88: + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq 
$0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_88: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_10 + +.L_message_below_32_blocks_10: + + + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_121 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + 
vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) +.L_skip_hkeys_precomputation_121: + movq $1,%r14 + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_122 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_122 + jb .L_last_num_blocks_is_7_1_122 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_122 + jb .L_last_num_blocks_is_11_9_122 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_122 + ja .L_last_num_blocks_is_16_122 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_122 + jmp .L_last_num_blocks_is_13_122 + +.L_last_num_blocks_is_11_9_122: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_122 + ja .L_last_num_blocks_is_11_122 + jmp .L_last_num_blocks_is_9_122 + +.L_last_num_blocks_is_7_1_122: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_122 + jb .L_last_num_blocks_is_3_1_122 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_122 + je .L_last_num_blocks_is_6_122 + jmp .L_last_num_blocks_is_5_122 + +.L_last_num_blocks_is_3_1_122: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_122 + je .L_last_num_blocks_is_2_122 +.L_last_num_blocks_is_1_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_123 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_123 + +.L_16_blocks_overflow_123: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_123: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq 
%zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_124 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_124 +.L_small_initial_partial_block_124: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_124 +.L_small_initial_compute_done_124: +.L_after_reduction_124: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_2_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_125 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_125 + +.L_16_blocks_overflow_125: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_125: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_126 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_126 +.L_small_initial_partial_block_126: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_126: + + orq %r8,%r8 + je .L_after_reduction_126 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_126: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_3_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_127 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_127 + +.L_16_blocks_overflow_127: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_127: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_128 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 
+ vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_128 +.L_small_initial_partial_block_128: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_128: + + orq %r8,%r8 + je .L_after_reduction_128 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_128: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_4_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_129 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_129 + +.L_16_blocks_overflow_129: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_129: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_130 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_130 +.L_small_initial_partial_block_130: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_130: + + orq %r8,%r8 + je .L_after_reduction_130 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_130: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_5_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_131 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_131 + +.L_16_blocks_overflow_131: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_131: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + 
vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_132 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq 
%ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_132 +.L_small_initial_partial_block_132: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_132: + + orq %r8,%r8 + je .L_after_reduction_132 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_132: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_6_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_133 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_133 + +.L_16_blocks_overflow_133: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_133: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_134 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_134 +.L_small_initial_partial_block_134: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_134: + + orq %r8,%r8 + je .L_after_reduction_134 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_134: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_7_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_135 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_135 + +.L_16_blocks_overflow_135: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_135: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + 
vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_136 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_136 +.L_small_initial_partial_block_136: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_136: + + orq %r8,%r8 + je .L_after_reduction_136 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_136: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_8_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_137 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_137 + +.L_16_blocks_overflow_137: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_137: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 
$3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_138 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_138 +.L_small_initial_partial_block_138: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_138: + + orq %r8,%r8 + je .L_after_reduction_138 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_138: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_9_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_139 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_139 + +.L_16_blocks_overflow_139: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_139: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_140 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 
+ vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_140 +.L_small_initial_partial_block_140: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_140: + + orq %r8,%r8 + je .L_after_reduction_140 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_140: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_10_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_141 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_141 + +.L_16_blocks_overflow_141: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_141: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 
80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_142 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_142 +.L_small_initial_partial_block_142: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_142: + + orq %r8,%r8 + je .L_after_reduction_142 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_142: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_11_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_143 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_143 + +.L_16_blocks_overflow_143: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_143: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + 
vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_144 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_144 +.L_small_initial_partial_block_144: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + 
vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_144: + + orq %r8,%r8 + je .L_after_reduction_144 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_144: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_12_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_145 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_145 + +.L_16_blocks_overflow_145: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_145: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 
80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_146 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_146 +.L_small_initial_partial_block_146: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_146: + + orq %r8,%r8 + je .L_after_reduction_146 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_146: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_13_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_147 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_147 + +.L_16_blocks_overflow_147: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_147: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_148 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq 
$8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_148 +.L_small_initial_partial_block_148: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_148: + + orq %r8,%r8 + je .L_after_reduction_148 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_148: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_14_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_149 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_149 + +.L_16_blocks_overflow_149: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_149: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_150 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + 
vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_150 +.L_small_initial_partial_block_150: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_150: + + orq %r8,%r8 + je .L_after_reduction_150 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_150: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_15_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_151 + vpaddd %zmm28,%zmm2,%zmm0 + 
vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_151 + +.L_16_blocks_overflow_151: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_151: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq 
%zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_152 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_152 +.L_small_initial_partial_block_152: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq 
$8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_152: + + orq %r8,%r8 + je .L_after_reduction_152 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_152: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_16_122: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_153 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_153 + +.L_16_blocks_overflow_153: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_153: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + 
+ + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_154: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_154: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_154: + jmp .L_last_blocks_done_122 +.L_last_num_blocks_is_0_122: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + 
vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_122: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_10 + +.L_message_below_equal_16_blocks_10: + + + movl %r8d,%r12d + addl $15,%r12d + shrl $4,%r12d + cmpq $8,%r12 + je .L_small_initial_num_blocks_is_8_155 + jl .L_small_initial_num_blocks_is_7_1_155 + + + cmpq $12,%r12 + je .L_small_initial_num_blocks_is_12_155 + jl .L_small_initial_num_blocks_is_11_9_155 + + + cmpq $16,%r12 + je .L_small_initial_num_blocks_is_16_155 + cmpq $15,%r12 + je .L_small_initial_num_blocks_is_15_155 + cmpq $14,%r12 + je .L_small_initial_num_blocks_is_14_155 + jmp .L_small_initial_num_blocks_is_13_155 + +.L_small_initial_num_blocks_is_11_9_155: + + cmpq $11,%r12 + je .L_small_initial_num_blocks_is_11_155 + cmpq $10,%r12 + je .L_small_initial_num_blocks_is_10_155 + jmp .L_small_initial_num_blocks_is_9_155 + +.L_small_initial_num_blocks_is_7_1_155: + cmpq $4,%r12 + je .L_small_initial_num_blocks_is_4_155 + jl .L_small_initial_num_blocks_is_3_1_155 + + cmpq $7,%r12 + je .L_small_initial_num_blocks_is_7_155 + cmpq $6,%r12 + je .L_small_initial_num_blocks_is_6_155 + jmp .L_small_initial_num_blocks_is_5_155 + +.L_small_initial_num_blocks_is_3_1_155: + + cmpq $3,%r12 + je .L_small_initial_num_blocks_is_3_155 + cmpq $2,%r12 + je .L_small_initial_num_blocks_is_2_155 + + + + + +.L_small_initial_num_blocks_is_1_155: + vmovdqa64 SHUF_MASK(%rip),%xmm29 + vpaddd ONE(%rip),%xmm2,%xmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm0,%xmm2 + vpshufb %xmm29,%xmm0,%xmm0 + vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + 
vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %xmm15,%xmm0,%xmm0 + vpxorq %xmm6,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm6 + vextracti32x4 $0,%zmm6,%xmm13 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_156 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_156 +.L_small_initial_partial_block_156: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + + + + + + + + + + + + vpxorq %xmm13,%xmm14,%xmm14 + + jmp .L_after_reduction_156 +.L_small_initial_compute_done_156: +.L_after_reduction_156: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_2_155: + vmovdqa64 SHUF_MASK(%rip),%ymm29 + vshufi64x2 $0,%ymm2,%ymm2,%ymm0 + vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm0,%xmm2 + vpshufb %ymm29,%ymm0,%ymm0 + vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %ymm15,%ymm0,%ymm0 + vpxorq %ymm6,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm6 + vextracti32x4 $1,%zmm6,%xmm13 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_157 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq 
$0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_157 +.L_small_initial_partial_block_157: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_157: + + orq %r8,%r8 + je .L_after_reduction_157 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_157: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_3_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vextracti32x4 $2,%zmm6,%xmm13 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_158 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + 
+ vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_158 +.L_small_initial_partial_block_158: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_158: + + orq %r8,%r8 + je .L_after_reduction_158 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_158: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_4_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vextracti32x4 $3,%zmm6,%xmm13 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_159 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq 
%zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_159 +.L_small_initial_partial_block_159: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_159: + + orq %r8,%r8 + je .L_after_reduction_159 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_159: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_5_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %xmm15,%xmm3,%xmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %xmm7,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb 
%xmm29,%xmm3,%xmm7 + vextracti32x4 $0,%zmm7,%xmm13 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_160 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_160 +.L_small_initial_partial_block_160: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_160: + + orq %r8,%r8 + je .L_after_reduction_160 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_160: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_6_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%ymm15,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %ymm15,%ymm3,%ymm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %ymm7,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %ymm29,%ymm3,%ymm7 + vextracti32x4 $1,%zmm7,%xmm13 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_161 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_161 +.L_small_initial_partial_block_161: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_161: + + orq %r8,%r8 + je .L_after_reduction_161 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_161: + 
jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_7_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vextracti32x4 $2,%zmm7,%xmm13 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_162 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_162 +.L_small_initial_partial_block_162: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq 
$0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_162: + + orq %r8,%r8 + je .L_after_reduction_162 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_162: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_8_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vextracti32x4 $3,%zmm7,%xmm13 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_163 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq 
$8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_163 +.L_small_initial_partial_block_163: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_163: + + orq %r8,%r8 + je .L_after_reduction_163 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_163: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_9_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + 
vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %xmm10,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %xmm29,%xmm4,%xmm10 + vextracti32x4 $0,%zmm10,%xmm13 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_164 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_164 +.L_small_initial_partial_block_164: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + 
vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_164: + + orq %r8,%r8 + je .L_after_reduction_164 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_164: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_10_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %ymm15,%ymm4,%ymm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %ymm10,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %ymm29,%ymm4,%ymm10 + vextracti32x4 $1,%zmm10,%xmm13 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_165 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + 
vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_165 +.L_small_initial_partial_block_165: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_165: + + orq %r8,%r8 + je .L_after_reduction_165 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_165: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_11_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + 
vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vextracti32x4 $2,%zmm10,%xmm13 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_166 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_166 +.L_small_initial_partial_block_166: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 
+ vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_166: + + orq %r8,%r8 + je .L_after_reduction_166 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_166: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_12_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vextracti32x4 $3,%zmm10,%xmm13 
+ subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_167 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_167 +.L_small_initial_partial_block_167: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_167: + + orq %r8,%r8 + je .L_after_reduction_167 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_167: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_13_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd 
ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %xmm11,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %xmm29,%xmm5,%xmm11 + vextracti32x4 $0,%zmm11,%xmm13 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_168 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq 
$0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_168 +.L_small_initial_partial_block_168: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_168: + + orq %r8,%r8 + je .L_after_reduction_168 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_168: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_14_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %ymm15,%ymm5,%ymm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %ymm11,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %ymm29,%ymm5,%ymm11 + vextracti32x4 $1,%zmm11,%xmm13 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_169 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq 
$8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_169 +.L_small_initial_partial_block_169: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_169: + + orq %r8,%r8 + je .L_after_reduction_169 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_169: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_15_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %zmm29,%zmm5,%zmm11 + vextracti32x4 $2,%zmm11,%xmm13 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_170 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_170 +.L_small_initial_partial_block_170: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + 
vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_170: + + orq %r8,%r8 + je .L_after_reduction_170 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_170: + jmp .L_small_initial_blocks_encrypted_155 +.L_small_initial_num_blocks_is_16_155: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc 
%zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %zmm29,%zmm5,%zmm11 + vextracti32x4 $3,%zmm11,%xmm13 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_171: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_171: + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_171: +.L_small_initial_blocks_encrypted_155: +.L_ghash_done_10: + vmovdqu64 %xmm2,0(%rsi) + vmovdqu64 %xmm14,64(%rsi) +.L_enc_dec_done_10: + jmp .Lexit_gcm_encrypt +.align 32 +.Laes_gcm_encrypt_192_avx512: + orq %r8,%r8 + je .L_enc_dec_done_172 + xorq %r14,%r14 + vmovdqu64 64(%rsi),%xmm14 + + movq (%rdx),%r11 + orq %r11,%r11 + je .L_partial_block_done_173 + movl $16,%r10d + leaq byte_len_to_mask_table(%rip),%r12 + cmpq %r10,%r8 + cmovcq %r8,%r10 + kmovw (%r12,%r10,2),%k1 + 
vmovdqu8 (%rcx),%xmm0{%k1}{z} + + vmovdqu64 16(%rsi),%xmm3 + vmovdqu64 336(%rsi),%xmm4 + + + + leaq SHIFT_MASK(%rip),%r12 + addq %r11,%r12 + vmovdqu64 (%r12),%xmm5 + vpshufb %xmm5,%xmm3,%xmm3 + vpxorq %xmm0,%xmm3,%xmm3 + + + leaq (%r8,%r11,1),%r13 + subq $16,%r13 + jge .L_no_extra_mask_173 + subq %r13,%r12 +.L_no_extra_mask_173: + + + + vmovdqu64 16(%r12),%xmm0 + vpand %xmm0,%xmm3,%xmm3 + vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 + vpshufb %xmm5,%xmm3,%xmm3 + vpxorq %xmm3,%xmm14,%xmm14 + cmpq $0,%r13 + jl .L_partial_incomplete_173 + + vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 + vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 + vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 + vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 + vpxorq %xmm11,%xmm14,%xmm14 + + vpsrldq $8,%xmm14,%xmm11 + vpslldq $8,%xmm14,%xmm14 + vpxorq %xmm11,%xmm7,%xmm7 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vmovdqu64 POLY2(%rip),%xmm11 + + vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 + vpslldq $8,%xmm10,%xmm10 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 + vpsrldq $4,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 + vpslldq $4,%xmm14,%xmm14 + + vpternlogq $0x96,%xmm10,%xmm7,%xmm14 + + movq $0,(%rdx) + + movq %r11,%r12 + movq $16,%r11 + subq %r12,%r11 + jmp .L_enc_dec_done_173 + +.L_partial_incomplete_173: + addq %r8,(%rdx) + movq %r8,%r11 + +.L_enc_dec_done_173: + + + leaq byte_len_to_mask_table(%rip),%r12 + kmovw (%r12,%r11,2),%k1 + vmovdqu64 %xmm14,64(%rsi) + + vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 + vpshufb %xmm5,%xmm3,%xmm3 + movq %r9,%r12 + vmovdqu8 %xmm3,(%r12){%k1} +.L_partial_block_done_173: + vmovdqu64 0(%rsi),%xmm2 + subq %r11,%r8 + je .L_enc_dec_done_172 + cmpq $256,%r8 + jbe .L_message_below_equal_16_blocks_172 + + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vmovdqa64 ddq_addbe_4444(%rip),%zmm27 + vmovdqa64 ddq_addbe_1234(%rip),%zmm28 + + + + + + + vmovd %xmm2,%r15d + andl $255,%r15d + + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpshufb %zmm29,%zmm2,%zmm2 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_174 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_174 +.L_next_16_overflow_174: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_174: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 0(%rcx,%r11,1),%zmm0 + vmovdqu8 64(%rcx,%r11,1),%zmm3 + vmovdqu8 128(%rcx,%r11,1),%zmm4 + vmovdqu8 192(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc 
%zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,0(%r10,%r11,1) + vmovdqu8 %zmm10,64(%r10,%r11,1) + vmovdqu8 %zmm11,128(%r10,%r11,1) + vmovdqu8 %zmm12,192(%r10,%r11,1) + + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 + vmovdqa64 %zmm7,768(%rsp) + vmovdqa64 %zmm10,832(%rsp) + vmovdqa64 %zmm11,896(%rsp) + vmovdqa64 %zmm12,960(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_175 + + vmovdqu64 288(%rsi),%zmm0 + vmovdqu64 %zmm0,704(%rsp) + + vmovdqu64 224(%rsi),%zmm3 + vmovdqu64 %zmm3,640(%rsp) + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 160(%rsi),%zmm4 + vmovdqu64 %zmm4,576(%rsp) + + vmovdqu64 96(%rsi),%zmm5 + vmovdqu64 %zmm5,512(%rsp) +.L_skip_hkeys_precomputation_175: + cmpq $512,%r8 + jb .L_message_below_32_blocks_172 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_176 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_176 +.L_next_16_overflow_176: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_176: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 256(%rcx,%r11,1),%zmm0 + vmovdqu8 320(%rcx,%r11,1),%zmm3 + vmovdqu8 384(%rcx,%r11,1),%zmm4 + vmovdqu8 448(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc 
%zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,256(%r10,%r11,1) + vmovdqu8 %zmm10,320(%r10,%r11,1) + vmovdqu8 %zmm11,384(%r10,%r11,1) + vmovdqu8 %zmm12,448(%r10,%r11,1) + + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 + vmovdqa64 %zmm7,1024(%rsp) + vmovdqa64 %zmm10,1088(%rsp) + vmovdqa64 %zmm11,1152(%rsp) + vmovdqa64 %zmm12,1216(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_177 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + 
vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,192(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,128(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,64(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,0(%rsp) +.L_skip_hkeys_precomputation_177: + movq $1,%r14 + addq $512,%r11 + subq $512,%r8 + + cmpq $768,%r8 + jb .L_no_more_big_nblocks_172 +.L_encrypt_big_nblocks_172: + cmpb $240,%r15b + jae .L_16_blocks_overflow_178 + vpaddd 
%zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_178 +.L_16_blocks_overflow_178: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_178: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc 
%zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_179 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_179 +.L_16_blocks_overflow_179: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_179: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_180 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_180 +.L_16_blocks_overflow_180: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_180: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 512(%rcx,%r11,1),%zmm17 + vmovdqu8 576(%rcx,%r11,1),%zmm19 + vmovdqu8 640(%rcx,%r11,1),%zmm20 + vmovdqu8 704(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + + + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpternlogq $0x96,%zmm15,%zmm12,%zmm6 + vpxorq %zmm24,%zmm6,%zmm6 + vpternlogq $0x96,%zmm10,%zmm13,%zmm7 + vpxorq %zmm25,%zmm7,%zmm7 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vextracti64x4 $1,%zmm6,%ymm12 + vpxorq %ymm12,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm12 + vpxorq %xmm12,%xmm6,%xmm6 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm6 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,512(%r10,%r11,1) + vmovdqu8 %zmm3,576(%r10,%r11,1) + vmovdqu8 %zmm4,640(%r10,%r11,1) + vmovdqu8 %zmm5,704(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1024(%rsp) + vmovdqa64 %zmm3,1088(%rsp) + vmovdqa64 %zmm4,1152(%rsp) + vmovdqa64 %zmm5,1216(%rsp) + vmovdqa64 %zmm6,%zmm14 + + addq $768,%r11 + subq $768,%r8 + cmpq $768,%r8 + jae .L_encrypt_big_nblocks_172 + +.L_no_more_big_nblocks_172: + + cmpq $512,%r8 + jae 
.L_encrypt_32_blocks_172 + + cmpq $256,%r8 + jae .L_encrypt_16_blocks_172 +.L_encrypt_0_blocks_ghash_32_172: + movl %r8d,%r10d + andl $~15,%r10d + movl $256,%ebx + subl %r10d,%ebx + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + addl $256,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_181 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_181 + jb .L_last_num_blocks_is_7_1_181 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_181 + jb .L_last_num_blocks_is_11_9_181 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_181 + ja .L_last_num_blocks_is_16_181 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_181 + jmp .L_last_num_blocks_is_13_181 + +.L_last_num_blocks_is_11_9_181: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_181 + ja .L_last_num_blocks_is_11_181 + jmp .L_last_num_blocks_is_9_181 + +.L_last_num_blocks_is_7_1_181: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_181 + jb .L_last_num_blocks_is_3_1_181 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_181 + je .L_last_num_blocks_is_6_181 + jmp .L_last_num_blocks_is_5_181 + +.L_last_num_blocks_is_3_1_181: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_181 + je .L_last_num_blocks_is_2_181 +.L_last_num_blocks_is_1_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_182 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_182 + +.L_16_blocks_overflow_182: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_182: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + 
vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_183 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_183 +.L_small_initial_partial_block_183: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_183 +.L_small_initial_compute_done_183: +.L_after_reduction_183: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_2_181: 
+ leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_184 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_184 + +.L_16_blocks_overflow_184: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_184: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_185 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + 
vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_185 +.L_small_initial_partial_block_185: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_185: + + orq %r8,%r8 + je .L_after_reduction_185 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_185: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_3_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_186 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_186 + +.L_16_blocks_overflow_186: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_186: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 
+ vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_187 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_187 +.L_small_initial_partial_block_187: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_187: + + orq %r8,%r8 + je .L_after_reduction_187 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_187: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_4_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_188 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_188 + +.L_16_blocks_overflow_188: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_188: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 
16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_189 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_189 +.L_small_initial_partial_block_189: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_189: + + orq %r8,%r8 + je .L_after_reduction_189 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_189: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_5_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_190 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_190 + +.L_16_blocks_overflow_190: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_190: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq 
$0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_191 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_191 +.L_small_initial_partial_block_191: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_191: + + orq %r8,%r8 + je .L_after_reduction_191 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_191: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_6_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae 
.L_16_blocks_overflow_192 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_192 + +.L_16_blocks_overflow_192: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_192: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_193 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 
320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_193 +.L_small_initial_partial_block_193: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_193: + + orq %r8,%r8 + je .L_after_reduction_193 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_193: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_7_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_194 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_194 + +.L_16_blocks_overflow_194: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_194: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 
+ vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_195 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_195 +.L_small_initial_partial_block_195: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_195: + + orq %r8,%r8 + je .L_after_reduction_195 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_195: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_8_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_196 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_196 + +.L_16_blocks_overflow_196: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_196: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + 
vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_197 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_197 +.L_small_initial_partial_block_197: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq 
%zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_197: + + orq %r8,%r8 + je .L_after_reduction_197 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_197: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_9_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_198 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_198 + +.L_16_blocks_overflow_198: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_198: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq 
$0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_199 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_199 +.L_small_initial_partial_block_199: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq 
$8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_199: + + orq %r8,%r8 + je .L_after_reduction_199 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_199: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_10_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_200 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_200 + +.L_16_blocks_overflow_200: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_200: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_201 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_201 +.L_small_initial_partial_block_201: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq 
$0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_201: + + orq %r8,%r8 + je .L_after_reduction_201 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_201: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_11_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_202 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_202 + +.L_16_blocks_overflow_202: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_202: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + 
vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_203 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_203 +.L_small_initial_partial_block_203: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_203: + + orq %r8,%r8 + je .L_after_reduction_203 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_203: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_12_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_204 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_204 + +.L_16_blocks_overflow_204: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_204: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + 
vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_205 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_205 +.L_small_initial_partial_block_205: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_205: + + orq %r8,%r8 + je .L_after_reduction_205 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_205: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_13_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_206 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_206 + +.L_16_blocks_overflow_206: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_206: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_207 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq 
$0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_207 +.L_small_initial_partial_block_207: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_207: + + orq %r8,%r8 + je .L_after_reduction_207 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_207: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_14_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_208 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_208 + +.L_16_blocks_overflow_208: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_208: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 
%ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_209 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_209 +.L_small_initial_partial_block_209: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + 
vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_209: + + orq %r8,%r8 + je .L_after_reduction_209 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_209: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_15_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_210 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_210 + +.L_16_blocks_overflow_210: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_210: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 
128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_211 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_211 +.L_small_initial_partial_block_211: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_211: + + orq %r8,%r8 + je .L_after_reduction_211 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_211: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_16_181: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_212 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_212 + +.L_16_blocks_overflow_212: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_212: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_213: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + 
vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_213: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_213: + jmp .L_last_blocks_done_181 +.L_last_num_blocks_is_0_181: + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_181: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_172 +.L_encrypt_32_blocks_172: + cmpb $240,%r15b + jae .L_16_blocks_overflow_214 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_214 +.L_16_blocks_overflow_214: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_214: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast 
%zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_215 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_215 +.L_16_blocks_overflow_215: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_215: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + 
vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + + subq $512,%r8 + addq $512,%r11 + movl %r8d,%r10d + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_216 + + cmpl $8,%r10d + je 
.L_last_num_blocks_is_8_216 + jb .L_last_num_blocks_is_7_1_216 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_216 + jb .L_last_num_blocks_is_11_9_216 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_216 + ja .L_last_num_blocks_is_16_216 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_216 + jmp .L_last_num_blocks_is_13_216 + +.L_last_num_blocks_is_11_9_216: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_216 + ja .L_last_num_blocks_is_11_216 + jmp .L_last_num_blocks_is_9_216 + +.L_last_num_blocks_is_7_1_216: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_216 + jb .L_last_num_blocks_is_3_1_216 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_216 + je .L_last_num_blocks_is_6_216 + jmp .L_last_num_blocks_is_5_216 + +.L_last_num_blocks_is_3_1_216: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_216 + je .L_last_num_blocks_is_2_216 +.L_last_num_blocks_is_1_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_217 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_217 + +.L_16_blocks_overflow_217: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_217: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + 
cmpq $16,%r8 + jl .L_small_initial_partial_block_218 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_218 +.L_small_initial_partial_block_218: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_218 +.L_small_initial_compute_done_218: +.L_after_reduction_218: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_2_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_219 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_219 + +.L_16_blocks_overflow_219: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_219: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_220 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_220 +.L_small_initial_partial_block_220: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_220: + + orq %r8,%r8 + je .L_after_reduction_220 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_220: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_3_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq 
(%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_221 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_221 + +.L_16_blocks_overflow_221: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_221: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_222 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + 
vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_222 +.L_small_initial_partial_block_222: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_222: + + orq %r8,%r8 + je .L_after_reduction_222 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_222: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_4_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_223 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_223 + +.L_16_blocks_overflow_223: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_223: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq 
%zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_224 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_224 +.L_small_initial_partial_block_224: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_224: + + orq %r8,%r8 + je .L_after_reduction_224 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_224: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_5_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_225 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_225 + +.L_16_blocks_overflow_225: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_225: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 
$0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_226 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 
+ vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_226 +.L_small_initial_partial_block_226: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_226: + + orq %r8,%r8 + je .L_after_reduction_226 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_226: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_6_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_227 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_227 + +.L_16_blocks_overflow_227: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_227: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 
+ vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_228 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_228 +.L_small_initial_partial_block_228: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 
+ vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_228: + + orq %r8,%r8 + je .L_after_reduction_228 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_228: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_7_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_229 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_229 + +.L_16_blocks_overflow_229: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_229: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 
+ vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_230 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_230 +.L_small_initial_partial_block_230: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_230: + + orq %r8,%r8 + je .L_after_reduction_230 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_230: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_8_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae 
.L_16_blocks_overflow_231 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_231 + +.L_16_blocks_overflow_231: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_231: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_232 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_232 +.L_small_initial_partial_block_232: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_232: + + orq %r8,%r8 + je .L_after_reduction_232 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_232: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_9_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_233 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_233 + +.L_16_blocks_overflow_233: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_233: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_234 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + 
vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_234 +.L_small_initial_partial_block_234: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_234: + + orq %r8,%r8 + je .L_after_reduction_234 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_234: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_10_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_235 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_235 + +.L_16_blocks_overflow_235: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_235: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_236 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + 
vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_236 +.L_small_initial_partial_block_236: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_236: + + orq %r8,%r8 + je .L_after_reduction_236 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_236: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_11_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_237 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_237 + +.L_16_blocks_overflow_237: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_237: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_238 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + 
vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_238 +.L_small_initial_partial_block_238: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_238: + + orq %r8,%r8 + je .L_after_reduction_238 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_238: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_12_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_239 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_239 + +.L_16_blocks_overflow_239: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 
+ vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_239: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_240 + + + + + + subq $16,%r8 + movq 
$0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_240 +.L_small_initial_partial_block_240: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_240: + + orq %r8,%r8 + je .L_after_reduction_240 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_240: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_13_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_241 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd 
%zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_241 + +.L_16_blocks_overflow_241: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_241: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 
192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_242 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_242 +.L_small_initial_partial_block_242: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq 
$8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_242: + + orq %r8,%r8 + je .L_after_reduction_242 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_242: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_14_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_243 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_243 + +.L_16_blocks_overflow_243: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_243: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + 
+ + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_244 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq 
$4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_244 +.L_small_initial_partial_block_244: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_244: + + orq %r8,%r8 + je .L_after_reduction_244 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_244: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_15_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_245 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_245 + +.L_16_blocks_overflow_245: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_245: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_246 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + 
vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_246 +.L_small_initial_partial_block_246: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_246: + + orq %r8,%r8 + je .L_after_reduction_246 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_246: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_16_216: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae 
.L_16_blocks_overflow_247 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_247 + +.L_16_blocks_overflow_247: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_247: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 
+ vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_248: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_248: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_248: + jmp .L_last_blocks_done_216 +.L_last_num_blocks_is_0_216: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq 
$0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_216: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_172 +.L_encrypt_16_blocks_172: + cmpb $240,%r15b + jae .L_16_blocks_overflow_249 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_249 +.L_16_blocks_overflow_249: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_249: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 
96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 256(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 320(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 384(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 448(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_250 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_250 + jb .L_last_num_blocks_is_7_1_250 + + + cmpl $12,%r10d + je 
.L_last_num_blocks_is_12_250 + jb .L_last_num_blocks_is_11_9_250 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_250 + ja .L_last_num_blocks_is_16_250 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_250 + jmp .L_last_num_blocks_is_13_250 + +.L_last_num_blocks_is_11_9_250: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_250 + ja .L_last_num_blocks_is_11_250 + jmp .L_last_num_blocks_is_9_250 + +.L_last_num_blocks_is_7_1_250: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_250 + jb .L_last_num_blocks_is_3_1_250 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_250 + je .L_last_num_blocks_is_6_250 + jmp .L_last_num_blocks_is_5_250 + +.L_last_num_blocks_is_3_1_250: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_250 + je .L_last_num_blocks_is_2_250 +.L_last_num_blocks_is_1_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_251 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_251 + +.L_16_blocks_overflow_251: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_251: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %xmm31,%xmm0,%xmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq 
$0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_252 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_252 +.L_small_initial_partial_block_252: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_252 +.L_small_initial_compute_done_252: +.L_after_reduction_252: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_2_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_253 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_253 + +.L_16_blocks_overflow_253: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_253: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + 
vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %ymm31,%ymm0,%ymm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_254 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_254 +.L_small_initial_partial_block_254: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_254: + + orq %r8,%r8 + je .L_after_reduction_254 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_254: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_3_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_255 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_255 + +.L_16_blocks_overflow_255: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_255: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + 
vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_256 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_256 +.L_small_initial_partial_block_256: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_256: + + orq %r8,%r8 + je .L_after_reduction_256 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_256: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_4_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_257 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_257 + +.L_16_blocks_overflow_257: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_257: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 
704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_258 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_258 +.L_small_initial_partial_block_258: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq 
$0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_258: + + orq %r8,%r8 + je .L_after_reduction_258 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_258: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_5_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_259 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_259 + +.L_16_blocks_overflow_259: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_259: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_260 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_260 +.L_small_initial_partial_block_260: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_260: + + orq %r8,%r8 + je .L_after_reduction_260 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_260: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_6_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_261 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_261 + +.L_16_blocks_overflow_261: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_261: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + 
vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_262 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_262 +.L_small_initial_partial_block_262: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_262: + + orq %r8,%r8 + je .L_after_reduction_262 + vpxorq %xmm7,%xmm14,%xmm14 
+.L_after_reduction_262: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_7_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_263 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_263 + +.L_16_blocks_overflow_263: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_263: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq 
$0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_264 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_264 +.L_small_initial_partial_block_264: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_264: + + orq %r8,%r8 + je .L_after_reduction_264 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_264: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_8_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_265 + vpaddd 
%zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_265 + +.L_16_blocks_overflow_265: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_265: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq 
%zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_266 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_266 +.L_small_initial_partial_block_266: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_266: + + orq %r8,%r8 + je .L_after_reduction_266 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_266: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_9_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_267 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_267 + +.L_16_blocks_overflow_267: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd 
%zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_267: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + 
vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_268 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_268 +.L_small_initial_partial_block_268: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_268: + + orq %r8,%r8 + je .L_after_reduction_268 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_268: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_10_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_269 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_269 + +.L_16_blocks_overflow_269: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_269: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vextracti64x4 
$1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_270 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_270 +.L_small_initial_partial_block_270: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + 
vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_270: + + orq %r8,%r8 + je .L_after_reduction_270 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_270: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_11_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_271 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_271 + +.L_16_blocks_overflow_271: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_271: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + 
vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_272 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq 
$8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_272 +.L_small_initial_partial_block_272: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_272: + + orq %r8,%r8 + je .L_after_reduction_272 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_272: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_12_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_273 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_273 + +.L_16_blocks_overflow_273: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_273: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_274 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_274 +.L_small_initial_partial_block_274: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_274: + + orq %r8,%r8 + je .L_after_reduction_274 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_274: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_13_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_275 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_275 + +.L_16_blocks_overflow_275: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_275: + + + + + vbroadcastf64x2 
0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc 
%xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_276 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_276 +.L_small_initial_partial_block_276: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq 
$0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_276: + + orq %r8,%r8 + je .L_after_reduction_276 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_276: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_14_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_277 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_277 + +.L_16_blocks_overflow_277: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_277: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + 
vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_278 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + 
vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_278 +.L_small_initial_partial_block_278: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_278: + + orq %r8,%r8 + je .L_after_reduction_278 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_278: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_15_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_279 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_279 + +.L_16_blocks_overflow_279: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_279: + + + + + 
vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + 
vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_280 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_280 +.L_small_initial_partial_block_280: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + 
vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_280: + + orq %r8,%r8 + je .L_after_reduction_280 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_280: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_16_250: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_281 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_281 + +.L_16_blocks_overflow_281: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_281: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_282: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq 
$0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_282: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_282: + jmp .L_last_blocks_done_250 +.L_last_num_blocks_is_0_250: + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_250: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_172 + +.L_message_below_32_blocks_172: + + + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_283 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 
576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) +.L_skip_hkeys_precomputation_283: + movq $1,%r14 + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_284 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_284 + jb .L_last_num_blocks_is_7_1_284 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_284 + jb .L_last_num_blocks_is_11_9_284 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_284 + ja .L_last_num_blocks_is_16_284 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_284 + jmp .L_last_num_blocks_is_13_284 + +.L_last_num_blocks_is_11_9_284: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_284 + ja .L_last_num_blocks_is_11_284 + jmp .L_last_num_blocks_is_9_284 + +.L_last_num_blocks_is_7_1_284: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_284 + jb .L_last_num_blocks_is_3_1_284 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_284 + je .L_last_num_blocks_is_6_284 + jmp .L_last_num_blocks_is_5_284 + +.L_last_num_blocks_is_3_1_284: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_284 + je 
.L_last_num_blocks_is_2_284 +.L_last_num_blocks_is_1_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_285 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_285 + +.L_16_blocks_overflow_285: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_285: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_286 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + 
vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_286 +.L_small_initial_partial_block_286: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_286 +.L_small_initial_compute_done_286: +.L_after_reduction_286: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_2_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_287 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_287 + +.L_16_blocks_overflow_287: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_287: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 
176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_288 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_288 +.L_small_initial_partial_block_288: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_288: + + orq %r8,%r8 + je .L_after_reduction_288 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_288: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_3_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_289 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_289 + +.L_16_blocks_overflow_289: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_289: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 
896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_290 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_290 +.L_small_initial_partial_block_290: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_290: + + orq %r8,%r8 + je .L_after_reduction_290 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_290: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_4_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_291 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_291 + +.L_16_blocks_overflow_291: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_291: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_292 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_292 +.L_small_initial_partial_block_292: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_292: + + orq %r8,%r8 + je .L_after_reduction_292 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_292: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_5_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_293 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_293 + +.L_16_blocks_overflow_293: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_293: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_294 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_294 +.L_small_initial_partial_block_294: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq 
%zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_294: + + orq %r8,%r8 + je .L_after_reduction_294 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_294: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_6_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_295 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_295 + +.L_16_blocks_overflow_295: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_295: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 
144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_296 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_296 +.L_small_initial_partial_block_296: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_296: + + orq %r8,%r8 + je .L_after_reduction_296 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_296: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_7_284: + leaq 
byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_297 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_297 + +.L_16_blocks_overflow_297: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_297: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_298 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_298 +.L_small_initial_partial_block_298: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_298: + + orq %r8,%r8 + je .L_after_reduction_298 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_298: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_8_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_299 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_299 + +.L_16_blocks_overflow_299: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_299: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_300 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq 
$8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_300 +.L_small_initial_partial_block_300: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_300: + + orq %r8,%r8 + je .L_after_reduction_300 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_300: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_9_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_301 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_301 + +.L_16_blocks_overflow_301: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_301: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_302 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_302 +.L_small_initial_partial_block_302: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_302: + + orq %r8,%r8 + je .L_after_reduction_302 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_302: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_10_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_303 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_303 + +.L_16_blocks_overflow_303: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_303: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_304 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_304 +.L_small_initial_partial_block_304: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_304: + + orq %r8,%r8 + je .L_after_reduction_304 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_304: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_11_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_305 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_305 + +.L_16_blocks_overflow_305: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_305: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + 
vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_306 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 
$1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_306 +.L_small_initial_partial_block_306: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_306: + + orq %r8,%r8 + je .L_after_reduction_306 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_306: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_12_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_307 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_307 + +.L_16_blocks_overflow_307: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_307: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 
48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_308 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq 
%zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_308 +.L_small_initial_partial_block_308: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_308: + + orq %r8,%r8 + je .L_after_reduction_308 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_308: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_13_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_309 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_309 + +.L_16_blocks_overflow_309: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_309: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq 
%xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl 
.L_small_initial_partial_block_310 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_310 +.L_small_initial_partial_block_310: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_310: + + orq %r8,%r8 + je .L_after_reduction_310 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_310: + jmp .L_last_blocks_done_284 
+.L_last_num_blocks_is_14_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_311 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_311 + +.L_16_blocks_overflow_311: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_311: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_312 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_312 +.L_small_initial_partial_block_312: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq 
$0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_312: + + orq %r8,%r8 + je .L_after_reduction_312 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_312: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_15_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_313 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_313 + +.L_16_blocks_overflow_313: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_313: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq 
$0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_314 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 
+ vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_314 +.L_small_initial_partial_block_314: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_314: + + orq %r8,%r8 + je .L_after_reduction_314 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_314: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_16_284: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_315 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_315 + +.L_16_blocks_overflow_315: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_315: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq 
%zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq 
$16 * (16 - 1),%r8 +.L_small_initial_partial_block_316: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_316: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_316: + jmp .L_last_blocks_done_284 +.L_last_num_blocks_is_0_284: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq 
%xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_284: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_172 + +.L_message_below_equal_16_blocks_172: + + + movl %r8d,%r12d + addl $15,%r12d + shrl $4,%r12d + cmpq $8,%r12 + je .L_small_initial_num_blocks_is_8_317 + jl .L_small_initial_num_blocks_is_7_1_317 + + + cmpq $12,%r12 + je .L_small_initial_num_blocks_is_12_317 + jl .L_small_initial_num_blocks_is_11_9_317 + + + cmpq $16,%r12 + je .L_small_initial_num_blocks_is_16_317 + cmpq $15,%r12 + je .L_small_initial_num_blocks_is_15_317 + cmpq $14,%r12 + je .L_small_initial_num_blocks_is_14_317 + jmp .L_small_initial_num_blocks_is_13_317 + +.L_small_initial_num_blocks_is_11_9_317: + + cmpq $11,%r12 + je .L_small_initial_num_blocks_is_11_317 + cmpq $10,%r12 + je .L_small_initial_num_blocks_is_10_317 + jmp .L_small_initial_num_blocks_is_9_317 + +.L_small_initial_num_blocks_is_7_1_317: + cmpq $4,%r12 + je .L_small_initial_num_blocks_is_4_317 + jl .L_small_initial_num_blocks_is_3_1_317 + + cmpq $7,%r12 + je .L_small_initial_num_blocks_is_7_317 + cmpq $6,%r12 + je .L_small_initial_num_blocks_is_6_317 + jmp .L_small_initial_num_blocks_is_5_317 + +.L_small_initial_num_blocks_is_3_1_317: + + cmpq $3,%r12 + je .L_small_initial_num_blocks_is_3_317 + cmpq $2,%r12 + je .L_small_initial_num_blocks_is_2_317 + + + + + +.L_small_initial_num_blocks_is_1_317: + vmovdqa64 SHUF_MASK(%rip),%xmm29 + vpaddd ONE(%rip),%xmm2,%xmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm0,%xmm2 + vpshufb %xmm29,%xmm0,%xmm0 + vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %xmm15,%xmm0,%xmm0 + vpxorq %xmm6,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm6 + vextracti32x4 $0,%zmm6,%xmm13 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_318 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_318 +.L_small_initial_partial_block_318: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + + + + + + + + + + + + vpxorq %xmm13,%xmm14,%xmm14 + + jmp .L_after_reduction_318 +.L_small_initial_compute_done_318: +.L_after_reduction_318: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_2_317: + vmovdqa64 SHUF_MASK(%rip),%ymm29 + vshufi64x2 $0,%ymm2,%ymm2,%ymm0 + vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm0,%xmm2 + vpshufb %ymm29,%ymm0,%ymm0 + vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %ymm15,%ymm0,%ymm0 + vpxorq %ymm6,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm6 + vextracti32x4 $1,%zmm6,%xmm13 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_319 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_319 +.L_small_initial_partial_block_319: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + 
vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_319: + + orq %r8,%r8 + je .L_after_reduction_319 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_319: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_3_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vextracti32x4 $2,%zmm6,%xmm13 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_320 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_320 +.L_small_initial_partial_block_320: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq 
$8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_320: + + orq %r8,%r8 + je .L_after_reduction_320 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_320: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_4_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vextracti32x4 $3,%zmm6,%xmm13 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_321 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_321 +.L_small_initial_partial_block_321: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq 
$0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_321: + + orq %r8,%r8 + je .L_after_reduction_321 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_321: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_5_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %xmm15,%xmm3,%xmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %xmm7,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %xmm29,%xmm3,%xmm7 + vextracti32x4 $0,%zmm7,%xmm13 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_322 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq 
%zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_322 +.L_small_initial_partial_block_322: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_322: + + orq %r8,%r8 + je .L_after_reduction_322 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_322: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_6_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %ymm15,%ymm3,%ymm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %ymm7,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm12 + movq %r9,%r10 
+ vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %ymm29,%ymm3,%ymm7 + vextracti32x4 $1,%zmm7,%xmm13 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_323 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_323 +.L_small_initial_partial_block_323: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_323: + + orq %r8,%r8 + je .L_after_reduction_323 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_323: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_7_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + 
vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vextracti32x4 $2,%zmm7,%xmm13 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_324 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_324 +.L_small_initial_partial_block_324: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + 
vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_324: + + orq %r8,%r8 + je .L_after_reduction_324 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_324: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_8_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vextracti32x4 $3,%zmm7,%xmm13 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_325 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq 
%xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_325 +.L_small_initial_partial_block_325: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_325: + + orq %r8,%r8 + je .L_after_reduction_325 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_325: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_9_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %xmm10,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %xmm29,%xmm4,%xmm10 + vextracti32x4 $0,%zmm10,%xmm13 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_326 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_326 +.L_small_initial_partial_block_326: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_326: + + orq %r8,%r8 + je .L_after_reduction_326 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_326: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_10_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %ymm15,%ymm4,%ymm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %ymm10,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %ymm29,%ymm4,%ymm10 + vextracti32x4 $1,%zmm10,%xmm13 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_327 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + 
vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_327 +.L_small_initial_partial_block_327: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_327: + + orq %r8,%r8 + je .L_after_reduction_327 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_327: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_11_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + 
vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vextracti32x4 $2,%zmm10,%xmm13 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_328 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_328 +.L_small_initial_partial_block_328: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_328: + + orq %r8,%r8 + je .L_after_reduction_328 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_328: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_12_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vextracti32x4 $3,%zmm10,%xmm13 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_329 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_329 +.L_small_initial_partial_block_329: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 
$1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_329: + + orq %r8,%r8 + je .L_after_reduction_329 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_329: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_13_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %xmm11,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 
%xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %xmm29,%xmm5,%xmm11 + vextracti32x4 $0,%zmm11,%xmm13 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_330 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_330 +.L_small_initial_partial_block_330: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + 
vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_330: + + orq %r8,%r8 + je .L_after_reduction_330 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_330: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_14_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %ymm15,%ymm5,%ymm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %ymm11,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %ymm29,%ymm5,%ymm11 + vextracti32x4 $1,%zmm11,%xmm13 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_331 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + 
vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_331 +.L_small_initial_partial_block_331: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_331: + + orq %r8,%r8 + je .L_after_reduction_331 + vpxorq 
%xmm13,%xmm14,%xmm14 +.L_after_reduction_331: + jmp .L_small_initial_blocks_encrypted_317 +.L_small_initial_num_blocks_is_15_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %zmm29,%zmm5,%zmm11 + vextracti32x4 $2,%zmm11,%xmm13 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_332 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 
176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_332 +.L_small_initial_partial_block_332: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_332: + + orq %r8,%r8 + je .L_after_reduction_332 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_332: + jmp .L_small_initial_blocks_encrypted_317 
+.L_small_initial_num_blocks_is_16_317: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %zmm29,%zmm5,%zmm11 + vextracti32x4 $3,%zmm11,%xmm13 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_333: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + 
vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_333: + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_333: +.L_small_initial_blocks_encrypted_317: +.L_ghash_done_172: + vmovdqu64 %xmm2,0(%rsi) + vmovdqu64 %xmm14,64(%rsi) +.L_enc_dec_done_172: + jmp .Lexit_gcm_encrypt +.align 32 +.Laes_gcm_encrypt_256_avx512: + orq %r8,%r8 + je .L_enc_dec_done_334 + xorq %r14,%r14 + vmovdqu64 64(%rsi),%xmm14 + + movq (%rdx),%r11 + orq %r11,%r11 + je .L_partial_block_done_335 + movl $16,%r10d + leaq byte_len_to_mask_table(%rip),%r12 + cmpq %r10,%r8 + cmovcq %r8,%r10 + kmovw (%r12,%r10,2),%k1 + vmovdqu8 (%rcx),%xmm0{%k1}{z} + + vmovdqu64 16(%rsi),%xmm3 + vmovdqu64 336(%rsi),%xmm4 + + + + leaq SHIFT_MASK(%rip),%r12 + addq %r11,%r12 + vmovdqu64 (%r12),%xmm5 + vpshufb %xmm5,%xmm3,%xmm3 + vpxorq %xmm0,%xmm3,%xmm3 + + + leaq (%r8,%r11,1),%r13 + subq $16,%r13 + jge .L_no_extra_mask_335 + subq %r13,%r12 +.L_no_extra_mask_335: + + + + vmovdqu64 16(%r12),%xmm0 + vpand %xmm0,%xmm3,%xmm3 + vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 + vpshufb %xmm5,%xmm3,%xmm3 + vpxorq %xmm3,%xmm14,%xmm14 + cmpq $0,%r13 + jl .L_partial_incomplete_335 + + vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 + vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 + vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 + vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 + vpxorq %xmm11,%xmm14,%xmm14 + + vpsrldq $8,%xmm14,%xmm11 + vpslldq $8,%xmm14,%xmm14 + vpxorq %xmm11,%xmm7,%xmm7 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vmovdqu64 POLY2(%rip),%xmm11 + + vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 + vpslldq $8,%xmm10,%xmm10 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 + vpsrldq $4,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 + vpslldq $4,%xmm14,%xmm14 + + vpternlogq $0x96,%xmm10,%xmm7,%xmm14 + + movq $0,(%rdx) + + movq %r11,%r12 + movq $16,%r11 + subq %r12,%r11 + jmp .L_enc_dec_done_335 + +.L_partial_incomplete_335: + addq %r8,(%rdx) + movq %r8,%r11 + +.L_enc_dec_done_335: + + + leaq byte_len_to_mask_table(%rip),%r12 + kmovw (%r12,%r11,2),%k1 + vmovdqu64 %xmm14,64(%rsi) + + vpshufb SHUF_MASK(%rip),%xmm3,%xmm3 + vpshufb %xmm5,%xmm3,%xmm3 + movq %r9,%r12 + vmovdqu8 %xmm3,(%r12){%k1} 
+.L_partial_block_done_335: + vmovdqu64 0(%rsi),%xmm2 + subq %r11,%r8 + je .L_enc_dec_done_334 + cmpq $256,%r8 + jbe .L_message_below_equal_16_blocks_334 + + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vmovdqa64 ddq_addbe_4444(%rip),%zmm27 + vmovdqa64 ddq_addbe_1234(%rip),%zmm28 + + + + + + + vmovd %xmm2,%r15d + andl $255,%r15d + + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpshufb %zmm29,%zmm2,%zmm2 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_336 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_336 +.L_next_16_overflow_336: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_336: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 0(%rcx,%r11,1),%zmm0 + vmovdqu8 64(%rcx,%r11,1),%zmm3 + vmovdqu8 128(%rcx,%r11,1),%zmm4 + vmovdqu8 192(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 208(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 224(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq 
%r9,%r10 + vmovdqu8 %zmm7,0(%r10,%r11,1) + vmovdqu8 %zmm10,64(%r10,%r11,1) + vmovdqu8 %zmm11,128(%r10,%r11,1) + vmovdqu8 %zmm12,192(%r10,%r11,1) + + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 + vmovdqa64 %zmm7,768(%rsp) + vmovdqa64 %zmm10,832(%rsp) + vmovdqa64 %zmm11,896(%rsp) + vmovdqa64 %zmm12,960(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_337 + + vmovdqu64 288(%rsi),%zmm0 + vmovdqu64 %zmm0,704(%rsp) + + vmovdqu64 224(%rsi),%zmm3 + vmovdqu64 %zmm3,640(%rsp) + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 160(%rsi),%zmm4 + vmovdqu64 %zmm4,576(%rsp) + + vmovdqu64 96(%rsi),%zmm5 + vmovdqu64 %zmm5,512(%rsp) +.L_skip_hkeys_precomputation_337: + cmpq $512,%r8 + jb .L_message_below_32_blocks_334 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_338 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_338 +.L_next_16_overflow_338: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_338: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 256(%rcx,%r11,1),%zmm0 + vmovdqu8 320(%rcx,%r11,1),%zmm3 + vmovdqu8 384(%rcx,%r11,1),%zmm4 + vmovdqu8 448(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 208(%rdi),%zmm6 
+ vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 224(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,256(%r10,%r11,1) + vmovdqu8 %zmm10,320(%r10,%r11,1) + vmovdqu8 %zmm11,384(%r10,%r11,1) + vmovdqu8 %zmm12,448(%r10,%r11,1) + + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 + vmovdqa64 %zmm7,1024(%rsp) + vmovdqa64 %zmm10,1088(%rsp) + vmovdqa64 %zmm11,1152(%rsp) + vmovdqa64 %zmm12,1216(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_339 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq 
$0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,192(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,128(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,64(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,0(%rsp) +.L_skip_hkeys_precomputation_339: + movq $1,%r14 + addq $512,%r11 + subq $512,%r8 + + cmpq $768,%r8 + jb .L_no_more_big_nblocks_334 +.L_encrypt_big_nblocks_334: + cmpb $240,%r15b + jae .L_16_blocks_overflow_340 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_340 +.L_16_blocks_overflow_340: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_340: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb 
%zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_341 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_341 +.L_16_blocks_overflow_341: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_341: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc 
%zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_342 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_342 +.L_16_blocks_overflow_342: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_342: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + 
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 512(%rcx,%r11,1),%zmm17 + vmovdqu8 576(%rcx,%r11,1),%zmm19 + vmovdqu8 640(%rcx,%r11,1),%zmm20 + vmovdqu8 704(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + + + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpternlogq $0x96,%zmm15,%zmm12,%zmm6 + vpxorq %zmm24,%zmm6,%zmm6 + vpternlogq $0x96,%zmm10,%zmm13,%zmm7 + vpxorq %zmm25,%zmm7,%zmm7 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vextracti64x4 $1,%zmm6,%ymm12 + vpxorq %ymm12,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm12 + vpxorq %xmm12,%xmm6,%xmm6 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm6 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,512(%r10,%r11,1) + vmovdqu8 %zmm3,576(%r10,%r11,1) + vmovdqu8 %zmm4,640(%r10,%r11,1) + vmovdqu8 %zmm5,704(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1024(%rsp) + vmovdqa64 %zmm3,1088(%rsp) + vmovdqa64 %zmm4,1152(%rsp) + vmovdqa64 %zmm5,1216(%rsp) + vmovdqa64 %zmm6,%zmm14 + + addq $768,%r11 + subq $768,%r8 + cmpq $768,%r8 + jae .L_encrypt_big_nblocks_334 + +.L_no_more_big_nblocks_334: + + cmpq $512,%r8 + jae .L_encrypt_32_blocks_334 + + cmpq $256,%r8 + jae .L_encrypt_16_blocks_334 
+.L_encrypt_0_blocks_ghash_32_334: + movl %r8d,%r10d + andl $~15,%r10d + movl $256,%ebx + subl %r10d,%ebx + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + addl $256,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_343 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_343 + jb .L_last_num_blocks_is_7_1_343 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_343 + jb .L_last_num_blocks_is_11_9_343 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_343 + ja .L_last_num_blocks_is_16_343 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_343 + jmp .L_last_num_blocks_is_13_343 + +.L_last_num_blocks_is_11_9_343: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_343 + ja .L_last_num_blocks_is_11_343 + jmp .L_last_num_blocks_is_9_343 + +.L_last_num_blocks_is_7_1_343: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_343 + jb .L_last_num_blocks_is_3_1_343 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_343 + je .L_last_num_blocks_is_6_343 + jmp .L_last_num_blocks_is_5_343 + +.L_last_num_blocks_is_3_1_343: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_343 + je .L_last_num_blocks_is_2_343 +.L_last_num_blocks_is_1_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_344 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_344 + +.L_16_blocks_overflow_344: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_344: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_345 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_345 +.L_small_initial_partial_block_345: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_345 +.L_small_initial_compute_done_345: 
+.L_after_reduction_345: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_2_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_346 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_346 + +.L_16_blocks_overflow_346: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_346: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_347 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_347 +.L_small_initial_partial_block_347: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_347: + + orq %r8,%r8 + je .L_after_reduction_347 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_347: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_3_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_348 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_348 + +.L_16_blocks_overflow_348: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_348: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_349 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_349 +.L_small_initial_partial_block_349: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_349: + + orq %r8,%r8 + je .L_after_reduction_349 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_349: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_4_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_350 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_350 + 
+.L_16_blocks_overflow_350: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_350: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_351 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + 
vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_351 +.L_small_initial_partial_block_351: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_351: + + orq %r8,%r8 + je .L_after_reduction_351 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_351: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_5_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_352 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_352 + +.L_16_blocks_overflow_352: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_352: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + 
vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_353 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_353 +.L_small_initial_partial_block_353: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 
+ vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_353: + + orq %r8,%r8 + je .L_after_reduction_353 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_353: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_6_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_354 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_354 + +.L_16_blocks_overflow_354: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_354: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_355 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_355 +.L_small_initial_partial_block_355: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_355: + + orq %r8,%r8 + je .L_after_reduction_355 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_355: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_7_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_356 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_356 + 
+.L_16_blocks_overflow_356: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_356: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_357 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_357 +.L_small_initial_partial_block_357: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_357: + + orq %r8,%r8 + je .L_after_reduction_357 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_357: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_8_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_358 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_358 + +.L_16_blocks_overflow_358: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_358: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_359 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_359 +.L_small_initial_partial_block_359: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_359: + + orq %r8,%r8 + je .L_after_reduction_359 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_359: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_9_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_360 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_360 + +.L_16_blocks_overflow_360: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_360: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_361 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq 
$0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_361 +.L_small_initial_partial_block_361: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_361: + + orq %r8,%r8 + je .L_after_reduction_361 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_361: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_10_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_362 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_362 + +.L_16_blocks_overflow_362: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_362: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 
48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_363 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + 
vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_363 +.L_small_initial_partial_block_363: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_363: + + orq %r8,%r8 + je .L_after_reduction_363 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_363: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_11_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_364 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_364 + +.L_16_blocks_overflow_364: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_364: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 
64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_365 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 
176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_365 +.L_small_initial_partial_block_365: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_365: + + orq %r8,%r8 + je .L_after_reduction_365 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_365: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_12_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_366 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp 
.L_16_blocks_ok_366 + +.L_16_blocks_overflow_366: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_366: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 
+ vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_367 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_367 +.L_small_initial_partial_block_367: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_367: + + orq %r8,%r8 + je .L_after_reduction_367 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_367: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_13_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_368 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_368 + +.L_16_blocks_overflow_368: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_368: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_369 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_369 +.L_small_initial_partial_block_369: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 
160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_369: + + orq %r8,%r8 + je .L_after_reduction_369 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_369: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_14_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_370 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_370 + +.L_16_blocks_overflow_370: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_370: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_371 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq 
$0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_371 +.L_small_initial_partial_block_371: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_371: + + orq %r8,%r8 + je .L_after_reduction_371 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_371: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_15_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_372 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_372 + 
+.L_16_blocks_overflow_372: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_372: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_373 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_373 +.L_small_initial_partial_block_373: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + 
vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_373: + + orq %r8,%r8 + je .L_after_reduction_373 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_373: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_16_343: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_374 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_374 + +.L_16_blocks_overflow_374: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_374: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + 
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_375: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq 
$0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_375: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_375: + jmp .L_last_blocks_done_343 +.L_last_num_blocks_is_0_343: + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_343: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_334 +.L_encrypt_32_blocks_334: + cmpb $240,%r15b + jae .L_16_blocks_overflow_376 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_376 +.L_16_blocks_overflow_376: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 
+ vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_376: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_377 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_377 +.L_16_blocks_overflow_377: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_377: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + 
vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq 
$8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + + subq $512,%r8 + addq $512,%r11 + movl %r8d,%r10d + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_378 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_378 + jb .L_last_num_blocks_is_7_1_378 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_378 + jb .L_last_num_blocks_is_11_9_378 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_378 + ja .L_last_num_blocks_is_16_378 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_378 + jmp .L_last_num_blocks_is_13_378 + +.L_last_num_blocks_is_11_9_378: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_378 + ja .L_last_num_blocks_is_11_378 + jmp .L_last_num_blocks_is_9_378 + +.L_last_num_blocks_is_7_1_378: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_378 + jb .L_last_num_blocks_is_3_1_378 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_378 + je .L_last_num_blocks_is_6_378 + jmp .L_last_num_blocks_is_5_378 + +.L_last_num_blocks_is_3_1_378: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_378 + je .L_last_num_blocks_is_2_378 +.L_last_num_blocks_is_1_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_379 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_379 + +.L_16_blocks_overflow_379: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_379: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + 
vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_380 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_380 +.L_small_initial_partial_block_380: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_380 +.L_small_initial_compute_done_380: +.L_after_reduction_380: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_2_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_381 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_381 + +.L_16_blocks_overflow_381: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_381: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + 
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_382 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_382 +.L_small_initial_partial_block_382: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_382: + + orq %r8,%r8 + je .L_after_reduction_382 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_382: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_3_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_383 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_383 + +.L_16_blocks_overflow_383: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_383: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_384 + + + + + + subq $16,%r8 + movq 
$0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_384 +.L_small_initial_partial_block_384: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_384: + + orq %r8,%r8 + je .L_after_reduction_384 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_384: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_4_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_385 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_385 + +.L_16_blocks_overflow_385: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_385: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_386 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_386 +.L_small_initial_partial_block_386: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_386: + + orq %r8,%r8 + je .L_after_reduction_386 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_386: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_5_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_387 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_387 + +.L_16_blocks_overflow_387: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_387: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast 
%xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_388 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_388 +.L_small_initial_partial_block_388: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_388: + + orq %r8,%r8 + je .L_after_reduction_388 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_388: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_6_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_389 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_389 + +.L_16_blocks_overflow_389: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_389: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_390 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq 
$8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_390 +.L_small_initial_partial_block_390: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_390: + + orq %r8,%r8 + je .L_after_reduction_390 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_390: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_7_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_391 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_391 + +.L_16_blocks_overflow_391: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_391: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 
+ vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_392 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_392 +.L_small_initial_partial_block_392: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) 
+ vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_392: + + orq %r8,%r8 + je .L_after_reduction_392 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_392: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_8_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_393 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_393 + +.L_16_blocks_overflow_393: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_393: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_394 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_394 +.L_small_initial_partial_block_394: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 
$1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_394: + + orq %r8,%r8 + je .L_after_reduction_394 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_394: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_9_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_395 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_395 + +.L_16_blocks_overflow_395: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_395: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq 
%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_396 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_396 +.L_small_initial_partial_block_396: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq 
%zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_396: + + orq %r8,%r8 + je .L_after_reduction_396 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_396: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_10_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_397 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_397 + +.L_16_blocks_overflow_397: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_397: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_398 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_398 +.L_small_initial_partial_block_398: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq 
$0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_398: + + orq %r8,%r8 + je .L_after_reduction_398 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_398: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_11_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_399 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_399 + +.L_16_blocks_overflow_399: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_399: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + 
vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_400 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_400 +.L_small_initial_partial_block_400: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_400: + + orq %r8,%r8 + je .L_after_reduction_400 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_400: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_12_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_401 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_401 + +.L_16_blocks_overflow_401: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_401: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + 
+ + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_402 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq 
%zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_402 +.L_small_initial_partial_block_402: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_402: + + orq %r8,%r8 + je .L_after_reduction_402 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_402: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_13_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_403 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_403 + +.L_16_blocks_overflow_403: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_403: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb 
%zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_404 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_404 +.L_small_initial_partial_block_404: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_404: + + orq %r8,%r8 + je .L_after_reduction_404 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_404: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_14_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_405 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_405 + +.L_16_blocks_overflow_405: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_405: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_406 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_406 +.L_small_initial_partial_block_406: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + 
vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_406: + + orq %r8,%r8 + je .L_after_reduction_406 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_406: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_15_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_407 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_407 + +.L_16_blocks_overflow_407: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_407: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 
64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_408 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_408 +.L_small_initial_partial_block_408: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_408: + + orq %r8,%r8 + je .L_after_reduction_408 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_408: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_16_378: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_409 + vpaddd %zmm28,%zmm2,%zmm0 + 
vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_409 + +.L_16_blocks_overflow_409: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_409: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc 
%zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_410: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_410: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_410: + jmp .L_last_blocks_done_378 +.L_last_num_blocks_is_0_378: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq 
$0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_378: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_334 +.L_encrypt_16_blocks_334: + cmpb $240,%r15b + jae .L_16_blocks_overflow_411 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_411 +.L_16_blocks_overflow_411: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_411: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq 
$0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 256(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 320(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 384(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + 
vmovdqu64 448(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_412 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_412 + jb .L_last_num_blocks_is_7_1_412 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_412 + jb .L_last_num_blocks_is_11_9_412 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_412 + ja .L_last_num_blocks_is_16_412 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_412 + jmp .L_last_num_blocks_is_13_412 + +.L_last_num_blocks_is_11_9_412: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_412 + ja .L_last_num_blocks_is_11_412 + jmp .L_last_num_blocks_is_9_412 + +.L_last_num_blocks_is_7_1_412: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_412 + jb .L_last_num_blocks_is_3_1_412 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_412 + je .L_last_num_blocks_is_6_412 + jmp .L_last_num_blocks_is_5_412 + +.L_last_num_blocks_is_3_1_412: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_412 + je .L_last_num_blocks_is_2_412 +.L_last_num_blocks_is_1_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_413 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_413 + +.L_16_blocks_overflow_413: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_413: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %xmm31,%xmm0,%xmm0 + 
vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %xmm31,%xmm0,%xmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_414 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_414 +.L_small_initial_partial_block_414: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_414 +.L_small_initial_compute_done_414: +.L_after_reduction_414: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_2_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_415 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_415 + +.L_16_blocks_overflow_415: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_415: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + 
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %ymm31,%ymm0,%ymm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_416 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp 
.L_small_initial_compute_done_416 +.L_small_initial_partial_block_416: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_416: + + orq %r8,%r8 + je .L_after_reduction_416 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_416: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_3_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_417 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_417 + +.L_16_blocks_overflow_417: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_417: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + 
vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_418 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_418 +.L_small_initial_partial_block_418: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_418: + + orq %r8,%r8 + je .L_after_reduction_418 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_418: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_4_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_419 + vpaddd 
%zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_419 + +.L_16_blocks_overflow_419: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_419: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_420 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + 
vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_420 +.L_small_initial_partial_block_420: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_420: + + orq %r8,%r8 + je .L_after_reduction_420 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_420: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_5_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_421 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_421 + +.L_16_blocks_overflow_421: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_421: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_422 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_422 +.L_small_initial_partial_block_422: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_422: + + orq %r8,%r8 + je .L_after_reduction_422 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_422: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_6_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_423 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_423 + +.L_16_blocks_overflow_423: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_423: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_424 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_424 +.L_small_initial_partial_block_424: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_424: + + orq %r8,%r8 + je .L_after_reduction_424 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_424: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_7_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_425 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_425 + +.L_16_blocks_overflow_425: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_425: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 
64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_426 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp 
.L_small_initial_compute_done_426 +.L_small_initial_partial_block_426: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_426: + + orq %r8,%r8 + je .L_after_reduction_426 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_426: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_8_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_427 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_427 + +.L_16_blocks_overflow_427: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_427: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_428 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_428 +.L_small_initial_partial_block_428: + + + + + + + + + movq 
%r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_428: + + orq %r8,%r8 + je .L_after_reduction_428 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_428: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_9_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_429 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_429 + +.L_16_blocks_overflow_429: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_429: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_430 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq 
%zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_430 +.L_small_initial_partial_block_430: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_430: + + orq %r8,%r8 + je .L_after_reduction_430 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_430: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_10_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_431 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_431 + +.L_16_blocks_overflow_431: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_431: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 
48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 
$1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_432 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_432 +.L_small_initial_partial_block_432: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_432: + + orq %r8,%r8 + je .L_after_reduction_432 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_432: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_11_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae 
.L_16_blocks_overflow_433 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_433 + +.L_16_blocks_overflow_433: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_433: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + 
vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_434 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_434 +.L_small_initial_partial_block_434: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq 
$0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_434: + + orq %r8,%r8 + je .L_after_reduction_434 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_434: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_12_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_435 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_435 + +.L_16_blocks_overflow_435: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_435: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_436 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 
+ vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_436 +.L_small_initial_partial_block_436: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_436: + + orq %r8,%r8 + je .L_after_reduction_436 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_436: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_13_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_437 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_437 + +.L_16_blocks_overflow_437: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_437: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + 
+ + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + 
vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_438 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_438 +.L_small_initial_partial_block_438: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 
$1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_438: + + orq %r8,%r8 + je .L_after_reduction_438 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_438: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_14_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_439 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_439 + +.L_16_blocks_overflow_439: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_439: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + 
vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_440 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + 
vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_440 +.L_small_initial_partial_block_440: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_440: + + orq %r8,%r8 + je .L_after_reduction_440 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_440: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_15_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_441 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_441 + +.L_16_blocks_overflow_441: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_441: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_442 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_442 +.L_small_initial_partial_block_442: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 
256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_442: + + orq %r8,%r8 + je .L_after_reduction_442 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_442: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_16_412: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_443 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_443 + +.L_16_blocks_overflow_443: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_443: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + 
vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_444: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq 
%zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_444: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_444: + jmp .L_last_blocks_done_412 +.L_last_num_blocks_is_0_412: + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq 
$0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_412: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_334 + +.L_message_below_32_blocks_334: + + + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_445 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) +.L_skip_hkeys_precomputation_445: + movq $1,%r14 + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_446 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_446 + jb .L_last_num_blocks_is_7_1_446 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_446 + jb .L_last_num_blocks_is_11_9_446 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_446 + ja .L_last_num_blocks_is_16_446 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_446 + jmp .L_last_num_blocks_is_13_446 + +.L_last_num_blocks_is_11_9_446: + + 
cmpl $10,%r10d + je .L_last_num_blocks_is_10_446 + ja .L_last_num_blocks_is_11_446 + jmp .L_last_num_blocks_is_9_446 + +.L_last_num_blocks_is_7_1_446: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_446 + jb .L_last_num_blocks_is_3_1_446 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_446 + je .L_last_num_blocks_is_6_446 + jmp .L_last_num_blocks_is_5_446 + +.L_last_num_blocks_is_3_1_446: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_446 + je .L_last_num_blocks_is_2_446 +.L_last_num_blocks_is_1_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_447 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_447 + +.L_16_blocks_overflow_447: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_447: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_448 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + 
vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_448 +.L_small_initial_partial_block_448: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_448 +.L_small_initial_compute_done_448: +.L_after_reduction_448: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_2_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_449 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_449 + +.L_16_blocks_overflow_449: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_449: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + 
vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_450 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_450 +.L_small_initial_partial_block_450: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_450: + + orq %r8,%r8 + je .L_after_reduction_450 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_450: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_3_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_451 + vpaddd %zmm28,%zmm2,%zmm0 + jmp 
.L_16_blocks_ok_451 + +.L_16_blocks_overflow_451: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_451: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_452 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_452 +.L_small_initial_partial_block_452: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_452: + + orq %r8,%r8 + je .L_after_reduction_452 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_452: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_4_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_453 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_453 + +.L_16_blocks_overflow_453: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_453: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + 
vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_454 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_454 +.L_small_initial_partial_block_454: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_454: + + orq %r8,%r8 + je .L_after_reduction_454 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_454: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_5_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_455 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_455 + +.L_16_blocks_overflow_455: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 
+.L_16_blocks_ok_455: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %xmm29,%xmm3,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_456 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq 
%zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_456 +.L_small_initial_partial_block_456: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_456: + + orq %r8,%r8 + je .L_after_reduction_456 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_456: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_6_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_457 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_457 + +.L_16_blocks_overflow_457: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_457: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 
+ + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %ymm29,%ymm3,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_458 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_458 +.L_small_initial_partial_block_458: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + 
vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_458: + + orq %r8,%r8 + je .L_after_reduction_458 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_458: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_7_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_459 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_459 + +.L_16_blocks_overflow_459: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_459: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_460 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_460 +.L_small_initial_partial_block_460: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + 
vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_460: + + orq %r8,%r8 + je .L_after_reduction_460 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_460: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_8_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_461 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_461 + +.L_16_blocks_overflow_461: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_461: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 
208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_462 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_462 +.L_small_initial_partial_block_462: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_462: + + orq %r8,%r8 + je .L_after_reduction_462 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_462: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_9_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + 
cmpl $247,%r15d + jae .L_16_blocks_overflow_463 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_463 + +.L_16_blocks_overflow_463: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_463: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %xmm29,%xmm4,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_464 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_464 +.L_small_initial_partial_block_464: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_464: + + orq %r8,%r8 + je 
.L_after_reduction_464 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_464: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_10_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_465 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_465 + +.L_16_blocks_overflow_465: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_465: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %ymm29,%ymm4,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_466 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_466 +.L_small_initial_partial_block_466: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + 
vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_466: + + orq %r8,%r8 + je .L_after_reduction_466 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_466: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_11_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_467 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_467 + +.L_16_blocks_overflow_467: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_467: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + 
vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_468 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_468 +.L_small_initial_partial_block_468: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq 
$0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_468: + + orq %r8,%r8 + je .L_after_reduction_468 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_468: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_12_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_469 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_469 + +.L_16_blocks_overflow_469: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_469: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + 
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_470 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_470 +.L_small_initial_partial_block_470: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_470: + + orq %r8,%r8 + je .L_after_reduction_470 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_470: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_13_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_471 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_471 + +.L_16_blocks_overflow_471: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_471: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %xmm29,%xmm5,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_472 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_472 +.L_small_initial_partial_block_472: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_472: + + orq %r8,%r8 + je .L_after_reduction_472 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_472: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_14_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_473 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_473 + +.L_16_blocks_overflow_473: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb 
%ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_473: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %ymm29,%ymm5,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_474 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_474 +.L_small_initial_partial_block_474: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq 
$0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_474: + + orq %r8,%r8 + je .L_after_reduction_474 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_474: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_15_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_475 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_475 + +.L_16_blocks_overflow_475: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_475: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_476 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + 
vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_476 +.L_small_initial_partial_block_476: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_476: + + orq %r8,%r8 + je .L_after_reduction_476 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_476: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_16_446: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_477 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_477 + +.L_16_blocks_overflow_477: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_477: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq 
%zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 
%zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm17 + vpshufb %zmm29,%zmm3,%zmm19 + vpshufb %zmm29,%zmm4,%zmm20 + vpshufb %zmm29,%zmm5,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_478: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_478: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_478: + jmp .L_last_blocks_done_446 +.L_last_num_blocks_is_0_446: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq 
%zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_446: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_334 + +.L_message_below_equal_16_blocks_334: + + + movl %r8d,%r12d + addl $15,%r12d + shrl $4,%r12d + cmpq $8,%r12 + je .L_small_initial_num_blocks_is_8_479 + jl .L_small_initial_num_blocks_is_7_1_479 + + + cmpq $12,%r12 + je .L_small_initial_num_blocks_is_12_479 + jl .L_small_initial_num_blocks_is_11_9_479 + + + cmpq $16,%r12 + je .L_small_initial_num_blocks_is_16_479 + cmpq $15,%r12 + je .L_small_initial_num_blocks_is_15_479 + cmpq $14,%r12 + je .L_small_initial_num_blocks_is_14_479 + jmp .L_small_initial_num_blocks_is_13_479 + +.L_small_initial_num_blocks_is_11_9_479: + + cmpq $11,%r12 + je .L_small_initial_num_blocks_is_11_479 + cmpq $10,%r12 + je .L_small_initial_num_blocks_is_10_479 + jmp .L_small_initial_num_blocks_is_9_479 + +.L_small_initial_num_blocks_is_7_1_479: + cmpq $4,%r12 + je .L_small_initial_num_blocks_is_4_479 + jl .L_small_initial_num_blocks_is_3_1_479 + + cmpq $7,%r12 + je .L_small_initial_num_blocks_is_7_479 + cmpq $6,%r12 + je .L_small_initial_num_blocks_is_6_479 + jmp .L_small_initial_num_blocks_is_5_479 + +.L_small_initial_num_blocks_is_3_1_479: + + cmpq $3,%r12 + je .L_small_initial_num_blocks_is_3_479 + cmpq $2,%r12 + je .L_small_initial_num_blocks_is_2_479 + + + + + +.L_small_initial_num_blocks_is_1_479: + vmovdqa64 SHUF_MASK(%rip),%xmm29 + vpaddd ONE(%rip),%xmm2,%xmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm0,%xmm2 + vpshufb %xmm29,%xmm0,%xmm0 + vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %xmm15,%xmm0,%xmm0 + vpxorq %xmm6,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm0,%xmm6 + vextracti32x4 $0,%zmm6,%xmm13 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_480 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq 
$0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_480 +.L_small_initial_partial_block_480: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + + + + + + + + + + + + vpxorq %xmm13,%xmm14,%xmm14 + + jmp .L_after_reduction_480 +.L_small_initial_compute_done_480: +.L_after_reduction_480: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_2_479: + vmovdqa64 SHUF_MASK(%rip),%ymm29 + vshufi64x2 $0,%ymm2,%ymm2,%ymm0 + vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm0,%xmm2 + vpshufb %ymm29,%ymm0,%ymm0 + vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %ymm15,%ymm0,%ymm0 + vpxorq %ymm6,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm0,%ymm6 + vextracti32x4 $1,%zmm6,%xmm13 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_481 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq 
$4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_481 +.L_small_initial_partial_block_481: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_481: + + orq %r8,%r8 + je .L_after_reduction_481 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_481: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_3_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vextracti32x4 $2,%zmm6,%xmm13 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_482 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq 
$0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_482 +.L_small_initial_partial_block_482: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_482: + + orq %r8,%r8 + je .L_after_reduction_482 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_482: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_4_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vextracti32x4 $3,%zmm6,%xmm13 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_483 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + 
vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_483 +.L_small_initial_partial_block_483: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_483: + + orq %r8,%r8 + je .L_after_reduction_483 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_483: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_5_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %xmm15,%xmm3,%xmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq 
%xmm7,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %xmm29,%xmm3,%xmm7 + vextracti32x4 $0,%zmm7,%xmm13 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_484 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_484 +.L_small_initial_partial_block_484: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_484: + + orq %r8,%r8 + je .L_after_reduction_484 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_484: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_6_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %ymm15,%ymm3,%ymm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %ymm7,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %ymm29,%ymm3,%ymm7 + vextracti32x4 $1,%zmm7,%xmm13 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_485 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_485 +.L_small_initial_partial_block_485: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 
+ vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_485: + + orq %r8,%r8 + je .L_after_reduction_485 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_485: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_7_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vextracti32x4 $2,%zmm7,%xmm13 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_486 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + 
vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_486 +.L_small_initial_partial_block_486: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_486: + + orq %r8,%r8 + je .L_after_reduction_486 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_486: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_8_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 
176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vextracti32x4 $3,%zmm7,%xmm13 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_487 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_487 +.L_small_initial_partial_block_487: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_487: + + orq %r8,%r8 + je .L_after_reduction_487 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_487: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_9_479: + 
vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %xmm10,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %xmm29,%xmm4,%xmm10 + vextracti32x4 $0,%zmm10,%xmm13 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_488 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + 
+ vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_488 +.L_small_initial_partial_block_488: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_488: + + orq %r8,%r8 + je .L_after_reduction_488 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_488: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_10_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc 
%ymm15,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %ymm15,%ymm4,%ymm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %ymm10,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %ymm29,%ymm4,%ymm10 + vextracti32x4 $1,%zmm10,%xmm13 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_489 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_489 +.L_small_initial_partial_block_489: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq 
$0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_489: + + orq %r8,%r8 + je .L_after_reduction_489 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_489: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_11_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vextracti32x4 $2,%zmm10,%xmm13 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_490 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_490 +.L_small_initial_partial_block_490: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + 
vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_490: + + orq %r8,%r8 + je .L_after_reduction_490 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_490: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_12_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vextracti32x4 $3,%zmm10,%xmm13 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_491 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq 
$0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_491 +.L_small_initial_partial_block_491: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_491: + + orq %r8,%r8 + je .L_after_reduction_491 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_491: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_13_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb 
%zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %xmm11,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %xmm29,%xmm5,%xmm11 + vextracti32x4 $0,%zmm11,%xmm13 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_492 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq 
$0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_492 +.L_small_initial_partial_block_492: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_492: + + orq %r8,%r8 + je .L_after_reduction_492 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_492: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_14_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + 
vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %ymm15,%ymm5,%ymm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %ymm11,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %ymm29,%ymm5,%ymm11 + vextracti32x4 $1,%zmm11,%xmm13 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_493 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq 
$0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_493 +.L_small_initial_partial_block_493: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_493: + + orq %r8,%r8 + je .L_after_reduction_493 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_493: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_15_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + 
vextracti32x4 $2,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %zmm29,%zmm5,%zmm11 + vextracti32x4 $2,%zmm11,%xmm13 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_494 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq 
$0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_494 +.L_small_initial_partial_block_494: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_494: + + orq %r8,%r8 + je .L_after_reduction_494 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_494: + jmp .L_small_initial_blocks_encrypted_479 +.L_small_initial_num_blocks_is_16_479: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + 
vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm0,%zmm6 + vpshufb %zmm29,%zmm3,%zmm7 + vpshufb %zmm29,%zmm4,%zmm10 + vpshufb %zmm29,%zmm5,%zmm11 + vextracti32x4 $3,%zmm11,%xmm13 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_495: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + 
vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_495: + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_495: +.L_small_initial_blocks_encrypted_479: +.L_ghash_done_334: + vmovdqu64 %xmm2,0(%rsi) + vmovdqu64 %xmm14,64(%rsi) +.L_enc_dec_done_334: + jmp .Lexit_gcm_encrypt +.Lexit_gcm_encrypt: + cmpq $256,%r8 + jbe .Lskip_hkeys_cleanup_496 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %zmm0,0(%rsp) + vmovdqa64 %zmm0,64(%rsp) + vmovdqa64 %zmm0,128(%rsp) + vmovdqa64 %zmm0,192(%rsp) + vmovdqa64 %zmm0,256(%rsp) + vmovdqa64 %zmm0,320(%rsp) + vmovdqa64 %zmm0,384(%rsp) + vmovdqa64 %zmm0,448(%rsp) + vmovdqa64 %zmm0,512(%rsp) + vmovdqa64 %zmm0,576(%rsp) + vmovdqa64 %zmm0,640(%rsp) + vmovdqa64 %zmm0,704(%rsp) +.Lskip_hkeys_cleanup_496: + vzeroupper + leaq (%rbp),%rsp +.cfi_def_cfa_register %rsp + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + .byte 0xf3,0xc3 +.Lencrypt_seh_end: +.cfi_endproc +.size ossl_aes_gcm_encrypt_avx512, .-ossl_aes_gcm_encrypt_avx512 +.globl ossl_aes_gcm_decrypt_avx512 +.type ossl_aes_gcm_decrypt_avx512,@function +.align 32 +ossl_aes_gcm_decrypt_avx512: +.cfi_startproc +.Ldecrypt_seh_begin: +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 +.Ldecrypt_seh_push_rbx: + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 +.Ldecrypt_seh_push_rbp: + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 +.Ldecrypt_seh_push_r12: + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 +.Ldecrypt_seh_push_r13: + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 +.Ldecrypt_seh_push_r14: + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Ldecrypt_seh_push_r15: + + + + + + + + + + + leaq 0(%rsp),%rbp 
+.cfi_def_cfa_register %rbp +.Ldecrypt_seh_setfp: + +.Ldecrypt_seh_prolog_end: + subq $1588,%rsp + andq $(-64),%rsp + + + movl 240(%rdi),%eax + cmpl $9,%eax + je .Laes_gcm_decrypt_128_avx512 + cmpl $11,%eax + je .Laes_gcm_decrypt_192_avx512 + cmpl $13,%eax + je .Laes_gcm_decrypt_256_avx512 + xorl %eax,%eax + jmp .Lexit_gcm_decrypt +.align 32 +.Laes_gcm_decrypt_128_avx512: + orq %r8,%r8 + je .L_enc_dec_done_497 + xorq %r14,%r14 + vmovdqu64 64(%rsi),%xmm14 + + movq (%rdx),%r11 + orq %r11,%r11 + je .L_partial_block_done_498 + movl $16,%r10d + leaq byte_len_to_mask_table(%rip),%r12 + cmpq %r10,%r8 + cmovcq %r8,%r10 + kmovw (%r12,%r10,2),%k1 + vmovdqu8 (%rcx),%xmm0{%k1}{z} + + vmovdqu64 16(%rsi),%xmm3 + vmovdqu64 336(%rsi),%xmm4 + + + + leaq SHIFT_MASK(%rip),%r12 + addq %r11,%r12 + vmovdqu64 (%r12),%xmm5 + vpshufb %xmm5,%xmm3,%xmm3 + + vmovdqa64 %xmm0,%xmm6 + vpxorq %xmm0,%xmm3,%xmm3 + + + leaq (%r8,%r11,1),%r13 + subq $16,%r13 + jge .L_no_extra_mask_498 + subq %r13,%r12 +.L_no_extra_mask_498: + + + + vmovdqu64 16(%r12),%xmm0 + vpand %xmm0,%xmm3,%xmm3 + vpand %xmm0,%xmm6,%xmm6 + vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 + vpshufb %xmm5,%xmm6,%xmm6 + vpxorq %xmm6,%xmm14,%xmm14 + cmpq $0,%r13 + jl .L_partial_incomplete_498 + + vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 + vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 + vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 + vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 + vpxorq %xmm11,%xmm14,%xmm14 + + vpsrldq $8,%xmm14,%xmm11 + vpslldq $8,%xmm14,%xmm14 + vpxorq %xmm11,%xmm7,%xmm7 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vmovdqu64 POLY2(%rip),%xmm11 + + vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 + vpslldq $8,%xmm10,%xmm10 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 + vpsrldq $4,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 + vpslldq $4,%xmm14,%xmm14 + + vpternlogq $0x96,%xmm10,%xmm7,%xmm14 + + movq $0,(%rdx) + + movq %r11,%r12 + movq $16,%r11 + subq %r12,%r11 + jmp .L_enc_dec_done_498 + +.L_partial_incomplete_498: + addq %r8,(%rdx) + movq %r8,%r11 + +.L_enc_dec_done_498: + + + leaq byte_len_to_mask_table(%rip),%r12 + kmovw (%r12,%r11,2),%k1 + vmovdqu64 %xmm14,64(%rsi) + movq %r9,%r12 + vmovdqu8 %xmm3,(%r12){%k1} +.L_partial_block_done_498: + vmovdqu64 0(%rsi),%xmm2 + subq %r11,%r8 + je .L_enc_dec_done_497 + cmpq $256,%r8 + jbe .L_message_below_equal_16_blocks_497 + + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vmovdqa64 ddq_addbe_4444(%rip),%zmm27 + vmovdqa64 ddq_addbe_1234(%rip),%zmm28 + + + + + + + vmovd %xmm2,%r15d + andl $255,%r15d + + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpshufb %zmm29,%zmm2,%zmm2 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_499 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_499 +.L_next_16_overflow_499: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_499: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 0(%rcx,%r11,1),%zmm0 + vmovdqu8 64(%rcx,%r11,1),%zmm3 + vmovdqu8 128(%rcx,%r11,1),%zmm4 + vmovdqu8 192(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + 
vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,0(%r10,%r11,1) + vmovdqu8 %zmm10,64(%r10,%r11,1) + vmovdqu8 %zmm11,128(%r10,%r11,1) + vmovdqu8 %zmm12,192(%r10,%r11,1) + + vpshufb %zmm29,%zmm0,%zmm7 + vpshufb %zmm29,%zmm3,%zmm10 + vpshufb %zmm29,%zmm4,%zmm11 + vpshufb %zmm29,%zmm5,%zmm12 + vmovdqa64 %zmm7,768(%rsp) + vmovdqa64 %zmm10,832(%rsp) + vmovdqa64 %zmm11,896(%rsp) + vmovdqa64 %zmm12,960(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_500 + + vmovdqu64 288(%rsi),%zmm0 + vmovdqu64 %zmm0,704(%rsp) + + vmovdqu64 224(%rsi),%zmm3 + vmovdqu64 %zmm3,640(%rsp) + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 160(%rsi),%zmm4 + vmovdqu64 %zmm4,576(%rsp) + + vmovdqu64 96(%rsi),%zmm5 + vmovdqu64 %zmm5,512(%rsp) +.L_skip_hkeys_precomputation_500: + cmpq $512,%r8 + jb .L_message_below_32_blocks_497 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_501 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_501 +.L_next_16_overflow_501: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_501: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 256(%rcx,%r11,1),%zmm0 + vmovdqu8 320(%rcx,%r11,1),%zmm3 + vmovdqu8 384(%rcx,%r11,1),%zmm4 + vmovdqu8 448(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc 
%zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,256(%r10,%r11,1) + vmovdqu8 %zmm10,320(%r10,%r11,1) + vmovdqu8 %zmm11,384(%r10,%r11,1) + vmovdqu8 %zmm12,448(%r10,%r11,1) + + vpshufb %zmm29,%zmm0,%zmm7 + vpshufb %zmm29,%zmm3,%zmm10 + vpshufb %zmm29,%zmm4,%zmm11 + vpshufb %zmm29,%zmm5,%zmm12 + vmovdqa64 %zmm7,1024(%rsp) + vmovdqa64 %zmm10,1088(%rsp) + vmovdqa64 %zmm11,1152(%rsp) + vmovdqa64 %zmm12,1216(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_502 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + 
vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,192(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,128(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,64(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,0(%rsp) +.L_skip_hkeys_precomputation_502: + movq $1,%r14 + addq $512,%r11 + subq $512,%r8 + + cmpq $768,%r8 + jb .L_no_more_big_nblocks_497 +.L_encrypt_big_nblocks_497: + cmpb $240,%r15b + jae .L_16_blocks_overflow_503 + vpaddd 
%zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_503 +.L_16_blocks_overflow_503: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_503: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast 
%zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_504 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_504 +.L_16_blocks_overflow_504: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_504: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq 
$0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_505 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_505 +.L_16_blocks_overflow_505: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_505: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 512(%rcx,%r11,1),%zmm17 + vmovdqu8 
576(%rcx,%r11,1),%zmm19 + vmovdqu8 640(%rcx,%r11,1),%zmm20 + vmovdqu8 704(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + + + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpternlogq $0x96,%zmm15,%zmm12,%zmm6 + vpxorq %zmm24,%zmm6,%zmm6 + vpternlogq $0x96,%zmm10,%zmm13,%zmm7 + vpxorq %zmm25,%zmm7,%zmm7 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vextracti64x4 $1,%zmm6,%ymm12 + vpxorq %ymm12,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm12 + vpxorq %xmm12,%xmm6,%xmm6 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm6 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,512(%r10,%r11,1) + vmovdqu8 %zmm3,576(%r10,%r11,1) + vmovdqu8 %zmm4,640(%r10,%r11,1) + vmovdqu8 %zmm5,704(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1024(%rsp) + vmovdqa64 %zmm3,1088(%rsp) + vmovdqa64 %zmm4,1152(%rsp) + vmovdqa64 %zmm5,1216(%rsp) + vmovdqa64 %zmm6,%zmm14 + + addq $768,%r11 + subq $768,%r8 + cmpq $768,%r8 + jae .L_encrypt_big_nblocks_497 + +.L_no_more_big_nblocks_497: + + cmpq $512,%r8 + jae .L_encrypt_32_blocks_497 + + cmpq $256,%r8 + jae .L_encrypt_16_blocks_497 +.L_encrypt_0_blocks_ghash_32_497: + movl %r8d,%r10d + andl $~15,%r10d + movl $256,%ebx + subl %r10d,%ebx + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq 
$0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + addl $256,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_506 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_506 + jb .L_last_num_blocks_is_7_1_506 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_506 + jb .L_last_num_blocks_is_11_9_506 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_506 + ja .L_last_num_blocks_is_16_506 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_506 + jmp .L_last_num_blocks_is_13_506 + +.L_last_num_blocks_is_11_9_506: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_506 + ja .L_last_num_blocks_is_11_506 + jmp .L_last_num_blocks_is_9_506 + +.L_last_num_blocks_is_7_1_506: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_506 + jb .L_last_num_blocks_is_3_1_506 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_506 + je .L_last_num_blocks_is_6_506 + jmp .L_last_num_blocks_is_5_506 + +.L_last_num_blocks_is_3_1_506: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_506 + je .L_last_num_blocks_is_2_506 +.L_last_num_blocks_is_1_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_507 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_507 + +.L_16_blocks_overflow_507: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_507: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq 
$0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_508 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_508 +.L_small_initial_partial_block_508: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_508 +.L_small_initial_compute_done_508: +.L_after_reduction_508: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_2_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_509 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_509 + +.L_16_blocks_overflow_509: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_509: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_510 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_510 +.L_small_initial_partial_block_510: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq 
$4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_510: + + orq %r8,%r8 + je .L_after_reduction_510 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_510: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_3_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_511 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_511 + +.L_16_blocks_overflow_511: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_511: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_512 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 
$1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_512 +.L_small_initial_partial_block_512: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_512: + + orq %r8,%r8 + je .L_after_reduction_512 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_512: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_4_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_513 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_513 + +.L_16_blocks_overflow_513: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_513: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_514 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_514 +.L_small_initial_partial_block_514: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_514: + + orq %r8,%r8 + je .L_after_reduction_514 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_514: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_5_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_515 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_515 + +.L_16_blocks_overflow_515: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb 
%zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_515: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_516 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 
$1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_516 +.L_small_initial_partial_block_516: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_516: + + orq %r8,%r8 + je .L_after_reduction_516 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_516: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_6_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_517 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_517 + +.L_16_blocks_overflow_517: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_517: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq 
$0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_518 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_518 +.L_small_initial_partial_block_518: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 
$1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_518: + + orq %r8,%r8 + je .L_after_reduction_518 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_518: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_7_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_519 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_519 + +.L_16_blocks_overflow_519: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_519: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + 
vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_520 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_520 +.L_small_initial_partial_block_520: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_520: + + orq %r8,%r8 + je .L_after_reduction_520 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_520: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_8_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_521 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_521 + +.L_16_blocks_overflow_521: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 
ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_521: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_522 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + 
vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_522 +.L_small_initial_partial_block_522: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_522: + + orq %r8,%r8 + je .L_after_reduction_522 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_522: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_9_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_523 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_523 + +.L_16_blocks_overflow_523: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_523: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_524 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + 
vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_524 +.L_small_initial_partial_block_524: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_524: + + orq %r8,%r8 + je .L_after_reduction_524 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_524: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_10_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_525 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_525 + +.L_16_blocks_overflow_525: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_525: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_526 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + 
vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_526 +.L_small_initial_partial_block_526: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_526: + + orq %r8,%r8 + je .L_after_reduction_526 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_526: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_11_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_527 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_527 + +.L_16_blocks_overflow_527: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_527: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + 
vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_528 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_528 +.L_small_initial_partial_block_528: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_528: + + orq %r8,%r8 + je .L_after_reduction_528 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_528: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_12_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_529 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_529 + +.L_16_blocks_overflow_529: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_529: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + 
vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_530 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + 
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_530 +.L_small_initial_partial_block_530: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_530: + + orq %r8,%r8 + je .L_after_reduction_530 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_530: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_13_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_531 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_531 + +.L_16_blocks_overflow_531: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_531: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_532 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq 
$0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_532 +.L_small_initial_partial_block_532: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_532: + + orq %r8,%r8 + je .L_after_reduction_532 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_532: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_14_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_533 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_533 + +.L_16_blocks_overflow_533: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_533: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + 
vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + 
subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_534 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_534 +.L_small_initial_partial_block_534: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq 
$4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_534: + + orq %r8,%r8 + je .L_after_reduction_534 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_534: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_15_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_535 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_535 + +.L_16_blocks_overflow_535: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_535: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_536 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_536 +.L_small_initial_partial_block_536: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + 
vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_536: + + orq %r8,%r8 + je .L_after_reduction_536 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_536: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_16_506: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_537 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_537 + +.L_16_blocks_overflow_537: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_537: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + 
vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_538: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_538: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_538: + jmp .L_last_blocks_done_506 +.L_last_num_blocks_is_0_506: + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_506: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_497 +.L_encrypt_32_blocks_497: + cmpb $240,%r15b + jae .L_16_blocks_overflow_539 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_539 +.L_16_blocks_overflow_539: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_539: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_540 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_540 +.L_16_blocks_overflow_540: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + 
vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_540: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb 
%zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + + subq $512,%r8 + addq $512,%r11 + movl %r8d,%r10d + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_541 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_541 + jb .L_last_num_blocks_is_7_1_541 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_541 + jb .L_last_num_blocks_is_11_9_541 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_541 + ja .L_last_num_blocks_is_16_541 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_541 + jmp .L_last_num_blocks_is_13_541 + +.L_last_num_blocks_is_11_9_541: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_541 + ja .L_last_num_blocks_is_11_541 + jmp .L_last_num_blocks_is_9_541 + +.L_last_num_blocks_is_7_1_541: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_541 + jb .L_last_num_blocks_is_3_1_541 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_541 + je .L_last_num_blocks_is_6_541 + jmp .L_last_num_blocks_is_5_541 + +.L_last_num_blocks_is_3_1_541: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_541 + je .L_last_num_blocks_is_2_541 +.L_last_num_blocks_is_1_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_542 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_542 + +.L_16_blocks_overflow_542: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_542: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_543 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_543 +.L_small_initial_partial_block_543: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq 
%xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_543 +.L_small_initial_compute_done_543: +.L_after_reduction_543: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_2_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_544 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_544 + +.L_16_blocks_overflow_544: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_544: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_545 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq 
%zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_545 +.L_small_initial_partial_block_545: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_545: + + orq %r8,%r8 + je .L_after_reduction_545 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_545: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_3_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_546 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_546 + +.L_16_blocks_overflow_546: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_546: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 
0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_547 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_547 +.L_small_initial_partial_block_547: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_547: + + orq %r8,%r8 + je .L_after_reduction_547 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_547: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_4_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_548 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_548 + +.L_16_blocks_overflow_548: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 
+.L_16_blocks_ok_548: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_549 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_549 +.L_small_initial_partial_block_549: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + 
vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_549: + + orq %r8,%r8 + je .L_after_reduction_549 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_549: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_5_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_550 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_550 + +.L_16_blocks_overflow_550: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_550: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq 
%zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_551 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_551 +.L_small_initial_partial_block_551: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_551: + + orq %r8,%r8 + je .L_after_reduction_551 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_551: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_6_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_552 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_552 + +.L_16_blocks_overflow_552: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd 
ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_552: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_553 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + 
vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_553 +.L_small_initial_partial_block_553: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_553: + + orq %r8,%r8 + je .L_after_reduction_553 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_553: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_7_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_554 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_554 + +.L_16_blocks_overflow_554: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_554: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 
64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_555 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_555 +.L_small_initial_partial_block_555: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq 
%zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_555: + + orq %r8,%r8 + je .L_after_reduction_555 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_555: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_8_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_556 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_556 + +.L_16_blocks_overflow_556: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_556: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_557 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_557 +.L_small_initial_partial_block_557: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_557: + + orq %r8,%r8 + je .L_after_reduction_557 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_557: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_9_541: + leaq 
byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_558 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_558 + +.L_16_blocks_overflow_558: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_558: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb 
%zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_559 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_559 +.L_small_initial_partial_block_559: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_559: + + orq %r8,%r8 + je .L_after_reduction_559 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_559: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_10_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_560 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_560 + 
+.L_16_blocks_overflow_560: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_560: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_561 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 
192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_561 +.L_small_initial_partial_block_561: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_561: + + orq %r8,%r8 + je .L_after_reduction_561 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_561: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_11_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_562 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_562 + 
+.L_16_blocks_overflow_562: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_562: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_563 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 
176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_563 +.L_small_initial_partial_block_563: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_563: + + orq %r8,%r8 + je .L_after_reduction_563 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_563: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_12_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_564 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp 
.L_16_blocks_ok_564 + +.L_16_blocks_overflow_564: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_564: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_565 + + + + + + subq $16,%r8 + movq 
$0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_565 +.L_small_initial_partial_block_565: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_565: + + orq %r8,%r8 + je .L_after_reduction_565 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_565: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_13_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_566 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd 
%zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_566 + +.L_16_blocks_overflow_566: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_566: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq 
%zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_567 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_567 +.L_small_initial_partial_block_567: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 
$1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_567: + + orq %r8,%r8 + je .L_after_reduction_567 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_567: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_14_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_568 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_568 + +.L_16_blocks_overflow_568: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_568: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + 
vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_569 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_569 +.L_small_initial_partial_block_569: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 
+ vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_569: + + orq %r8,%r8 + je .L_after_reduction_569 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_569: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_15_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_570 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_570 + +.L_16_blocks_overflow_570: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_570: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_571 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq 
%ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_571 +.L_small_initial_partial_block_571: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_571: + + orq %r8,%r8 + je .L_after_reduction_571 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_571: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_16_541: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_572 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_572 + +.L_16_blocks_overflow_572: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_572: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq 
%zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_573: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_573: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_573: + jmp .L_last_blocks_done_541 +.L_last_num_blocks_is_0_541: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_541: + vpshufb 
%xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_497 +.L_encrypt_16_blocks_497: + cmpb $240,%r15b + jae .L_16_blocks_overflow_574 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_574 +.L_16_blocks_overflow_574: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_574: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 256(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 320(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 384(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 448(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_575 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_575 + jb .L_last_num_blocks_is_7_1_575 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_575 + jb .L_last_num_blocks_is_11_9_575 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_575 + ja .L_last_num_blocks_is_16_575 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_575 + jmp .L_last_num_blocks_is_13_575 + +.L_last_num_blocks_is_11_9_575: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_575 + ja .L_last_num_blocks_is_11_575 + jmp .L_last_num_blocks_is_9_575 + +.L_last_num_blocks_is_7_1_575: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_575 + jb .L_last_num_blocks_is_3_1_575 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_575 + je .L_last_num_blocks_is_6_575 + jmp .L_last_num_blocks_is_5_575 + +.L_last_num_blocks_is_3_1_575: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_575 + je .L_last_num_blocks_is_2_575 +.L_last_num_blocks_is_1_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_576 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_576 + +.L_16_blocks_overflow_576: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_576: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %xmm31,%xmm0,%xmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_577 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_577 
+.L_small_initial_partial_block_577: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_577 +.L_small_initial_compute_done_577: +.L_after_reduction_577: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_2_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_578 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_578 + +.L_16_blocks_overflow_578: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_578: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %ymm31,%ymm0,%ymm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + 
cmpq $16,%r8 + jl .L_small_initial_partial_block_579 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_579 +.L_small_initial_partial_block_579: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_579: + + orq %r8,%r8 + je .L_after_reduction_579 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_579: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_3_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_580 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_580 + +.L_16_blocks_overflow_580: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_580: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_581 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_581 +.L_small_initial_partial_block_581: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + 
vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_581: + + orq %r8,%r8 + je .L_after_reduction_581 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_581: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_4_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_582 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_582 + +.L_16_blocks_overflow_582: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_582: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + 
vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_583 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_583 +.L_small_initial_partial_block_583: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_583: + + orq %r8,%r8 + je .L_after_reduction_583 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_583: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_5_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_584 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_584 + +.L_16_blocks_overflow_584: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_584: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_585 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 
$1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_585 +.L_small_initial_partial_block_585: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_585: + + orq %r8,%r8 + je .L_after_reduction_585 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_585: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_6_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_586 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_586 + +.L_16_blocks_overflow_586: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_586: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + 
vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_587 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_587 +.L_small_initial_partial_block_587: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_587: + + orq %r8,%r8 + je .L_after_reduction_587 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_587: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_7_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_588 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_588 + +.L_16_blocks_overflow_588: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_588: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_589 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_589 +.L_small_initial_partial_block_589: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq 
$8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_589: + + orq %r8,%r8 + je .L_after_reduction_589 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_589: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_8_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_590 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_590 + +.L_16_blocks_overflow_590: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_590: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_591 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_591 +.L_small_initial_partial_block_591: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + 
vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_591: + + orq %r8,%r8 + je .L_after_reduction_591 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_591: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_9_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_592 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_592 + +.L_16_blocks_overflow_592: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_592: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + 
vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_593 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_593 +.L_small_initial_partial_block_593: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq 
%zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_593: + + orq %r8,%r8 + je .L_after_reduction_593 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_593: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_10_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_594 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_594 + +.L_16_blocks_overflow_594: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_594: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq 
$0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_595 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_595 +.L_small_initial_partial_block_595: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_595: + + orq %r8,%r8 + je .L_after_reduction_595 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_595: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_11_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_596 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_596 + +.L_16_blocks_overflow_596: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_596: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc 
%zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_597 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_597 +.L_small_initial_partial_block_597: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_597: + + orq %r8,%r8 + je .L_after_reduction_597 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_597: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_12_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_598 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_598 + +.L_16_blocks_overflow_598: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_598: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
+ vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_599 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq 
$0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_599 +.L_small_initial_partial_block_599: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_599: + + orq %r8,%r8 + je .L_after_reduction_599 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_599: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_13_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_600 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_600 + +.L_16_blocks_overflow_600: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_600: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast 
%xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_601 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_601 +.L_small_initial_partial_block_601: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + 
vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_601: + + orq %r8,%r8 + je .L_after_reduction_601 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_601: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_14_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_602 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_602 + +.L_16_blocks_overflow_602: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_602: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq 
$0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_603 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq 
$4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_603 +.L_small_initial_partial_block_603: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_603: + + orq %r8,%r8 + je .L_after_reduction_603 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_603: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_15_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_604 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_604 + +.L_16_blocks_overflow_604: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_604: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 
48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl 
.L_small_initial_partial_block_605 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_605 +.L_small_initial_partial_block_605: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_605: + + orq %r8,%r8 + je .L_after_reduction_605 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_605: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_16_575: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_606 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_606 + +.L_16_blocks_overflow_606: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_606: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_607: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_607: + vpxorq 
%xmm7,%xmm14,%xmm14 +.L_after_reduction_607: + jmp .L_last_blocks_done_575 +.L_last_num_blocks_is_0_575: + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_575: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_497 + +.L_message_below_32_blocks_497: + + + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_608 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq 
$0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) +.L_skip_hkeys_precomputation_608: + movq $1,%r14 + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_609 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_609 + jb .L_last_num_blocks_is_7_1_609 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_609 + jb .L_last_num_blocks_is_11_9_609 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_609 + ja .L_last_num_blocks_is_16_609 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_609 + jmp .L_last_num_blocks_is_13_609 + +.L_last_num_blocks_is_11_9_609: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_609 + ja .L_last_num_blocks_is_11_609 + jmp .L_last_num_blocks_is_9_609 + +.L_last_num_blocks_is_7_1_609: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_609 + jb .L_last_num_blocks_is_3_1_609 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_609 + je .L_last_num_blocks_is_6_609 + jmp .L_last_num_blocks_is_5_609 + +.L_last_num_blocks_is_3_1_609: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_609 + je .L_last_num_blocks_is_2_609 +.L_last_num_blocks_is_1_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_610 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_610 + +.L_16_blocks_overflow_610: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_610: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_611 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_611 +.L_small_initial_partial_block_611: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_611 +.L_small_initial_compute_done_611: +.L_after_reduction_611: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_2_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_612 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_612 + +.L_16_blocks_overflow_612: + 
vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_612: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_613 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_613 +.L_small_initial_partial_block_613: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 
336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_613: + + orq %r8,%r8 + je .L_after_reduction_613 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_613: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_3_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_614 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_614 + +.L_16_blocks_overflow_614: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_614: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 
%zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_615 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_615 +.L_small_initial_partial_block_615: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_615: + + orq %r8,%r8 + je .L_after_reduction_615 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_615: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_4_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_616 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_616 + +.L_16_blocks_overflow_616: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_616: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_617 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_617 +.L_small_initial_partial_block_617: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_617: + + orq %r8,%r8 + je .L_after_reduction_617 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_617: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_5_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_618 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_618 + +.L_16_blocks_overflow_618: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_618: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl 
.L_small_initial_partial_block_619 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_619 +.L_small_initial_partial_block_619: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_619: + + orq %r8,%r8 + je .L_after_reduction_619 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_619: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_6_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_620 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_620 + +.L_16_blocks_overflow_620: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_620: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 
128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_621 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_621 +.L_small_initial_partial_block_621: + + + + + + + + + movq %r8,(%rdx) + 
vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_621: + + orq %r8,%r8 + je .L_after_reduction_621 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_621: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_7_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_622 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_622 + +.L_16_blocks_overflow_622: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_622: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + 
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_623 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_623 +.L_small_initial_partial_block_623: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + 
vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_623: + + orq %r8,%r8 + je .L_after_reduction_623 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_623: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_8_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_624 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_624 + +.L_16_blocks_overflow_624: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_624: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_625 + + + + + + subq $16,%r8 + movq 
$0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_625 +.L_small_initial_partial_block_625: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_625: + + orq %r8,%r8 + je .L_after_reduction_625 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_625: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_9_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_626 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_626 + +.L_16_blocks_overflow_626: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_626: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_627 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 
336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_627 +.L_small_initial_partial_block_627: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_627: + + orq %r8,%r8 + je .L_after_reduction_627 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_627: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_10_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_628 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_628 + +.L_16_blocks_overflow_628: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_628: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_629 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_629 +.L_small_initial_partial_block_629: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_629: + + orq %r8,%r8 + je .L_after_reduction_629 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_629: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_11_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_630 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_630 + +.L_16_blocks_overflow_630: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_630: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_631 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq 
$0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_631 +.L_small_initial_partial_block_631: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_631: + + orq %r8,%r8 + je .L_after_reduction_631 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_631: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_12_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_632 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_632 + +.L_16_blocks_overflow_632: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_632: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_633 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq 
%zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_633 +.L_small_initial_partial_block_633: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_633: + + orq %r8,%r8 + je .L_after_reduction_633 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_633: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_13_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_634 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_634 + +.L_16_blocks_overflow_634: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_634: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq 
%zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_635 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq 
$0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_635 +.L_small_initial_partial_block_635: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_635: + + orq %r8,%r8 + je .L_after_reduction_635 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_635: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_14_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_636 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + 
vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_636 + +.L_16_blocks_overflow_636: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_636: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq 
%zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_637 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_637 +.L_small_initial_partial_block_637: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + 
vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_637: + + orq %r8,%r8 + je .L_after_reduction_637 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_637: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_15_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_638 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_638 + +.L_16_blocks_overflow_638: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_638: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_639 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_639 +.L_small_initial_partial_block_639: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_639: + + orq %r8,%r8 + je .L_after_reduction_639 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_639: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_16_609: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_640 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_640 + +.L_16_blocks_overflow_640: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_640: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 
+ vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_641: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + 
vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_641: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_641: + jmp .L_last_blocks_done_609 +.L_last_num_blocks_is_0_609: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_609: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_497 + +.L_message_below_equal_16_blocks_497: + + + movl %r8d,%r12d + addl $15,%r12d + shrl $4,%r12d + cmpq $8,%r12 + je .L_small_initial_num_blocks_is_8_642 + jl .L_small_initial_num_blocks_is_7_1_642 + + + cmpq $12,%r12 + je .L_small_initial_num_blocks_is_12_642 + jl .L_small_initial_num_blocks_is_11_9_642 + + + cmpq $16,%r12 + je .L_small_initial_num_blocks_is_16_642 + cmpq $15,%r12 + je .L_small_initial_num_blocks_is_15_642 + cmpq $14,%r12 + je .L_small_initial_num_blocks_is_14_642 + jmp .L_small_initial_num_blocks_is_13_642 + +.L_small_initial_num_blocks_is_11_9_642: + + cmpq $11,%r12 + je .L_small_initial_num_blocks_is_11_642 + cmpq $10,%r12 + je .L_small_initial_num_blocks_is_10_642 + jmp 
.L_small_initial_num_blocks_is_9_642 + +.L_small_initial_num_blocks_is_7_1_642: + cmpq $4,%r12 + je .L_small_initial_num_blocks_is_4_642 + jl .L_small_initial_num_blocks_is_3_1_642 + + cmpq $7,%r12 + je .L_small_initial_num_blocks_is_7_642 + cmpq $6,%r12 + je .L_small_initial_num_blocks_is_6_642 + jmp .L_small_initial_num_blocks_is_5_642 + +.L_small_initial_num_blocks_is_3_1_642: + + cmpq $3,%r12 + je .L_small_initial_num_blocks_is_3_642 + cmpq $2,%r12 + je .L_small_initial_num_blocks_is_2_642 + + + + + +.L_small_initial_num_blocks_is_1_642: + vmovdqa64 SHUF_MASK(%rip),%xmm29 + vpaddd ONE(%rip),%xmm2,%xmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm0,%xmm2 + vpshufb %xmm29,%xmm0,%xmm0 + vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %xmm15,%xmm0,%xmm0 + vpxorq %xmm6,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm6,%xmm6 + vextracti32x4 $0,%zmm6,%xmm13 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_643 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_643 +.L_small_initial_partial_block_643: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + + + + + + + + + + + + vpxorq %xmm13,%xmm14,%xmm14 + + jmp .L_after_reduction_643 +.L_small_initial_compute_done_643: +.L_after_reduction_643: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_2_642: + vmovdqa64 SHUF_MASK(%rip),%ymm29 + vshufi64x2 $0,%ymm2,%ymm2,%ymm0 + vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm0,%xmm2 + vpshufb %ymm29,%ymm0,%ymm0 + vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 
48(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %ymm15,%ymm0,%ymm0 + vpxorq %ymm6,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm6,%ymm6 + vextracti32x4 $1,%zmm6,%xmm13 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_644 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_644 +.L_small_initial_partial_block_644: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_644: + + orq %r8,%r8 + je .L_after_reduction_644 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_644: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_3_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vextracti32x4 $2,%zmm6,%xmm13 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_645 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_645 +.L_small_initial_partial_block_645: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_645: + + orq %r8,%r8 + je .L_after_reduction_645 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_645: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_4_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vextracti32x4 $3,%zmm6,%xmm13 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_646 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_646 +.L_small_initial_partial_block_646: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_646: + + orq %r8,%r8 + je .L_after_reduction_646 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_646: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_5_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 
48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %xmm15,%xmm3,%xmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %xmm7,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %xmm29,%xmm7,%xmm7 + vextracti32x4 $0,%zmm7,%xmm13 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_647 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_647 +.L_small_initial_partial_block_647: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_647: + + orq %r8,%r8 + je .L_after_reduction_647 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_647: + jmp .L_small_initial_blocks_encrypted_642 
+.L_small_initial_num_blocks_is_6_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %ymm15,%ymm3,%ymm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %ymm7,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %ymm29,%ymm7,%ymm7 + vextracti32x4 $1,%zmm7,%xmm13 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_648 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_648 +.L_small_initial_partial_block_648: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq 
$0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_648: + + orq %r8,%r8 + je .L_after_reduction_648 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_648: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_7_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vextracti32x4 $2,%zmm7,%xmm13 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_649 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + 
vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_649 +.L_small_initial_partial_block_649: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_649: + + orq %r8,%r8 + je .L_after_reduction_649 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_649: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_8_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast 
%zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vextracti32x4 $3,%zmm7,%xmm13 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_650 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_650 +.L_small_initial_partial_block_650: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_650: + + orq %r8,%r8 + je .L_after_reduction_650 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_650: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_9_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm4,%xmm2 + vpshufb 
%zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %xmm10,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %xmm29,%xmm10,%xmm10 + vextracti32x4 $0,%zmm10,%xmm13 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_651 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp 
.L_small_initial_compute_done_651 +.L_small_initial_partial_block_651: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_651: + + orq %r8,%r8 + je .L_after_reduction_651 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_651: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_10_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %ymm15,%ymm4,%ymm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %ymm10,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 
%zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %ymm29,%ymm10,%ymm10 + vextracti32x4 $1,%zmm10,%xmm13 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_652 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_652 +.L_small_initial_partial_block_652: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_652: + + orq %r8,%r8 + je 
.L_after_reduction_652 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_652: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_11_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vextracti32x4 $2,%zmm10,%xmm13 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_653 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 
$1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_653 +.L_small_initial_partial_block_653: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_653: + + orq %r8,%r8 + je .L_after_reduction_653 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_653: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_12_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vextracti32x4 $3,%zmm10,%xmm13 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_654 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_654 +.L_small_initial_partial_block_654: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 
+ vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_654: + + orq %r8,%r8 + je .L_after_reduction_654 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_654: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_13_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %xmm11,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb 
%zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %xmm29,%xmm11,%xmm11 + vextracti32x4 $0,%zmm11,%xmm13 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_655 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_655 +.L_small_initial_partial_block_655: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_655: + + orq %r8,%r8 + je .L_after_reduction_655 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_655: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_14_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %ymm15,%ymm5,%ymm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %ymm11,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %ymm29,%ymm11,%ymm11 + vextracti32x4 $1,%zmm11,%xmm13 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_656 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq 
$0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_656 +.L_small_initial_partial_block_656: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_656: + + orq %r8,%r8 + je .L_after_reduction_656 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_656: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_15_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd 
ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vextracti32x4 $2,%zmm11,%xmm13 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_657 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq 
$0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_657 +.L_small_initial_partial_block_657: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_657: + + orq %r8,%r8 + je .L_after_reduction_657 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_657: + jmp .L_small_initial_blocks_encrypted_642 +.L_small_initial_num_blocks_is_16_642: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 
192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vextracti32x4 $3,%zmm11,%xmm13 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_658: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq 
%ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_658: + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_658: +.L_small_initial_blocks_encrypted_642: +.L_ghash_done_497: + vmovdqu64 %xmm2,0(%rsi) + vmovdqu64 %xmm14,64(%rsi) +.L_enc_dec_done_497: + jmp .Lexit_gcm_decrypt +.align 32 +.Laes_gcm_decrypt_192_avx512: + orq %r8,%r8 + je .L_enc_dec_done_659 + xorq %r14,%r14 + vmovdqu64 64(%rsi),%xmm14 + + movq (%rdx),%r11 + orq %r11,%r11 + je .L_partial_block_done_660 + movl $16,%r10d + leaq byte_len_to_mask_table(%rip),%r12 + cmpq %r10,%r8 + cmovcq %r8,%r10 + kmovw (%r12,%r10,2),%k1 + vmovdqu8 (%rcx),%xmm0{%k1}{z} + + vmovdqu64 16(%rsi),%xmm3 + vmovdqu64 336(%rsi),%xmm4 + + + + leaq SHIFT_MASK(%rip),%r12 + addq %r11,%r12 + vmovdqu64 (%r12),%xmm5 + vpshufb %xmm5,%xmm3,%xmm3 + + vmovdqa64 %xmm0,%xmm6 + vpxorq %xmm0,%xmm3,%xmm3 + + + leaq (%r8,%r11,1),%r13 + subq $16,%r13 + jge .L_no_extra_mask_660 + subq %r13,%r12 +.L_no_extra_mask_660: + + + + vmovdqu64 16(%r12),%xmm0 + vpand %xmm0,%xmm3,%xmm3 + vpand %xmm0,%xmm6,%xmm6 + vpshufb SHUF_MASK(%rip),%xmm6,%xmm6 + vpshufb %xmm5,%xmm6,%xmm6 + vpxorq %xmm6,%xmm14,%xmm14 + cmpq $0,%r13 + jl .L_partial_incomplete_660 + + vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 + vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 + vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 + vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 + vpxorq %xmm11,%xmm14,%xmm14 + + vpsrldq $8,%xmm14,%xmm11 + vpslldq $8,%xmm14,%xmm14 + vpxorq %xmm11,%xmm7,%xmm7 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vmovdqu64 POLY2(%rip),%xmm11 + + vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 + vpslldq $8,%xmm10,%xmm10 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 + vpsrldq $4,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 + vpslldq $4,%xmm14,%xmm14 + + vpternlogq $0x96,%xmm10,%xmm7,%xmm14 + + movq $0,(%rdx) + + movq %r11,%r12 + movq $16,%r11 + subq %r12,%r11 + jmp .L_enc_dec_done_660 + +.L_partial_incomplete_660: + addq %r8,(%rdx) + movq %r8,%r11 + +.L_enc_dec_done_660: + + + leaq byte_len_to_mask_table(%rip),%r12 + kmovw (%r12,%r11,2),%k1 + vmovdqu64 %xmm14,64(%rsi) + movq %r9,%r12 + vmovdqu8 %xmm3,(%r12){%k1} +.L_partial_block_done_660: + vmovdqu64 0(%rsi),%xmm2 + subq %r11,%r8 + je .L_enc_dec_done_659 + cmpq $256,%r8 + jbe .L_message_below_equal_16_blocks_659 + + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vmovdqa64 ddq_addbe_4444(%rip),%zmm27 + vmovdqa64 ddq_addbe_1234(%rip),%zmm28 + + + + + + + vmovd %xmm2,%r15d + andl $255,%r15d + + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpshufb %zmm29,%zmm2,%zmm2 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_661 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_661 +.L_next_16_overflow_661: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_661: + vshufi64x2 
$255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 0(%rcx,%r11,1),%zmm0 + vmovdqu8 64(%rcx,%r11,1),%zmm3 + vmovdqu8 128(%rcx,%r11,1),%zmm4 + vmovdqu8 192(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,0(%r10,%r11,1) + vmovdqu8 %zmm10,64(%r10,%r11,1) + vmovdqu8 %zmm11,128(%r10,%r11,1) + vmovdqu8 %zmm12,192(%r10,%r11,1) + + vpshufb %zmm29,%zmm0,%zmm7 + vpshufb %zmm29,%zmm3,%zmm10 + vpshufb %zmm29,%zmm4,%zmm11 + vpshufb %zmm29,%zmm5,%zmm12 + vmovdqa64 %zmm7,768(%rsp) + vmovdqa64 %zmm10,832(%rsp) + vmovdqa64 %zmm11,896(%rsp) + vmovdqa64 %zmm12,960(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_662 + + vmovdqu64 288(%rsi),%zmm0 + vmovdqu64 %zmm0,704(%rsp) + + vmovdqu64 224(%rsi),%zmm3 + vmovdqu64 %zmm3,640(%rsp) + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 160(%rsi),%zmm4 + vmovdqu64 %zmm4,576(%rsp) + + vmovdqu64 96(%rsi),%zmm5 + vmovdqu64 %zmm5,512(%rsp) +.L_skip_hkeys_precomputation_662: + cmpq $512,%r8 + jb .L_message_below_32_blocks_659 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_663 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_663 +.L_next_16_overflow_663: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb 
%zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_663: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 256(%rcx,%r11,1),%zmm0 + vmovdqu8 320(%rcx,%r11,1),%zmm3 + vmovdqu8 384(%rcx,%r11,1),%zmm4 + vmovdqu8 448(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,256(%r10,%r11,1) + vmovdqu8 %zmm10,320(%r10,%r11,1) + vmovdqu8 %zmm11,384(%r10,%r11,1) + vmovdqu8 %zmm12,448(%r10,%r11,1) + + vpshufb %zmm29,%zmm0,%zmm7 + vpshufb %zmm29,%zmm3,%zmm10 + vpshufb %zmm29,%zmm4,%zmm11 + vpshufb %zmm29,%zmm5,%zmm12 + vmovdqa64 %zmm7,1024(%rsp) + vmovdqa64 %zmm10,1088(%rsp) + vmovdqa64 %zmm11,1152(%rsp) + vmovdqa64 %zmm12,1216(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_664 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq 
$0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,192(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,128(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 
+ + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,64(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,0(%rsp) +.L_skip_hkeys_precomputation_664: + movq $1,%r14 + addq $512,%r11 + subq $512,%r8 + + cmpq $768,%r8 + jb .L_no_more_big_nblocks_659 +.L_encrypt_big_nblocks_659: + cmpb $240,%r15b + jae .L_16_blocks_overflow_665 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_665 +.L_16_blocks_overflow_665: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_665: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_666 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_666 +.L_16_blocks_overflow_666: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_666: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 
64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_667 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_667 +.L_16_blocks_overflow_667: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_667: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + 
vmovdqa64 1344(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 512(%rcx,%r11,1),%zmm17 + vmovdqu8 576(%rcx,%r11,1),%zmm19 + vmovdqu8 640(%rcx,%r11,1),%zmm20 + vmovdqu8 704(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + + + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpternlogq $0x96,%zmm15,%zmm12,%zmm6 + vpxorq %zmm24,%zmm6,%zmm6 + vpternlogq $0x96,%zmm10,%zmm13,%zmm7 + vpxorq %zmm25,%zmm7,%zmm7 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vextracti64x4 $1,%zmm6,%ymm12 + vpxorq %ymm12,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm12 + vpxorq %xmm12,%xmm6,%xmm6 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm6 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,512(%r10,%r11,1) + vmovdqu8 %zmm3,576(%r10,%r11,1) + vmovdqu8 %zmm4,640(%r10,%r11,1) + vmovdqu8 %zmm5,704(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1024(%rsp) + vmovdqa64 %zmm3,1088(%rsp) + vmovdqa64 %zmm4,1152(%rsp) + vmovdqa64 %zmm5,1216(%rsp) + vmovdqa64 %zmm6,%zmm14 + + addq $768,%r11 + subq $768,%r8 + cmpq $768,%r8 + jae .L_encrypt_big_nblocks_659 + +.L_no_more_big_nblocks_659: + + cmpq $512,%r8 + jae .L_encrypt_32_blocks_659 + + cmpq $256,%r8 + jae .L_encrypt_16_blocks_659 +.L_encrypt_0_blocks_ghash_32_659: + movl %r8d,%r10d + andl $~15,%r10d + movl $256,%ebx + subl %r10d,%ebx + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + addl $256,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_668 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_668 + jb .L_last_num_blocks_is_7_1_668 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_668 + jb .L_last_num_blocks_is_11_9_668 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_668 + ja .L_last_num_blocks_is_16_668 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_668 + jmp .L_last_num_blocks_is_13_668 + +.L_last_num_blocks_is_11_9_668: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_668 + ja .L_last_num_blocks_is_11_668 + jmp .L_last_num_blocks_is_9_668 + +.L_last_num_blocks_is_7_1_668: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_668 + jb .L_last_num_blocks_is_3_1_668 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_668 + je .L_last_num_blocks_is_6_668 + jmp .L_last_num_blocks_is_5_668 + +.L_last_num_blocks_is_3_1_668: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_668 + je .L_last_num_blocks_is_2_668 +.L_last_num_blocks_is_1_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_669 + vpaddd %xmm28,%xmm2,%xmm0 + jmp 
.L_16_blocks_ok_669 + +.L_16_blocks_overflow_669: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_669: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_670 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq 
$0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_670 +.L_small_initial_partial_block_670: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_670 +.L_small_initial_compute_done_670: +.L_after_reduction_670: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_2_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_671 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_671 + +.L_16_blocks_overflow_671: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_671: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 
+ vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_672 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_672 +.L_small_initial_partial_block_672: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_672: + + orq %r8,%r8 + je .L_after_reduction_672 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_672: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_3_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_673 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_673 + +.L_16_blocks_overflow_673: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_673: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + 
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_674 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_674 +.L_small_initial_partial_block_674: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + 
vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_674: + + orq %r8,%r8 + je .L_after_reduction_674 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_674: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_4_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_675 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_675 + +.L_16_blocks_overflow_675: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_675: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_676 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq 
%zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_676 +.L_small_initial_partial_block_676: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_676: + + orq %r8,%r8 + je .L_after_reduction_676 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_676: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_5_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_677 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_677 + +.L_16_blocks_overflow_677: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_677: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_678 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_678 +.L_small_initial_partial_block_678: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + 
vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_678: + + orq %r8,%r8 + je .L_after_reduction_678 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_678: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_6_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_679 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_679 + +.L_16_blocks_overflow_679: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_679: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_680 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_680 +.L_small_initial_partial_block_680: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_680: + + orq %r8,%r8 + je .L_after_reduction_680 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_680: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_7_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq 
$64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_681 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_681 + +.L_16_blocks_overflow_681: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_681: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_682 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_682 +.L_small_initial_partial_block_682: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_682: + + orq %r8,%r8 + je .L_after_reduction_682 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_682: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_8_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_683 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_683 + +.L_16_blocks_overflow_683: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_683: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_684 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_684 +.L_small_initial_partial_block_684: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_684: + + orq %r8,%r8 + je .L_after_reduction_684 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_684: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_9_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_685 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_685 + +.L_16_blocks_overflow_685: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_685: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_686 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + 
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_686 +.L_small_initial_partial_block_686: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_686: + + orq %r8,%r8 + je .L_after_reduction_686 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_686: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_10_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_687 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_687 + +.L_16_blocks_overflow_687: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_687: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + 
vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_688 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 
POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_688 +.L_small_initial_partial_block_688: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_688: + + orq %r8,%r8 + je .L_after_reduction_688 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_688: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_11_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_689 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_689 + +.L_16_blocks_overflow_689: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_689: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + 
vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_690 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq 
%zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_690 +.L_small_initial_partial_block_690: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_690: + + orq %r8,%r8 + je .L_after_reduction_690 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_690: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_12_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_691 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_691 + +.L_16_blocks_overflow_691: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_691: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 
1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_692 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + 
vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_692 +.L_small_initial_partial_block_692: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_692: + + orq %r8,%r8 + je .L_after_reduction_692 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_692: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_13_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_693 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_693 + +.L_16_blocks_overflow_693: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_693: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 
64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + 
vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_694 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_694 +.L_small_initial_partial_block_694: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_694: + + orq %r8,%r8 + je .L_after_reduction_694 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_694: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_14_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_695 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_695 + +.L_16_blocks_overflow_695: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_695: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + 
vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_696 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_696 +.L_small_initial_partial_block_696: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_696: + + orq %r8,%r8 + je .L_after_reduction_696 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_696: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_15_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_697 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_697 + +.L_16_blocks_overflow_697: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_697: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_698 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 
+ vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_698 +.L_small_initial_partial_block_698: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_698: + + orq %r8,%r8 + je .L_after_reduction_698 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_698: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_16_668: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_699 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_699 + +.L_16_blocks_overflow_699: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_699: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 
0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + 
vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_700: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_700: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_700: + jmp .L_last_blocks_done_668 +.L_last_num_blocks_is_0_668: + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq 
%zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_668: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_659 +.L_encrypt_32_blocks_659: + cmpb $240,%r15b + jae .L_16_blocks_overflow_701 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_701 +.L_16_blocks_overflow_701: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_701: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + 
vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_702 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_702 +.L_16_blocks_overflow_702: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_702: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq 
$0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + + subq $512,%r8 + addq $512,%r11 + movl %r8d,%r10d + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_703 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_703 + jb .L_last_num_blocks_is_7_1_703 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_703 + jb .L_last_num_blocks_is_11_9_703 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_703 + ja .L_last_num_blocks_is_16_703 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_703 + jmp .L_last_num_blocks_is_13_703 + +.L_last_num_blocks_is_11_9_703: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_703 + ja .L_last_num_blocks_is_11_703 + jmp .L_last_num_blocks_is_9_703 + +.L_last_num_blocks_is_7_1_703: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_703 + jb .L_last_num_blocks_is_3_1_703 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_703 + je .L_last_num_blocks_is_6_703 + jmp .L_last_num_blocks_is_5_703 + +.L_last_num_blocks_is_3_1_703: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_703 + je .L_last_num_blocks_is_2_703 +.L_last_num_blocks_is_1_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_704 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_704 + +.L_16_blocks_overflow_704: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_704: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 
96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_705 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_705 +.L_small_initial_partial_block_705: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_705 +.L_small_initial_compute_done_705: +.L_after_reduction_705: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_2_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_706 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_706 + +.L_16_blocks_overflow_706: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_706: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_707 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_707 +.L_small_initial_partial_block_707: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq 
%zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_707: + + orq %r8,%r8 + je .L_after_reduction_707 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_707: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_3_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_708 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_708 + +.L_16_blocks_overflow_708: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_708: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 
%zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_709 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_709 +.L_small_initial_partial_block_709: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_709: + + orq %r8,%r8 + je .L_after_reduction_709 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_709: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_4_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_710 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_710 + +.L_16_blocks_overflow_710: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_710: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_711 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_711 +.L_small_initial_partial_block_711: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + 
vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_711: + + orq %r8,%r8 + je .L_after_reduction_711 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_711: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_5_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_712 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_712 + +.L_16_blocks_overflow_712: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_712: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 
%zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_713 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_713 +.L_small_initial_partial_block_713: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_713: + + orq %r8,%r8 + je .L_after_reduction_713 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_713: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_6_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_714 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_714 + +.L_16_blocks_overflow_714: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_714: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + 
vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_715 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_715 +.L_small_initial_partial_block_715: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_715: + + orq %r8,%r8 + je .L_after_reduction_715 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_715: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_7_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_716 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_716 + +.L_16_blocks_overflow_716: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_716: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + 
vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_717 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_717 +.L_small_initial_partial_block_717: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 
+ vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_717: + + orq %r8,%r8 + je .L_after_reduction_717 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_717: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_8_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_718 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_718 + +.L_16_blocks_overflow_718: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_718: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + 
vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_719 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_719 +.L_small_initial_partial_block_719: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_719: + + orq %r8,%r8 + je .L_after_reduction_719 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_719: + jmp 
.L_last_blocks_done_703 +.L_last_num_blocks_is_9_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_720 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_720 + +.L_16_blocks_overflow_720: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_720: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + 
vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_721 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_721 +.L_small_initial_partial_block_721: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_721: + + orq %r8,%r8 + je .L_after_reduction_721 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_721: + jmp 
.L_last_blocks_done_703 +.L_last_num_blocks_is_10_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_722 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_722 + +.L_16_blocks_overflow_722: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_722: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + 
vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_723 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_723 +.L_small_initial_partial_block_723: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_723: + + orq %r8,%r8 + je .L_after_reduction_723 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_723: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_11_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_724 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_724 + +.L_16_blocks_overflow_724: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_724: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_725 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_725 +.L_small_initial_partial_block_725: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq 
%ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_725: + + orq %r8,%r8 + je .L_after_reduction_725 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_725: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_12_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_726 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_726 + +.L_16_blocks_overflow_726: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_726: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_727 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_727 +.L_small_initial_partial_block_727: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq 
$8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_727: + + orq %r8,%r8 + je .L_after_reduction_727 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_727: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_13_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_728 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_728 + +.L_16_blocks_overflow_728: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_728: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + 
+ + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_729 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq 
$4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_729 +.L_small_initial_partial_block_729: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_729: + + orq %r8,%r8 + je .L_after_reduction_729 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_729: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_14_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_730 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_730 + +.L_16_blocks_overflow_730: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_730: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + 
vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_731 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq 
$0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_731 +.L_small_initial_partial_block_731: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_731: + + orq %r8,%r8 + je .L_after_reduction_731 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_731: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_15_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_732 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_732 + +.L_16_blocks_overflow_732: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 
ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_732: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 
+ vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_733 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_733 +.L_small_initial_partial_block_733: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + 
vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_733: + + orq %r8,%r8 + je .L_after_reduction_733 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_733: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_16_703: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_734 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_734 + +.L_16_blocks_overflow_734: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_734: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_735: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_735: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_735: + jmp .L_last_blocks_done_703 +.L_last_num_blocks_is_0_703: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_703: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_659 +.L_encrypt_16_blocks_659: + cmpb $240,%r15b + jae .L_16_blocks_overflow_736 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_736 +.L_16_blocks_overflow_736: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_736: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + 
vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 256(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 320(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq 
$0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 384(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 448(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_737 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_737 + jb .L_last_num_blocks_is_7_1_737 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_737 + jb .L_last_num_blocks_is_11_9_737 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_737 + ja .L_last_num_blocks_is_16_737 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_737 + jmp .L_last_num_blocks_is_13_737 + +.L_last_num_blocks_is_11_9_737: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_737 + ja .L_last_num_blocks_is_11_737 + jmp .L_last_num_blocks_is_9_737 + +.L_last_num_blocks_is_7_1_737: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_737 + jb .L_last_num_blocks_is_3_1_737 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_737 + je .L_last_num_blocks_is_6_737 + jmp .L_last_num_blocks_is_5_737 + +.L_last_num_blocks_is_3_1_737: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_737 + je .L_last_num_blocks_is_2_737 +.L_last_num_blocks_is_1_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_738 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_738 + +.L_16_blocks_overflow_738: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_738: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 
+ vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %xmm31,%xmm0,%xmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_739 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_739 +.L_small_initial_partial_block_739: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_739 +.L_small_initial_compute_done_739: +.L_after_reduction_739: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_2_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_740 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_740 + +.L_16_blocks_overflow_740: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_740: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq 
%ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %ymm31,%ymm0,%ymm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_741 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + 
vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_741 +.L_small_initial_partial_block_741: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_741: + + orq %r8,%r8 + je .L_after_reduction_741 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_741: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_3_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_742 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_742 + +.L_16_blocks_overflow_742: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_742: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq 
$0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_743 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_743 +.L_small_initial_partial_block_743: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_743: + + orq %r8,%r8 + je .L_after_reduction_743 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_743: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_4_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq 
(%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_744 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_744 + +.L_16_blocks_overflow_744: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_744: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_745 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_745 +.L_small_initial_partial_block_745: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_745: + + orq %r8,%r8 + je .L_after_reduction_745 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_745: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_5_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_746 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_746 + +.L_16_blocks_overflow_746: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_746: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 
+ vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_747 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_747 +.L_small_initial_partial_block_747: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_747: + + orq %r8,%r8 + je .L_after_reduction_747 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_747: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_6_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_748 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_748 + +.L_16_blocks_overflow_748: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_748: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_749 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_749 +.L_small_initial_partial_block_749: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq 
$0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_749: + + orq %r8,%r8 + je .L_after_reduction_749 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_749: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_7_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_750 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_750 + +.L_16_blocks_overflow_750: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_750: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq 
$0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_751 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_751 +.L_small_initial_partial_block_751: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + 
+ vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_751: + + orq %r8,%r8 + je .L_after_reduction_751 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_751: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_8_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_752 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_752 + +.L_16_blocks_overflow_752: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_752: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + 
vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_753 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_753 +.L_small_initial_partial_block_753: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_753: + + orq %r8,%r8 + je .L_after_reduction_753 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_753: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_9_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_754 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_754 + +.L_16_blocks_overflow_754: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_754: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_755 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_755 +.L_small_initial_partial_block_755: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq 
$0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_755: + + orq %r8,%r8 + je .L_after_reduction_755 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_755: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_10_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_756 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_756 + +.L_16_blocks_overflow_756: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_756: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_757 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_757 +.L_small_initial_partial_block_757: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_757: + + orq %r8,%r8 + je .L_after_reduction_757 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_757: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_11_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_758 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_758 + +.L_16_blocks_overflow_758: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_758: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 
+ vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_759 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + 
vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_759 +.L_small_initial_partial_block_759: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_759: + + orq %r8,%r8 + je .L_after_reduction_759 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_759: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_12_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_760 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_760 + +.L_16_blocks_overflow_760: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + 
vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_760: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq 
$0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_761 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_761 +.L_small_initial_partial_block_761: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_761: + + orq %r8,%r8 + je .L_after_reduction_761 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_761: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_13_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_762 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_762 + +.L_16_blocks_overflow_762: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_762: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + 
vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_763 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_763 +.L_small_initial_partial_block_763: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_763: + + orq %r8,%r8 + je .L_after_reduction_763 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_763: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_14_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_764 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_764 + +.L_16_blocks_overflow_764: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_764: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq 
$0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb 
%zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_765 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_765 +.L_small_initial_partial_block_765: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq 
%xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_765: + + orq %r8,%r8 + je .L_after_reduction_765 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_765: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_15_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_766 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_766 + +.L_16_blocks_overflow_766: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_766: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq 
$0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_767 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 
$1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_767 +.L_small_initial_partial_block_767: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_767: + + orq %r8,%r8 + je .L_after_reduction_767 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_767: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_16_737: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_768 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_768 + +.L_16_blocks_overflow_768: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_768: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq 
%zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_769: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_769: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_769: + jmp .L_last_blocks_done_737 +.L_last_num_blocks_is_0_737: + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq 
$0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_737: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_659 + +.L_message_below_32_blocks_659: + + + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_770 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + 
vmovdqu64 %zmm5,256(%rsp) +.L_skip_hkeys_precomputation_770: + movq $1,%r14 + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_771 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_771 + jb .L_last_num_blocks_is_7_1_771 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_771 + jb .L_last_num_blocks_is_11_9_771 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_771 + ja .L_last_num_blocks_is_16_771 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_771 + jmp .L_last_num_blocks_is_13_771 + +.L_last_num_blocks_is_11_9_771: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_771 + ja .L_last_num_blocks_is_11_771 + jmp .L_last_num_blocks_is_9_771 + +.L_last_num_blocks_is_7_1_771: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_771 + jb .L_last_num_blocks_is_3_1_771 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_771 + je .L_last_num_blocks_is_6_771 + jmp .L_last_num_blocks_is_5_771 + +.L_last_num_blocks_is_3_1_771: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_771 + je .L_last_num_blocks_is_2_771 +.L_last_num_blocks_is_1_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_772 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_772 + +.L_16_blocks_overflow_772: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_772: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + 
vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_773 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_773 +.L_small_initial_partial_block_773: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_773 +.L_small_initial_compute_done_773: +.L_after_reduction_773: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_2_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_774 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_774 + +.L_16_blocks_overflow_774: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_774: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_775 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_775 +.L_small_initial_partial_block_775: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_775: + + orq %r8,%r8 + je .L_after_reduction_775 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_775: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_3_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_776 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_776 + +.L_16_blocks_overflow_776: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_776: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_777 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_777 +.L_small_initial_partial_block_777: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_777: + + orq %r8,%r8 + je .L_after_reduction_777 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_777: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_4_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_778 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_778 + +.L_16_blocks_overflow_778: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_778: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_779 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_779 +.L_small_initial_partial_block_779: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_779: + + orq %r8,%r8 + je .L_after_reduction_779 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_779: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_5_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_780 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_780 + +.L_16_blocks_overflow_780: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 
ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_780: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_781 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq 
$0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_781 +.L_small_initial_partial_block_781: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_781: + + orq %r8,%r8 + je .L_after_reduction_781 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_781: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_6_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_782 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_782 + +.L_16_blocks_overflow_782: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_782: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_783 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_783 +.L_small_initial_partial_block_783: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq 
$0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_783: + + orq %r8,%r8 + je .L_after_reduction_783 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_783: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_7_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_784 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_784 + +.L_16_blocks_overflow_784: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_784: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq 
%zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_785 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_785 +.L_small_initial_partial_block_785: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_785: + + 
orq %r8,%r8 + je .L_after_reduction_785 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_785: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_8_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_786 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_786 + +.L_16_blocks_overflow_786: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_786: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl 
.L_small_initial_partial_block_787 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_787 +.L_small_initial_partial_block_787: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_787: + + orq %r8,%r8 + je .L_after_reduction_787 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_787: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_9_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_788 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_788 + +.L_16_blocks_overflow_788: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_788: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 
$0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_789 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 
272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_789 +.L_small_initial_partial_block_789: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_789: + + orq %r8,%r8 + je .L_after_reduction_789 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_789: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_10_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_790 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_790 + +.L_16_blocks_overflow_790: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_790: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 
+ vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_791 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 
+ vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_791 +.L_small_initial_partial_block_791: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_791: + + orq %r8,%r8 + je .L_after_reduction_791 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_791: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_11_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_792 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_792 + +.L_16_blocks_overflow_792: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 
+ vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_792: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_793 + + + + + + subq $16,%r8 + movq 
$0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_793 +.L_small_initial_partial_block_793: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_793: + + orq %r8,%r8 + je .L_after_reduction_793 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_793: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_12_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_794 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd 
%zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_794 + +.L_16_blocks_overflow_794: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_794: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 
%zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_795 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_795 +.L_small_initial_partial_block_795: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_795: + + orq %r8,%r8 + je .L_after_reduction_795 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_795: + jmp 
.L_last_blocks_done_771 +.L_last_num_blocks_is_13_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_796 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_796 + +.L_16_blocks_overflow_796: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_796: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_797 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_797 +.L_small_initial_partial_block_797: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_797: + + orq %r8,%r8 + je .L_after_reduction_797 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_797: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_14_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_798 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_798 + +.L_16_blocks_overflow_798: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_798: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 
0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_799 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + 
vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_799 +.L_small_initial_partial_block_799: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_799: + + orq %r8,%r8 + je .L_after_reduction_799 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_799: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_15_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_800 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_800 + +.L_16_blocks_overflow_800: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_800: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_801 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + 
vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_801 +.L_small_initial_partial_block_801: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_801: + + orq 
%r8,%r8 + je .L_after_reduction_801 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_801: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_16_771: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_802 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_802 + +.L_16_blocks_overflow_802: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_802: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc 
%zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_803: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_803: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_803: + jmp .L_last_blocks_done_771 +.L_last_num_blocks_is_0_771: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq 
$0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_771: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_659 + +.L_message_below_equal_16_blocks_659: + + + movl %r8d,%r12d + addl $15,%r12d + shrl $4,%r12d + cmpq $8,%r12 + je .L_small_initial_num_blocks_is_8_804 + jl .L_small_initial_num_blocks_is_7_1_804 + + + cmpq $12,%r12 + je .L_small_initial_num_blocks_is_12_804 + jl .L_small_initial_num_blocks_is_11_9_804 + + + cmpq $16,%r12 + je .L_small_initial_num_blocks_is_16_804 + cmpq $15,%r12 + je .L_small_initial_num_blocks_is_15_804 + cmpq $14,%r12 + je .L_small_initial_num_blocks_is_14_804 + jmp .L_small_initial_num_blocks_is_13_804 + +.L_small_initial_num_blocks_is_11_9_804: + + cmpq $11,%r12 + je .L_small_initial_num_blocks_is_11_804 + cmpq $10,%r12 + je .L_small_initial_num_blocks_is_10_804 + jmp .L_small_initial_num_blocks_is_9_804 + +.L_small_initial_num_blocks_is_7_1_804: + cmpq $4,%r12 + je .L_small_initial_num_blocks_is_4_804 + jl .L_small_initial_num_blocks_is_3_1_804 + + cmpq $7,%r12 + je .L_small_initial_num_blocks_is_7_804 + cmpq $6,%r12 + je .L_small_initial_num_blocks_is_6_804 + jmp .L_small_initial_num_blocks_is_5_804 + +.L_small_initial_num_blocks_is_3_1_804: + + cmpq $3,%r12 + je .L_small_initial_num_blocks_is_3_804 + cmpq $2,%r12 + je .L_small_initial_num_blocks_is_2_804 + + + + + +.L_small_initial_num_blocks_is_1_804: + vmovdqa64 SHUF_MASK(%rip),%xmm29 + vpaddd ONE(%rip),%xmm2,%xmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm0,%xmm2 + vpshufb %xmm29,%xmm0,%xmm0 + vmovdqu8 0(%rcx,%r11,1),%xmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 
128(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %xmm15,%xmm0,%xmm0 + vpxorq %xmm6,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm6,%xmm6 + vextracti32x4 $0,%zmm6,%xmm13 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_805 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_805 +.L_small_initial_partial_block_805: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + + + + + + + + + + + + vpxorq %xmm13,%xmm14,%xmm14 + + jmp .L_after_reduction_805 +.L_small_initial_compute_done_805: +.L_after_reduction_805: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_2_804: + vmovdqa64 SHUF_MASK(%rip),%ymm29 + vshufi64x2 $0,%ymm2,%ymm2,%ymm0 + vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm0,%xmm2 + vpshufb %ymm29,%ymm0,%ymm0 + vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %ymm15,%ymm0,%ymm0 + vpxorq %ymm6,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm6,%ymm6 + vextracti32x4 $1,%zmm6,%xmm13 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_806 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + 
vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_806 +.L_small_initial_partial_block_806: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_806: + + orq %r8,%r8 + je .L_after_reduction_806 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_806: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_3_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vextracti32x4 $2,%zmm6,%xmm13 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_807 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + 
vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_807 +.L_small_initial_partial_block_807: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_807: + + orq %r8,%r8 + je .L_after_reduction_807 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_807: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_4_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vextracti32x4 $3,%zmm6,%xmm13 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_808 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq 
$0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_808 +.L_small_initial_partial_block_808: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_808: + + orq %r8,%r8 + je .L_after_reduction_808 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_808: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_5_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + 
vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %xmm15,%xmm3,%xmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %xmm7,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %xmm29,%xmm7,%xmm7 + vextracti32x4 $0,%zmm7,%xmm13 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_809 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_809 +.L_small_initial_partial_block_809: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_809: + + orq %r8,%r8 + je .L_after_reduction_809 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_809: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_6_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + 
vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %ymm15,%ymm3,%ymm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %ymm7,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %ymm29,%ymm7,%ymm7 + vextracti32x4 $1,%zmm7,%xmm13 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_810 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_810 +.L_small_initial_partial_block_810: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 
$1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_810: + + orq %r8,%r8 + je .L_after_reduction_810 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_810: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_7_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vextracti32x4 $2,%zmm7,%xmm13 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_811 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 
$1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_811 +.L_small_initial_partial_block_811: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_811: + + orq %r8,%r8 + je .L_after_reduction_811 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_811: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_8_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 
+ vextracti32x4 $3,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vextracti32x4 $3,%zmm7,%xmm13 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_812 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_812 +.L_small_initial_partial_block_812: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_812: + + orq %r8,%r8 + je .L_after_reduction_812 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_812: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_9_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 + vmovdqu8 
0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %xmm10,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %xmm29,%xmm10,%xmm10 + vextracti32x4 $0,%zmm10,%xmm13 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_813 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + 
vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_813 +.L_small_initial_partial_block_813: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_813: + + orq %r8,%r8 + je .L_after_reduction_813 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_813: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_10_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 
176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %ymm15,%ymm4,%ymm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %ymm10,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %ymm29,%ymm10,%ymm10 + vextracti32x4 $1,%zmm10,%xmm13 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_814 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_814 +.L_small_initial_partial_block_814: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq 
%xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_814: + + orq %r8,%r8 + je .L_after_reduction_814 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_814: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_11_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vextracti32x4 $2,%zmm10,%xmm13 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_815 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + 
vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_815 +.L_small_initial_partial_block_815: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_815: + + orq %r8,%r8 + je .L_after_reduction_815 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_815: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_12_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + 
vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vextracti32x4 $3,%zmm10,%xmm13 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_816 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_816 +.L_small_initial_partial_block_816: + + + + + + + + + movq 
%r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_816: + + orq %r8,%r8 + je .L_after_reduction_816 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_816: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_13_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc 
%zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %xmm11,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %xmm29,%xmm11,%xmm11 + vextracti32x4 $0,%zmm11,%xmm13 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_817 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_817 +.L_small_initial_partial_block_817: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + 
vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_817: + + orq %r8,%r8 + je .L_after_reduction_817 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_817: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_14_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %ymm15,%ymm5,%ymm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %ymm11,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %ymm29,%ymm11,%ymm11 + vextracti32x4 $1,%zmm11,%xmm13 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_818 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_818 +.L_small_initial_partial_block_818: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq 
$0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_818: + + orq %r8,%r8 + je .L_after_reduction_818 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_818: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_15_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + 
vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vextracti32x4 $2,%zmm11,%xmm13 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_819 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_819 +.L_small_initial_partial_block_819: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + 
vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_819: + + orq %r8,%r8 + je .L_after_reduction_819 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_819: + jmp .L_small_initial_blocks_encrypted_804 +.L_small_initial_num_blocks_is_16_804: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + 
vbroadcastf64x2 192(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vextracti32x4 $3,%zmm11,%xmm13 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_820: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_820: + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_820: +.L_small_initial_blocks_encrypted_804: +.L_ghash_done_659: + vmovdqu64 %xmm2,0(%rsi) + vmovdqu64 %xmm14,64(%rsi) +.L_enc_dec_done_659: + jmp .Lexit_gcm_decrypt +.align 32 +.Laes_gcm_decrypt_256_avx512: + orq %r8,%r8 + je .L_enc_dec_done_821 + xorq %r14,%r14 + vmovdqu64 64(%rsi),%xmm14 + + movq (%rdx),%r11 + orq %r11,%r11 + je .L_partial_block_done_822 + movl $16,%r10d + leaq byte_len_to_mask_table(%rip),%r12 + cmpq %r10,%r8 + cmovcq %r8,%r10 + kmovw (%r12,%r10,2),%k1 + vmovdqu8 (%rcx),%xmm0{%k1}{z} + + vmovdqu64 16(%rsi),%xmm3 + vmovdqu64 336(%rsi),%xmm4 + + + + leaq SHIFT_MASK(%rip),%r12 + addq %r11,%r12 + vmovdqu64 (%r12),%xmm5 + vpshufb %xmm5,%xmm3,%xmm3 + + vmovdqa64 %xmm0,%xmm6 + vpxorq %xmm0,%xmm3,%xmm3 + + + leaq (%r8,%r11,1),%r13 + subq $16,%r13 + jge .L_no_extra_mask_822 + subq %r13,%r12 +.L_no_extra_mask_822: + + + + vmovdqu64 16(%r12),%xmm0 + vpand %xmm0,%xmm3,%xmm3 + vpand %xmm0,%xmm6,%xmm6 + vpshufb 
SHUF_MASK(%rip),%xmm6,%xmm6 + vpshufb %xmm5,%xmm6,%xmm6 + vpxorq %xmm6,%xmm14,%xmm14 + cmpq $0,%r13 + jl .L_partial_incomplete_822 + + vpclmulqdq $0x11,%xmm4,%xmm14,%xmm7 + vpclmulqdq $0x00,%xmm4,%xmm14,%xmm10 + vpclmulqdq $0x01,%xmm4,%xmm14,%xmm11 + vpclmulqdq $0x10,%xmm4,%xmm14,%xmm14 + vpxorq %xmm11,%xmm14,%xmm14 + + vpsrldq $8,%xmm14,%xmm11 + vpslldq $8,%xmm14,%xmm14 + vpxorq %xmm11,%xmm7,%xmm7 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vmovdqu64 POLY2(%rip),%xmm11 + + vpclmulqdq $0x01,%xmm14,%xmm11,%xmm10 + vpslldq $8,%xmm10,%xmm10 + vpxorq %xmm10,%xmm14,%xmm14 + + + + vpclmulqdq $0x00,%xmm14,%xmm11,%xmm10 + vpsrldq $4,%xmm10,%xmm10 + vpclmulqdq $0x10,%xmm14,%xmm11,%xmm14 + vpslldq $4,%xmm14,%xmm14 + + vpternlogq $0x96,%xmm10,%xmm7,%xmm14 + + movq $0,(%rdx) + + movq %r11,%r12 + movq $16,%r11 + subq %r12,%r11 + jmp .L_enc_dec_done_822 + +.L_partial_incomplete_822: + addq %r8,(%rdx) + movq %r8,%r11 + +.L_enc_dec_done_822: + + + leaq byte_len_to_mask_table(%rip),%r12 + kmovw (%r12,%r11,2),%k1 + vmovdqu64 %xmm14,64(%rsi) + movq %r9,%r12 + vmovdqu8 %xmm3,(%r12){%k1} +.L_partial_block_done_822: + vmovdqu64 0(%rsi),%xmm2 + subq %r11,%r8 + je .L_enc_dec_done_821 + cmpq $256,%r8 + jbe .L_message_below_equal_16_blocks_821 + + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vmovdqa64 ddq_addbe_4444(%rip),%zmm27 + vmovdqa64 ddq_addbe_1234(%rip),%zmm28 + + + + + + + vmovd %xmm2,%r15d + andl $255,%r15d + + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpshufb %zmm29,%zmm2,%zmm2 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_823 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_823 +.L_next_16_overflow_823: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_823: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 0(%rcx,%r11,1),%zmm0 + vmovdqu8 64(%rcx,%r11,1),%zmm3 + vmovdqu8 128(%rcx,%r11,1),%zmm4 + vmovdqu8 192(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc 
%zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 208(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 224(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,0(%r10,%r11,1) + vmovdqu8 %zmm10,64(%r10,%r11,1) + vmovdqu8 %zmm11,128(%r10,%r11,1) + vmovdqu8 %zmm12,192(%r10,%r11,1) + + vpshufb %zmm29,%zmm0,%zmm7 + vpshufb %zmm29,%zmm3,%zmm10 + vpshufb %zmm29,%zmm4,%zmm11 + vpshufb %zmm29,%zmm5,%zmm12 + vmovdqa64 %zmm7,768(%rsp) + vmovdqa64 %zmm10,832(%rsp) + vmovdqa64 %zmm11,896(%rsp) + vmovdqa64 %zmm12,960(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_824 + + vmovdqu64 288(%rsi),%zmm0 + vmovdqu64 %zmm0,704(%rsp) + + vmovdqu64 224(%rsi),%zmm3 + vmovdqu64 %zmm3,640(%rsp) + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 160(%rsi),%zmm4 + vmovdqu64 %zmm4,576(%rsp) + + vmovdqu64 96(%rsi),%zmm5 + vmovdqu64 %zmm5,512(%rsp) +.L_skip_hkeys_precomputation_824: + cmpq $512,%r8 + jb .L_message_below_32_blocks_821 + + + + cmpb $240,%r15b + jae .L_next_16_overflow_825 + vpaddd %zmm28,%zmm2,%zmm7 + vpaddd %zmm27,%zmm7,%zmm10 + vpaddd %zmm27,%zmm10,%zmm11 + vpaddd %zmm27,%zmm11,%zmm12 + jmp .L_next_16_ok_825 +.L_next_16_overflow_825: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm12 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm7 + vpaddd %zmm12,%zmm7,%zmm10 + vpaddd %zmm12,%zmm10,%zmm11 + vpaddd %zmm12,%zmm11,%zmm12 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vpshufb %zmm29,%zmm12,%zmm12 +.L_next_16_ok_825: + vshufi64x2 $255,%zmm12,%zmm12,%zmm2 + addb $16,%r15b + + vmovdqu8 256(%rcx,%r11,1),%zmm0 + vmovdqu8 320(%rcx,%r11,1),%zmm3 + vmovdqu8 384(%rcx,%r11,1),%zmm4 + vmovdqu8 448(%rcx,%r11,1),%zmm5 + + + vbroadcastf64x2 0(%rdi),%zmm6 + vpxorq %zmm6,%zmm7,%zmm7 + vpxorq %zmm6,%zmm10,%zmm10 + vpxorq %zmm6,%zmm11,%zmm11 + vpxorq %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 16(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 32(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 48(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 64(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 80(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 
96(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 112(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 128(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 144(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 160(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 176(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 192(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 208(%rdi),%zmm6 + vaesenc %zmm6,%zmm7,%zmm7 + vaesenc %zmm6,%zmm10,%zmm10 + vaesenc %zmm6,%zmm11,%zmm11 + vaesenc %zmm6,%zmm12,%zmm12 + vbroadcastf64x2 224(%rdi),%zmm6 + vaesenclast %zmm6,%zmm7,%zmm7 + vaesenclast %zmm6,%zmm10,%zmm10 + vaesenclast %zmm6,%zmm11,%zmm11 + vaesenclast %zmm6,%zmm12,%zmm12 + + + vpxorq %zmm0,%zmm7,%zmm7 + vpxorq %zmm3,%zmm10,%zmm10 + vpxorq %zmm4,%zmm11,%zmm11 + vpxorq %zmm5,%zmm12,%zmm12 + + + movq %r9,%r10 + vmovdqu8 %zmm7,256(%r10,%r11,1) + vmovdqu8 %zmm10,320(%r10,%r11,1) + vmovdqu8 %zmm11,384(%r10,%r11,1) + vmovdqu8 %zmm12,448(%r10,%r11,1) + + vpshufb %zmm29,%zmm0,%zmm7 + vpshufb %zmm29,%zmm3,%zmm10 + vpshufb %zmm29,%zmm4,%zmm11 + vpshufb %zmm29,%zmm5,%zmm12 + vmovdqa64 %zmm7,1024(%rsp) + vmovdqa64 %zmm10,1088(%rsp) + vmovdqa64 %zmm11,1152(%rsp) + vmovdqa64 %zmm12,1216(%rsp) + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_826 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq 
%zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,192(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,128(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,64(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,0(%rsp) +.L_skip_hkeys_precomputation_826: + movq $1,%r14 + addq $512,%r11 + subq $512,%r8 + + cmpq 
$768,%r8 + jb .L_no_more_big_nblocks_821 +.L_encrypt_big_nblocks_821: + cmpb $240,%r15b + jae .L_16_blocks_overflow_827 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_827 +.L_16_blocks_overflow_827: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_827: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_828 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_828 +.L_16_blocks_overflow_828: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_828: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc 
%zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_829 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_829 +.L_16_blocks_overflow_829: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_829: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 512(%rcx,%r11,1),%zmm17 + vmovdqu8 576(%rcx,%r11,1),%zmm19 + vmovdqu8 640(%rcx,%r11,1),%zmm20 + vmovdqu8 704(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + + + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpternlogq $0x96,%zmm15,%zmm12,%zmm6 + vpxorq %zmm24,%zmm6,%zmm6 + vpternlogq $0x96,%zmm10,%zmm13,%zmm7 + vpxorq %zmm25,%zmm7,%zmm7 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vextracti64x4 $1,%zmm6,%ymm12 + vpxorq %ymm12,%ymm6,%ymm6 + vextracti32x4 $1,%ymm6,%xmm12 + vpxorq %xmm12,%xmm6,%xmm6 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 
+ vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm6 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,512(%r10,%r11,1) + vmovdqu8 %zmm3,576(%r10,%r11,1) + vmovdqu8 %zmm4,640(%r10,%r11,1) + vmovdqu8 %zmm5,704(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1024(%rsp) + vmovdqa64 %zmm3,1088(%rsp) + vmovdqa64 %zmm4,1152(%rsp) + vmovdqa64 %zmm5,1216(%rsp) + vmovdqa64 %zmm6,%zmm14 + + addq $768,%r11 + subq $768,%r8 + cmpq $768,%r8 + jae .L_encrypt_big_nblocks_821 + +.L_no_more_big_nblocks_821: + + cmpq $512,%r8 + jae .L_encrypt_32_blocks_821 + + cmpq $256,%r8 + jae .L_encrypt_16_blocks_821 +.L_encrypt_0_blocks_ghash_32_821: + movl %r8d,%r10d + andl $~15,%r10d + movl $256,%ebx + subl %r10d,%ebx + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + addl $256,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_830 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_830 + jb .L_last_num_blocks_is_7_1_830 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_830 + jb .L_last_num_blocks_is_11_9_830 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_830 + ja .L_last_num_blocks_is_16_830 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_830 + jmp .L_last_num_blocks_is_13_830 + +.L_last_num_blocks_is_11_9_830: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_830 + ja .L_last_num_blocks_is_11_830 + jmp .L_last_num_blocks_is_9_830 + +.L_last_num_blocks_is_7_1_830: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_830 + jb .L_last_num_blocks_is_3_1_830 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_830 + je .L_last_num_blocks_is_6_830 + jmp .L_last_num_blocks_is_5_830 + +.L_last_num_blocks_is_3_1_830: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_830 + je .L_last_num_blocks_is_2_830 +.L_last_num_blocks_is_1_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_831 + vpaddd %xmm28,%xmm2,%xmm0 + jmp 
.L_16_blocks_ok_831 + +.L_16_blocks_overflow_831: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_831: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_832 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq 
$0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_832 +.L_small_initial_partial_block_832: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_832 +.L_small_initial_compute_done_832: +.L_after_reduction_832: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_2_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_833 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_833 + +.L_16_blocks_overflow_833: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_833: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc 
%ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_834 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_834 +.L_small_initial_partial_block_834: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_834: + + orq %r8,%r8 + je .L_after_reduction_834 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_834: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_3_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_835 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_835 + +.L_16_blocks_overflow_835: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_835: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_836 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_836 +.L_small_initial_partial_block_836: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + 
vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_836: + + orq %r8,%r8 + je .L_after_reduction_836 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_836: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_4_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_837 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_837 + +.L_16_blocks_overflow_837: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_837: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + 
vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_838 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_838 +.L_small_initial_partial_block_838: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_838: + + orq %r8,%r8 + je .L_after_reduction_838 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_838: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_5_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_839 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_839 + +.L_16_blocks_overflow_839: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_839: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + 
vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_840 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_840 +.L_small_initial_partial_block_840: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_840: + + orq %r8,%r8 + je .L_after_reduction_840 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_840: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_6_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_841 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_841 + +.L_16_blocks_overflow_841: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_841: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 
96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_842 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_842 +.L_small_initial_partial_block_842: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq 
%zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_842: + + orq %r8,%r8 + je .L_after_reduction_842 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_842: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_7_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_843 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_843 + +.L_16_blocks_overflow_843: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_843: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_844 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_844 +.L_small_initial_partial_block_844: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_844: + 
+ orq %r8,%r8 + je .L_after_reduction_844 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_844: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_8_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_845 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_845 + +.L_16_blocks_overflow_845: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_845: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 
%zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_846 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_846 +.L_small_initial_partial_block_846: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_846: + + orq %r8,%r8 + je .L_after_reduction_846 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_846: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_9_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_847 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_847 + +.L_16_blocks_overflow_847: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 
+ vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_847: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + 
vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_848 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_848 +.L_small_initial_partial_block_848: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_848: + + orq %r8,%r8 + je .L_after_reduction_848 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_848: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_10_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_849 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd 
%ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_849 + +.L_16_blocks_overflow_849: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_849: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + 
vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_850 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_850 +.L_small_initial_partial_block_850: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_850: + + orq %r8,%r8 + je .L_after_reduction_850 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_850: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_11_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_851 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_851 + +.L_16_blocks_overflow_851: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_851: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc 
%zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_852 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_852 +.L_small_initial_partial_block_852: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + 
vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_852: + + orq %r8,%r8 + je .L_after_reduction_852 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_852: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_12_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_853 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_853 + +.L_16_blocks_overflow_853: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_853: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 
+ vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_854 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_854 +.L_small_initial_partial_block_854: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_854: + + orq %r8,%r8 + je .L_after_reduction_854 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_854: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_13_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_855 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_855 + +.L_16_blocks_overflow_855: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_855: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq 
$0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_856 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq 
$0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_856 +.L_small_initial_partial_block_856: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_856: + + orq %r8,%r8 + je .L_after_reduction_856 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_856: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_14_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_857 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_857 + +.L_16_blocks_overflow_857: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_857: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast 
%ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_858 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_858 +.L_small_initial_partial_block_858: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 
+ vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_858: + + orq %r8,%r8 + je .L_after_reduction_858 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_858: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_15_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_859 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_859 + +.L_16_blocks_overflow_859: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_859: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq 
$0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_860 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + 
vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_860 +.L_small_initial_partial_block_860: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_860: + + orq %r8,%r8 + je .L_after_reduction_860 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_860: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_16_830: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_861 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_861 + +.L_16_blocks_overflow_861: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_861: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + 
vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm14,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + 
vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_862: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_862: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_862: + jmp .L_last_blocks_done_830 +.L_last_num_blocks_is_0_830: + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq 
%ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_830: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_821 +.L_encrypt_32_blocks_821: + cmpb $240,%r15b + jae .L_16_blocks_overflow_863 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_863 +.L_16_blocks_overflow_863: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_863: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 
+ vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + cmpb $240,%r15b + jae .L_16_blocks_overflow_864 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_864 +.L_16_blocks_overflow_864: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_864: + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1024(%rsp),%zmm8 + vmovdqu64 256(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 320(%rsp),%zmm18 + vmovdqa64 1088(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 384(%rsp),%zmm1 + vmovdqa64 1152(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 448(%rsp),%zmm18 + vmovdqa64 1216(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq 
$0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 256(%rcx,%r11,1),%zmm17 + vmovdqu8 320(%rcx,%r11,1),%zmm19 + vmovdqu8 384(%rcx,%r11,1),%zmm20 + vmovdqu8 448(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm15,%zmm10,%zmm26 + vpternlogq $0x96,%zmm12,%zmm6,%zmm24 + vpternlogq $0x96,%zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,256(%r10,%r11,1) + vmovdqu8 %zmm3,320(%r10,%r11,1) + vmovdqu8 %zmm4,384(%r10,%r11,1) + vmovdqu8 %zmm5,448(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,768(%rsp) + vmovdqa64 %zmm3,832(%rsp) + vmovdqa64 %zmm4,896(%rsp) + vmovdqa64 %zmm5,960(%rsp) + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq 
$0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + + subq $512,%r8 + addq $512,%r11 + movl %r8d,%r10d + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_865 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_865 + jb .L_last_num_blocks_is_7_1_865 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_865 + jb .L_last_num_blocks_is_11_9_865 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_865 + ja .L_last_num_blocks_is_16_865 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_865 + jmp .L_last_num_blocks_is_13_865 + +.L_last_num_blocks_is_11_9_865: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_865 + ja .L_last_num_blocks_is_11_865 + jmp .L_last_num_blocks_is_9_865 + +.L_last_num_blocks_is_7_1_865: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_865 + jb .L_last_num_blocks_is_3_1_865 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_865 + je .L_last_num_blocks_is_6_865 + jmp .L_last_num_blocks_is_5_865 + +.L_last_num_blocks_is_3_1_865: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_865 + je .L_last_num_blocks_is_2_865 +.L_last_num_blocks_is_1_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_866 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_866 + +.L_16_blocks_overflow_866: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_866: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 
960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_867 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_867 +.L_small_initial_partial_block_867: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_867 
+.L_small_initial_compute_done_867: +.L_after_reduction_867: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_2_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_868 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_868 + +.L_16_blocks_overflow_868: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_868: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_869 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + 
vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_869 +.L_small_initial_partial_block_869: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_869: + + orq %r8,%r8 + je .L_after_reduction_869 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_869: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_3_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_870 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_870 + +.L_16_blocks_overflow_870: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_870: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + 
vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_871 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_871 +.L_small_initial_partial_block_871: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_871: + + orq %r8,%r8 + je .L_after_reduction_871 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_871: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_4_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_872 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_872 + +.L_16_blocks_overflow_872: 
+ vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_872: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_873 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq 
$4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_873 +.L_small_initial_partial_block_873: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_873: + + orq %r8,%r8 + je .L_after_reduction_873 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_873: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_5_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_874 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_874 + +.L_16_blocks_overflow_874: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_874: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 
112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_875 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_875 +.L_small_initial_partial_block_875: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 
+ vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_875: + + orq %r8,%r8 + je .L_after_reduction_875 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_875: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_6_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_876 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_876 + +.L_16_blocks_overflow_876: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_876: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_877 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_877 +.L_small_initial_partial_block_877: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_877: + + orq %r8,%r8 + je .L_after_reduction_877 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_877: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_7_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_878 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_878 + +.L_16_blocks_overflow_878: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 
ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_878: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_879 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + 
vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_879 +.L_small_initial_partial_block_879: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_879: + + orq %r8,%r8 + je .L_after_reduction_879 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_879: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_8_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_880 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_880 + +.L_16_blocks_overflow_880: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_880: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc 
%zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_881 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + 
vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_881 +.L_small_initial_partial_block_881: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_881: + + orq %r8,%r8 + je .L_after_reduction_881 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_881: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_9_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_882 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_882 + +.L_16_blocks_overflow_882: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_882: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_883 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + 
vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_883 +.L_small_initial_partial_block_883: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_883: + + orq %r8,%r8 + je .L_after_reduction_883 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_883: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_10_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_884 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_884 + +.L_16_blocks_overflow_884: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_884: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 
960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_885 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + 
vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_885 +.L_small_initial_partial_block_885: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_885: + + orq %r8,%r8 + je .L_after_reduction_885 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_885: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_11_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_886 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_886 + +.L_16_blocks_overflow_886: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_886: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + 
vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_887 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq 
$0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_887 +.L_small_initial_partial_block_887: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_887: + + orq %r8,%r8 + je .L_after_reduction_887 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_887: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_12_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_888 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_888 + +.L_16_blocks_overflow_888: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb 
%zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_888: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + 
vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_889 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_889 +.L_small_initial_partial_block_889: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_889: + + orq %r8,%r8 + je .L_after_reduction_889 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_889: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_13_865: + leaq 
byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_890 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_890 + +.L_16_blocks_overflow_890: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_890: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + 
vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_891 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_891 +.L_small_initial_partial_block_891: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq 
$0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_891: + + orq %r8,%r8 + je .L_after_reduction_891 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_891: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_14_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_892 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_892 + +.L_16_blocks_overflow_892: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_892: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq 
$0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_893 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq 
$0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_893 +.L_small_initial_partial_block_893: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_893: + + orq %r8,%r8 + je .L_after_reduction_893 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_893: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_15_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_894 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_894 + +.L_16_blocks_overflow_894: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 
+.L_16_blocks_ok_894: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc 
%zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_895 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_895 +.L_small_initial_partial_block_895: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 
+ vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_895: + + orq %r8,%r8 + je .L_after_reduction_895 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_895: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_16_865: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_896 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_896 + +.L_16_blocks_overflow_896: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_896: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 
192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_897: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq 
%zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_897: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_897: + jmp .L_last_blocks_done_865 +.L_last_num_blocks_is_0_865: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_865: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_821 +.L_encrypt_16_blocks_821: + cmpb $240,%r15b + jae .L_16_blocks_overflow_898 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_898 +.L_16_blocks_overflow_898: + vpshufb %zmm29,%zmm2,%zmm2 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_898: + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp),%zmm1 + + + + + vshufi64x2 $255,%zmm5,%zmm5,%zmm2 + addb $16,%r15b + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + + + + + + + + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq 
%zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm6 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm6 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + + + + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21 + + + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm15,%zmm10,%zmm26 + vpxorq %zmm12,%zmm6,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + + + + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + + + + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 
%zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1) + vpshufb %zmm29,%zmm17,%zmm0 + vpshufb %zmm29,%zmm19,%zmm3 + vpshufb %zmm29,%zmm20,%zmm4 + vpshufb %zmm29,%zmm21,%zmm5 + vmovdqa64 %zmm0,1280(%rsp) + vmovdqa64 %zmm3,1344(%rsp) + vmovdqa64 %zmm4,1408(%rsp) + vmovdqa64 %zmm5,1472(%rsp) + vmovdqa64 1024(%rsp),%zmm13 + vmovdqu64 256(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1088(%rsp),%zmm13 + vmovdqu64 320(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1152(%rsp),%zmm13 + vmovdqu64 384(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1216(%rsp),%zmm13 + vmovdqu64 448(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_899 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_899 + jb .L_last_num_blocks_is_7_1_899 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_899 + jb .L_last_num_blocks_is_11_9_899 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_899 + ja .L_last_num_blocks_is_16_899 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_899 + jmp .L_last_num_blocks_is_13_899 + +.L_last_num_blocks_is_11_9_899: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_899 + ja .L_last_num_blocks_is_11_899 + jmp .L_last_num_blocks_is_9_899 + +.L_last_num_blocks_is_7_1_899: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_899 + jb .L_last_num_blocks_is_3_1_899 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_899 + je .L_last_num_blocks_is_6_899 + jmp .L_last_num_blocks_is_5_899 + +.L_last_num_blocks_is_3_1_899: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_899 + je .L_last_num_blocks_is_2_899 +.L_last_num_blocks_is_1_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_900 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_900 + +.L_16_blocks_overflow_900: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_900: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq 
$0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %xmm31,%xmm0,%xmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_901 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_901 +.L_small_initial_partial_block_901: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 
%xmm11,16(%rsi) + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_901 +.L_small_initial_compute_done_901: +.L_after_reduction_901: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_2_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_902 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_902 + +.L_16_blocks_overflow_902: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_902: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %ymm31,%ymm0,%ymm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq 
%r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_903 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_903 +.L_small_initial_partial_block_903: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_903: + + orq %r8,%r8 + je .L_after_reduction_903 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_903: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_3_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_904 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_904 + +.L_16_blocks_overflow_904: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_904: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + 
vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_905 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_905 +.L_small_initial_partial_block_905: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq 
$0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_905: + + orq %r8,%r8 + je .L_after_reduction_905 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_905: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_4_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_906 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_906 + +.L_16_blocks_overflow_906: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_906: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 
$1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_907 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_907 +.L_small_initial_partial_block_907: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_907: + + orq %r8,%r8 + je .L_after_reduction_907 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_907: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_5_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_908 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_908 + +.L_16_blocks_overflow_908: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 
ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_908: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq 
%zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_909 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_909 +.L_small_initial_partial_block_909: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_909: + + orq %r8,%r8 + je .L_after_reduction_909 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_909: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_6_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_910 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_910 + +.L_16_blocks_overflow_910: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_910: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + 
vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_911 + + + + + + subq $16,%r8 + movq 
$0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_911 +.L_small_initial_partial_block_911: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_911: + + orq %r8,%r8 + je .L_after_reduction_911 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_911: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_7_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_912 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_912 + +.L_16_blocks_overflow_912: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_912: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq 
$0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_913 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq 
$0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_913 +.L_small_initial_partial_block_913: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_913: + + orq %r8,%r8 + je .L_after_reduction_913 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_913: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_8_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_914 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_914 + +.L_16_blocks_overflow_914: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_914: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq 
$0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_915 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq 
$0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_915 +.L_small_initial_partial_block_915: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_915: + + orq %r8,%r8 + je .L_after_reduction_915 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_915: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_9_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_916 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_916 + +.L_16_blocks_overflow_916: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_916: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 
32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq 
%zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_917 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_917 +.L_small_initial_partial_block_917: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_917: + + orq %r8,%r8 + je .L_after_reduction_917 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_917: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_10_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq 
(%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_918 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_918 + +.L_16_blocks_overflow_918: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_918: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq 
$0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_919 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_919 +.L_small_initial_partial_block_919: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + 
vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_919: + + orq %r8,%r8 + je .L_after_reduction_919 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_919: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_11_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_920 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_920 + +.L_16_blocks_overflow_920: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_920: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + 
vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_921 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq 
%xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_921 +.L_small_initial_partial_block_921: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_921: + + orq %r8,%r8 + je .L_after_reduction_921 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_921: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_12_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_922 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_922 + +.L_16_blocks_overflow_922: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_922: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 
+ vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq 
$16,%r8 + jl .L_small_initial_partial_block_923 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_923 +.L_small_initial_partial_block_923: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_923: + + orq %r8,%r8 + je .L_after_reduction_923 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_923: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_13_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_924 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + 
vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_924 + +.L_16_blocks_overflow_924: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_924: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vextracti64x4 
$1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_925 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp 
.L_small_initial_compute_done_925 +.L_small_initial_partial_block_925: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_925: + + orq %r8,%r8 + je .L_after_reduction_925 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_925: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_14_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_926 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_926 + +.L_16_blocks_overflow_926: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_926: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + 
vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_927 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_927 +.L_small_initial_partial_block_927: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq 
$0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_927: + + orq %r8,%r8 + je .L_after_reduction_927 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_927: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_15_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_928 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_928 + +.L_16_blocks_overflow_928: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_928: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq 
$0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_929 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq 
%zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_929 +.L_small_initial_partial_block_929: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_929: + + orq %r8,%r8 + je .L_after_reduction_929 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_929: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_16_899: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_930 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_930 + +.L_16_blocks_overflow_930: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_930: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vmovdqa64 1280(%rsp),%zmm8 + vmovdqu64 512(%rsp),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 
16(%rdi),%zmm31 + vmovdqu64 576(%rsp),%zmm18 + vmovdqa64 1344(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 640(%rsp),%zmm1 + vmovdqa64 1408(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 704(%rsp),%zmm18 + vmovdqa64 1472(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpternlogq $0x96,%zmm12,%zmm24,%zmm14 + vpternlogq $0x96,%zmm13,%zmm25,%zmm7 + vpternlogq $0x96,%zmm15,%zmm26,%zmm10 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vpsrldq $8,%zmm10,%zmm15 + vpslldq $8,%zmm10,%zmm10 + + vmovdqa64 POLY2(%rip),%xmm16 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vpxorq %zmm15,%zmm14,%zmm14 + vpxorq %zmm10,%zmm7,%zmm7 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vextracti64x4 $1,%zmm14,%ymm12 + vpxorq %ymm12,%ymm14,%ymm14 + vextracti32x4 $1,%ymm14,%xmm12 + vpxorq %xmm12,%xmm14,%xmm14 + vextracti64x4 $1,%zmm7,%ymm13 + vpxorq %ymm13,%ymm7,%ymm7 + vextracti32x4 $1,%ymm7,%xmm13 + vpxorq %xmm13,%xmm7,%xmm7 + vbroadcastf64x2 176(%rdi),%zmm31 + vpclmulqdq $0x01,%xmm7,%xmm16,%xmm13 + vpslldq $8,%xmm13,%xmm13 + vpxorq %xmm13,%xmm7,%xmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + 
vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vpclmulqdq $0x00,%xmm13,%xmm16,%xmm12 + vpsrldq $4,%xmm12,%xmm12 + vpclmulqdq $0x10,%xmm13,%xmm16,%xmm15 + vpslldq $4,%xmm15,%xmm15 + + vpternlogq $0x96,%xmm12,%xmm15,%xmm14 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_931: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vpxorq %zmm14,%zmm17,%zmm17 + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm31,%zmm5,%zmm5 + vpxorq %zmm8,%zmm0,%zmm0 + vpxorq %zmm22,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_931: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_931: + jmp .L_last_blocks_done_899 +.L_last_num_blocks_is_0_899: + vmovdqa64 1280(%rsp),%zmm13 + vmovdqu64 512(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1344(%rsp),%zmm13 + vmovdqu64 576(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpternlogq 
$0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 1408(%rsp),%zmm13 + vmovdqu64 640(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 1472(%rsp),%zmm13 + vmovdqu64 704(%rsp),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_899: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_821 + +.L_message_below_32_blocks_821: + + + subq $256,%r8 + addq $256,%r11 + movl %r8d,%r10d + testq %r14,%r14 + jnz .L_skip_hkeys_precomputation_932 + vmovdqu64 640(%rsp),%zmm3 + + + vshufi64x2 $0x00,%zmm3,%zmm3,%zmm3 + + vmovdqu64 576(%rsp),%zmm4 + vmovdqu64 512(%rsp),%zmm5 + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,448(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,384(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm4,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm4,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm4,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm4,%zmm4 + vpxorq %zmm10,%zmm4,%zmm4 + + vpsrldq $8,%zmm4,%zmm10 + vpslldq $8,%zmm4,%zmm4 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm4,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm4,%zmm4 + + + + vpclmulqdq $0x00,%zmm4,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm4,%zmm10,%zmm4 + vpslldq $4,%zmm4,%zmm4 + + vpternlogq 
$0x96,%zmm7,%zmm6,%zmm4 + + vmovdqu64 %zmm4,320(%rsp) + + vpclmulqdq $0x11,%zmm3,%zmm5,%zmm6 + vpclmulqdq $0x00,%zmm3,%zmm5,%zmm7 + vpclmulqdq $0x01,%zmm3,%zmm5,%zmm10 + vpclmulqdq $0x10,%zmm3,%zmm5,%zmm5 + vpxorq %zmm10,%zmm5,%zmm5 + + vpsrldq $8,%zmm5,%zmm10 + vpslldq $8,%zmm5,%zmm5 + vpxorq %zmm10,%zmm6,%zmm6 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vmovdqu64 POLY2(%rip),%zmm10 + + vpclmulqdq $0x01,%zmm5,%zmm10,%zmm7 + vpslldq $8,%zmm7,%zmm7 + vpxorq %zmm7,%zmm5,%zmm5 + + + + vpclmulqdq $0x00,%zmm5,%zmm10,%zmm7 + vpsrldq $4,%zmm7,%zmm7 + vpclmulqdq $0x10,%zmm5,%zmm10,%zmm5 + vpslldq $4,%zmm5,%zmm5 + + vpternlogq $0x96,%zmm7,%zmm6,%zmm5 + + vmovdqu64 %zmm5,256(%rsp) +.L_skip_hkeys_precomputation_932: + movq $1,%r14 + andl $~15,%r10d + movl $512,%ebx + subl %r10d,%ebx + movl %r8d,%r10d + addl $15,%r10d + shrl $4,%r10d + je .L_last_num_blocks_is_0_933 + + cmpl $8,%r10d + je .L_last_num_blocks_is_8_933 + jb .L_last_num_blocks_is_7_1_933 + + + cmpl $12,%r10d + je .L_last_num_blocks_is_12_933 + jb .L_last_num_blocks_is_11_9_933 + + + cmpl $15,%r10d + je .L_last_num_blocks_is_15_933 + ja .L_last_num_blocks_is_16_933 + cmpl $14,%r10d + je .L_last_num_blocks_is_14_933 + jmp .L_last_num_blocks_is_13_933 + +.L_last_num_blocks_is_11_9_933: + + cmpl $10,%r10d + je .L_last_num_blocks_is_10_933 + ja .L_last_num_blocks_is_11_933 + jmp .L_last_num_blocks_is_9_933 + +.L_last_num_blocks_is_7_1_933: + cmpl $4,%r10d + je .L_last_num_blocks_is_4_933 + jb .L_last_num_blocks_is_3_1_933 + + cmpl $6,%r10d + ja .L_last_num_blocks_is_7_933 + je .L_last_num_blocks_is_6_933 + jmp .L_last_num_blocks_is_5_933 + +.L_last_num_blocks_is_3_1_933: + + cmpl $2,%r10d + ja .L_last_num_blocks_is_3_933 + je .L_last_num_blocks_is_2_933 +.L_last_num_blocks_is_1_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $255,%r15d + jae .L_16_blocks_overflow_934 + vpaddd %xmm28,%xmm2,%xmm0 + jmp .L_16_blocks_ok_934 + +.L_16_blocks_overflow_934: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %xmm29,%xmm0,%xmm0 +.L_16_blocks_ok_934: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%xmm17{%k1}{z} + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + 
vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %xmm30,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %xmm31,%xmm0,%xmm0 + vaesenclast %xmm30,%xmm0,%xmm0 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %xmm29,%xmm17,%xmm17 + vextracti32x4 $0,%zmm17,%xmm7 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_935 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_935 +.L_small_initial_partial_block_935: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm0 + + + vpclmulqdq $0x01,%xmm25,%xmm0,%xmm3 + vpslldq $8,%xmm3,%xmm3 + vpxorq %xmm3,%xmm25,%xmm3 + + + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm4,%xmm14 + + + + + + + + + + + + + vpxorq %xmm7,%xmm14,%xmm14 + + jmp .L_after_reduction_935 +.L_small_initial_compute_done_935: +.L_after_reduction_935: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_2_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $254,%r15d + jae .L_16_blocks_overflow_936 + vpaddd %ymm28,%ymm2,%ymm0 + jmp .L_16_blocks_ok_936 + +.L_16_blocks_overflow_936: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %ymm29,%ymm0,%ymm0 +.L_16_blocks_ok_936: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + 
vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%ymm17{%k1}{z} + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %ymm30,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %ymm31,%ymm0,%ymm0 + vaesenclast %ymm30,%ymm0,%ymm0 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %ymm29,%ymm17,%ymm17 + vextracti32x4 $1,%zmm17,%xmm7 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_937 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_937 +.L_small_initial_partial_block_937: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq 
$0x01,%xmm1,%xmm17,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm17,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm17,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm17,%xmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_937: + + orq %r8,%r8 + je .L_after_reduction_937 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_937: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_3_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $253,%r15d + jae .L_16_blocks_overflow_938 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_938 + +.L_16_blocks_overflow_938: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_938: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 
208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $2,%zmm17,%xmm7 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_939 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_939 +.L_small_initial_partial_block_939: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm17,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm17,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm17,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm17,%ymm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_939: + + orq %r8,%r8 + je .L_after_reduction_939 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_939: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_4_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $252,%r15d + jae .L_16_blocks_overflow_940 + vpaddd %zmm28,%zmm2,%zmm0 + jmp .L_16_blocks_ok_940 + +.L_16_blocks_overflow_940: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpshufb %zmm29,%zmm0,%zmm0 +.L_16_blocks_ok_940: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm0,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + 
vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm0,%zmm0 + vpxorq %zmm17,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm17,%zmm17{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vextracti32x4 $3,%zmm17,%xmm7 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_941 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_941 +.L_small_initial_partial_block_941: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpxorq %zmm26,%zmm4,%zmm4 + vpxorq %zmm24,%zmm0,%zmm0 + vpxorq %zmm25,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + 
vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_941: + + orq %r8,%r8 + je .L_after_reduction_941 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_941: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_5_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $251,%r15d + jae .L_16_blocks_overflow_942 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %xmm27,%xmm0,%xmm3 + jmp .L_16_blocks_ok_942 + +.L_16_blocks_overflow_942: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 +.L_16_blocks_ok_942: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%xmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + 
vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %xmm30,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %xmm31,%xmm3,%xmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %xmm30,%xmm3,%xmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %xmm19,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %xmm29,%xmm19,%xmm19 + vextracti32x4 $0,%zmm19,%xmm7 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_943 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_943 +.L_small_initial_partial_block_943: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_943: + + orq %r8,%r8 + je .L_after_reduction_943 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_943: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_6_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $250,%r15d + jae .L_16_blocks_overflow_944 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd 
%ymm27,%ymm0,%ymm3 + jmp .L_16_blocks_ok_944 + +.L_16_blocks_overflow_944: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 +.L_16_blocks_ok_944: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%ymm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %ymm30,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %ymm31,%ymm3,%ymm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %ymm30,%ymm3,%ymm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %ymm29,%ymm19,%ymm19 + vextracti32x4 $1,%zmm19,%xmm7 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_945 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + 
vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_945 +.L_small_initial_partial_block_945: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm19,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm19,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm19,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm19,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_945: + + orq %r8,%r8 + je .L_after_reduction_945 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_945: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_7_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $249,%r15d + jae .L_16_blocks_overflow_946 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_946 + +.L_16_blocks_overflow_946: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_946: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $2,%zmm19,%xmm7 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_947 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 
$1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_947 +.L_small_initial_partial_block_947: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm19,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm19,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm19,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm19,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_947: + + orq %r8,%r8 + je .L_after_reduction_947 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_947: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_8_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $64,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $248,%r15d + jae .L_16_blocks_overflow_948 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + jmp .L_16_blocks_ok_948 + +.L_16_blocks_overflow_948: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 +.L_16_blocks_ok_948: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm3,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 
80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm19,%zmm19{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vextracti32x4 $3,%zmm19,%xmm7 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_949 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_949 +.L_small_initial_partial_block_949: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 
$2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_949: + + orq %r8,%r8 + je .L_after_reduction_949 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_949: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_9_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $247,%r15d + jae .L_16_blocks_overflow_950 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %xmm27,%xmm3,%xmm4 + jmp .L_16_blocks_ok_950 + +.L_16_blocks_overflow_950: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 +.L_16_blocks_ok_950: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%xmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc 
%zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %xmm30,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %xmm31,%xmm4,%xmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %xmm30,%xmm4,%xmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %xmm20,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %xmm29,%xmm20,%xmm20 + vextracti32x4 $0,%zmm20,%xmm7 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_951 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_951 +.L_small_initial_partial_block_951: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + 
vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_951: + + orq %r8,%r8 + je .L_after_reduction_951 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_951: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_10_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $246,%r15d + jae .L_16_blocks_overflow_952 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %ymm27,%ymm3,%ymm4 + jmp .L_16_blocks_ok_952 + +.L_16_blocks_overflow_952: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 +.L_16_blocks_ok_952: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc 
%zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%ymm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %ymm30,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %ymm31,%ymm4,%ymm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %ymm30,%ymm4,%ymm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %ymm20,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %ymm29,%ymm20,%ymm20 + vextracti32x4 $1,%zmm20,%xmm7 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_953 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq 
$0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_953 +.L_small_initial_partial_block_953: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm20,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm20,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm20,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm20,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_953: + + orq %r8,%r8 + je .L_after_reduction_953 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_953: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_11_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $245,%r15d + jae .L_16_blocks_overflow_954 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_954 + +.L_16_blocks_overflow_954: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_954: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + 
vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $2,%zmm20,%xmm7 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_955 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq 
$8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_955 +.L_small_initial_partial_block_955: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm20,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm20,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm20,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm20,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_955: + + orq %r8,%r8 + je .L_after_reduction_955 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_955: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_12_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $128,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $244,%r15d + jae .L_16_blocks_overflow_956 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + jmp .L_16_blocks_ok_956 + +.L_16_blocks_overflow_956: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 +.L_16_blocks_ok_956: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm4,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 
128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm20,%zmm20{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vextracti32x4 $3,%zmm20,%xmm7 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_957 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 
288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq %zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_957 +.L_small_initial_partial_block_957: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vpxorq %zmm8,%zmm0,%zmm8 + vpxorq %zmm22,%zmm3,%zmm22 + vpxorq %zmm30,%zmm4,%zmm30 + vpxorq %zmm31,%zmm5,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_957: + + orq %r8,%r8 + je .L_after_reduction_957 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_957: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_13_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $243,%r15d + jae .L_16_blocks_overflow_958 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %xmm27,%xmm4,%xmm5 + jmp .L_16_blocks_ok_958 + +.L_16_blocks_overflow_958: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb 
%xmm29,%xmm5,%xmm5 +.L_16_blocks_ok_958: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $0,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%xmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %xmm30,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc 
%zmm31,%zmm4,%zmm4 + vaesenc %xmm31,%xmm5,%xmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %xmm30,%xmm5,%xmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %xmm21,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %xmm29,%xmm21,%xmm21 + vextracti32x4 $0,%zmm21,%xmm7 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_959 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_959 +.L_small_initial_partial_block_959: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 160(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 224(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 288(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + + vpxorq %zmm26,%zmm30,%zmm30 + vpxorq %zmm24,%zmm8,%zmm8 + vpxorq %zmm25,%zmm22,%zmm22 + + vpxorq %zmm31,%zmm30,%zmm30 + vpsrldq $8,%zmm30,%zmm4 + vpslldq $8,%zmm30,%zmm5 + vpxorq %zmm4,%zmm8,%zmm0 + vpxorq 
%zmm5,%zmm22,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_959: + + orq %r8,%r8 + je .L_after_reduction_959 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_959: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_14_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $242,%r15d + jae .L_16_blocks_overflow_960 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %ymm27,%ymm4,%ymm5 + jmp .L_16_blocks_ok_960 + +.L_16_blocks_overflow_960: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 +.L_16_blocks_ok_960: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $1,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%ymm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq 
$0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %ymm30,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %ymm31,%ymm5,%ymm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %ymm30,%ymm5,%ymm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %ymm21,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %ymm29,%ymm21,%ymm21 + vextracti32x4 $1,%zmm21,%xmm7 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_961 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq 
%xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_961 +.L_small_initial_partial_block_961: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 144(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 208(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 272(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 336(%rsi),%xmm1 + vpclmulqdq $0x01,%xmm1,%xmm21,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm21,%xmm5 + vpclmulqdq $0x11,%xmm1,%xmm21,%xmm0 + vpclmulqdq $0x00,%xmm1,%xmm21,%xmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_961: + + orq %r8,%r8 + je .L_after_reduction_961 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_961: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_15_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $241,%r15d + jae .L_16_blocks_overflow_962 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_962 + +.L_16_blocks_overflow_962: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_962: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $2,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq 
$0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 
+ vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $2,%zmm21,%xmm7 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_963 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_963 +.L_small_initial_partial_block_963: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 128(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 192(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 256(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 320(%rsi),%ymm1 + vpclmulqdq $0x01,%ymm1,%ymm21,%ymm4 + vpclmulqdq $0x10,%ymm1,%ymm21,%ymm5 + vpclmulqdq $0x11,%ymm1,%ymm21,%ymm0 + vpclmulqdq $0x00,%ymm1,%ymm21,%ymm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq 
$8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_963: + + orq %r8,%r8 + je .L_after_reduction_963 + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_963: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_16_933: + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%rax + subq $192,%rax + kmovq (%r10,%rax,8),%k1 + cmpl $240,%r15d + jae .L_16_blocks_overflow_964 + vpaddd %zmm28,%zmm2,%zmm0 + vpaddd %zmm27,%zmm0,%zmm3 + vpaddd %zmm27,%zmm3,%zmm4 + vpaddd %zmm27,%zmm4,%zmm5 + jmp .L_16_blocks_ok_964 + +.L_16_blocks_overflow_964: + vpshufb %zmm29,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vmovdqa64 ddq_add_4444(%rip),%zmm5 + vpaddd %zmm5,%zmm0,%zmm3 + vpaddd %zmm5,%zmm3,%zmm4 + vpaddd %zmm5,%zmm4,%zmm5 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 +.L_16_blocks_ok_964: + + + + + vbroadcastf64x2 0(%rdi),%zmm30 + vpxorq 768(%rsp),%zmm14,%zmm8 + vmovdqu64 0(%rsp,%rbx,1),%zmm1 + vextracti32x4 $3,%zmm5,%xmm2 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + + + vbroadcastf64x2 16(%rdi),%zmm31 + vmovdqu64 64(%rsp,%rbx,1),%zmm18 + vmovdqa64 832(%rsp),%zmm22 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm30,%zmm3,%zmm3 + vpxorq %zmm30,%zmm4,%zmm4 + vpxorq %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm30 + + + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm14 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm7 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm10 + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm11 + vmovdqu64 128(%rsp,%rbx,1),%zmm1 + vmovdqa64 896(%rsp),%zmm8 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm18 + vmovdqa64 960(%rsp),%zmm22 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm30 + + + vpclmulqdq $0x10,%zmm1,%zmm8,%zmm20 + vpclmulqdq $0x01,%zmm1,%zmm8,%zmm21 + vpclmulqdq $0x11,%zmm1,%zmm8,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm8,%zmm19 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm31 + + + vpternlogq $0x96,%zmm17,%zmm12,%zmm14 + vpternlogq $0x96,%zmm19,%zmm13,%zmm7 + vpternlogq $0x96,%zmm21,%zmm16,%zmm11 + vpternlogq $0x96,%zmm20,%zmm15,%zmm10 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm30 + vmovdqu8 0(%rcx,%r11,1),%zmm17 + vmovdqu8 64(%rcx,%r11,1),%zmm19 + vmovdqu8 128(%rcx,%r11,1),%zmm20 + vmovdqu8 192(%rcx,%r11,1),%zmm21{%k1}{z} + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm31 + + + vpclmulqdq $0x10,%zmm18,%zmm22,%zmm15 + vpclmulqdq $0x01,%zmm18,%zmm22,%zmm16 + vpclmulqdq $0x11,%zmm18,%zmm22,%zmm12 + vpclmulqdq $0x00,%zmm18,%zmm22,%zmm13 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm30 + vpternlogq $0x96,%zmm16,%zmm11,%zmm10 + vpxorq %zmm12,%zmm14,%zmm24 + vpxorq %zmm13,%zmm7,%zmm25 + 
vpxorq %zmm15,%zmm10,%zmm26 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm31 + vaesenc %zmm30,%zmm0,%zmm0 + vaesenc %zmm30,%zmm3,%zmm3 + vaesenc %zmm30,%zmm4,%zmm4 + vaesenc %zmm30,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm30 + vaesenc %zmm31,%zmm0,%zmm0 + vaesenc %zmm31,%zmm3,%zmm3 + vaesenc %zmm31,%zmm4,%zmm4 + vaesenc %zmm31,%zmm5,%zmm5 + vaesenclast %zmm30,%zmm0,%zmm0 + vaesenclast %zmm30,%zmm3,%zmm3 + vaesenclast %zmm30,%zmm4,%zmm4 + vaesenclast %zmm30,%zmm5,%zmm5 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vpxorq %zmm20,%zmm4,%zmm4 + vpxorq %zmm21,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm11 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm21,%zmm21{%k1}{z} + vpshufb %zmm29,%zmm17,%zmm17 + vpshufb %zmm29,%zmm19,%zmm19 + vpshufb %zmm29,%zmm20,%zmm20 + vpshufb %zmm29,%zmm21,%zmm21 + vextracti32x4 $3,%zmm21,%xmm7 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_965: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm11,16(%rsi) + vmovdqu64 112(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm17,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm17,%zmm3 + vpclmulqdq $0x01,%zmm1,%zmm17,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm17,%zmm5 + vmovdqu64 176(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm19,%zmm8 + vpclmulqdq $0x00,%zmm1,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm19,%zmm30 + vpclmulqdq $0x10,%zmm1,%zmm19,%zmm31 + vmovdqu64 240(%rsi),%zmm1 + vpclmulqdq $0x11,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x00,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm0,%zmm17,%zmm8 + vpternlogq $0x96,%zmm3,%zmm19,%zmm22 + vpclmulqdq $0x01,%zmm1,%zmm20,%zmm17 + vpclmulqdq $0x10,%zmm1,%zmm20,%zmm19 + vpternlogq $0x96,%zmm4,%zmm17,%zmm30 + vpternlogq $0x96,%zmm5,%zmm19,%zmm31 + vmovdqu64 304(%rsi),%ymm1 + vinserti64x2 $2,336(%rsi),%zmm1,%zmm1 + vpclmulqdq $0x01,%zmm1,%zmm21,%zmm4 + vpclmulqdq $0x10,%zmm1,%zmm21,%zmm5 + vpclmulqdq $0x11,%zmm1,%zmm21,%zmm0 + vpclmulqdq $0x00,%zmm1,%zmm21,%zmm3 + + vpxorq %zmm30,%zmm4,%zmm4 + vpternlogq $0x96,%zmm31,%zmm26,%zmm5 + vpternlogq $0x96,%zmm8,%zmm24,%zmm0 + vpternlogq $0x96,%zmm22,%zmm25,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm30 + vpslldq $8,%zmm4,%zmm31 + vpxorq %zmm30,%zmm0,%zmm0 + vpxorq %zmm31,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm30 + vpxorq %ymm30,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm30 + vpxorq %xmm30,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm31 + vpxorq %ymm31,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm31 + vpxorq %xmm31,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm1 + + + vpclmulqdq $0x01,%xmm3,%xmm1,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm1,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm1,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_965: + vpxorq %xmm7,%xmm14,%xmm14 +.L_after_reduction_965: + jmp .L_last_blocks_done_933 +.L_last_num_blocks_is_0_933: + vmovdqa64 768(%rsp),%zmm13 + vpxorq %zmm14,%zmm13,%zmm13 + vmovdqu64 0(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 832(%rsp),%zmm13 + vmovdqu64 64(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + vpxorq %zmm10,%zmm4,%zmm26 + vpxorq %zmm6,%zmm0,%zmm24 + vpxorq %zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + vmovdqa64 896(%rsp),%zmm13 + vmovdqu64 128(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm0 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm3 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm4 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm5 + vmovdqa64 960(%rsp),%zmm13 + vmovdqu64 192(%rsp,%rbx,1),%zmm12 + vpclmulqdq $0x11,%zmm12,%zmm13,%zmm6 + vpclmulqdq $0x00,%zmm12,%zmm13,%zmm7 + vpclmulqdq $0x01,%zmm12,%zmm13,%zmm10 + vpclmulqdq $0x10,%zmm12,%zmm13,%zmm11 + + vpternlogq $0x96,%zmm10,%zmm4,%zmm26 + vpternlogq $0x96,%zmm6,%zmm0,%zmm24 + vpternlogq $0x96,%zmm7,%zmm3,%zmm25 + vpternlogq $0x96,%zmm11,%zmm5,%zmm26 + + vpsrldq $8,%zmm26,%zmm0 + vpslldq $8,%zmm26,%zmm3 + vpxorq %zmm0,%zmm24,%zmm24 + vpxorq %zmm3,%zmm25,%zmm25 + vextracti64x4 $1,%zmm24,%ymm0 + vpxorq %ymm0,%ymm24,%ymm24 + vextracti32x4 $1,%ymm24,%xmm0 + vpxorq %xmm0,%xmm24,%xmm24 + vextracti64x4 $1,%zmm25,%ymm3 + vpxorq %ymm3,%ymm25,%ymm25 + vextracti32x4 $1,%ymm25,%xmm3 + vpxorq %xmm3,%xmm25,%xmm25 + vmovdqa64 POLY2(%rip),%xmm4 + + + vpclmulqdq $0x01,%xmm25,%xmm4,%xmm0 + vpslldq $8,%xmm0,%xmm0 + vpxorq %xmm0,%xmm25,%xmm0 + + + vpclmulqdq $0x00,%xmm0,%xmm4,%xmm3 + vpsrldq $4,%xmm3,%xmm3 + vpclmulqdq $0x10,%xmm0,%xmm4,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm24,%xmm3,%xmm14 + +.L_last_blocks_done_933: + vpshufb %xmm29,%xmm2,%xmm2 + jmp .L_ghash_done_821 + +.L_message_below_equal_16_blocks_821: + + + movl %r8d,%r12d + addl $15,%r12d + shrl $4,%r12d + cmpq $8,%r12 + je .L_small_initial_num_blocks_is_8_966 + jl .L_small_initial_num_blocks_is_7_1_966 + + + cmpq $12,%r12 + je .L_small_initial_num_blocks_is_12_966 + jl .L_small_initial_num_blocks_is_11_9_966 + + + cmpq $16,%r12 + je .L_small_initial_num_blocks_is_16_966 + cmpq $15,%r12 + je .L_small_initial_num_blocks_is_15_966 + cmpq $14,%r12 + je .L_small_initial_num_blocks_is_14_966 + jmp .L_small_initial_num_blocks_is_13_966 + +.L_small_initial_num_blocks_is_11_9_966: + + cmpq $11,%r12 + je .L_small_initial_num_blocks_is_11_966 + cmpq $10,%r12 + je .L_small_initial_num_blocks_is_10_966 + jmp .L_small_initial_num_blocks_is_9_966 + +.L_small_initial_num_blocks_is_7_1_966: + cmpq $4,%r12 + je .L_small_initial_num_blocks_is_4_966 + jl .L_small_initial_num_blocks_is_3_1_966 + + cmpq $7,%r12 + je .L_small_initial_num_blocks_is_7_966 + cmpq $6,%r12 + je .L_small_initial_num_blocks_is_6_966 + jmp .L_small_initial_num_blocks_is_5_966 + +.L_small_initial_num_blocks_is_3_1_966: + + cmpq $3,%r12 + je .L_small_initial_num_blocks_is_3_966 + cmpq $2,%r12 + je .L_small_initial_num_blocks_is_2_966 + + + + + +.L_small_initial_num_blocks_is_1_966: + vmovdqa64 SHUF_MASK(%rip),%xmm29 + vpaddd ONE(%rip),%xmm2,%xmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm0,%xmm2 + vpshufb %xmm29,%xmm0,%xmm0 + vmovdqu8 
0(%rcx,%r11,1),%xmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %xmm15,%xmm0,%xmm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %xmm15,%xmm0,%xmm0 + vpxorq %xmm6,%xmm0,%xmm0 + vextracti32x4 $0,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %xmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %xmm29,%xmm6,%xmm6 + vextracti32x4 $0,%zmm6,%xmm13 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_967 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_967 +.L_small_initial_partial_block_967: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + + + + + + + + + + + + vpxorq %xmm13,%xmm14,%xmm14 + + jmp .L_after_reduction_967 +.L_small_initial_compute_done_967: +.L_after_reduction_967: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_2_966: + vmovdqa64 SHUF_MASK(%rip),%ymm29 + vshufi64x2 $0,%ymm2,%ymm2,%ymm0 + vpaddd ddq_add_1234(%rip),%ymm0,%ymm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm0,%xmm2 + vpshufb %ymm29,%ymm0,%ymm0 + vmovdqu8 0(%rcx,%r11,1),%ymm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 
176(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %ymm15,%ymm0,%ymm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %ymm15,%ymm0,%ymm0 + vpxorq %ymm6,%ymm0,%ymm0 + vextracti32x4 $1,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %ymm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %ymm29,%ymm6,%ymm6 + vextracti32x4 $1,%zmm6,%xmm13 + subq $16 * (2 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_968 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_968 +.L_small_initial_partial_block_968: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm6,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm6,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm6,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm6,%xmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_968: + + orq %r8,%r8 + je .L_after_reduction_968 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_968: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_3_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $2,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vextracti32x4 $2,%zmm6,%xmm13 + subq $16 * (3 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_969 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_969 +.L_small_initial_partial_block_969: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm6,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm6,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm6,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm6,%ymm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_969: + + orq %r8,%r8 + je .L_after_reduction_969 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_969: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_4_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm0,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vmovdqu8 0(%rcx,%r11,1),%zmm6{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vpxorq %zmm6,%zmm0,%zmm0 + vextracti32x4 $3,%zmm0,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1){%k1} + vmovdqu8 %zmm0,%zmm0{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vextracti32x4 $3,%zmm6,%xmm13 + subq $16 * (4 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_970 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_970 +.L_small_initial_partial_block_970: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_970: + + orq %r8,%r8 + je .L_after_reduction_970 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_970: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_5_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %xmm29,%xmm3,%xmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%xmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + 
vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %xmm15,%xmm3,%xmm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %xmm15,%xmm3,%xmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %xmm7,%xmm3,%xmm3 + vextracti32x4 $0,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %xmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %xmm29,%xmm7,%xmm7 + vextracti32x4 $0,%zmm7,%xmm13 + subq $16 * (5 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_971 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_971 +.L_small_initial_partial_block_971: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq 
%xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_971: + + orq %r8,%r8 + je .L_after_reduction_971 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_971: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_6_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %ymm29,%ymm3,%ymm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%ymm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %ymm15,%ymm3,%ymm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %ymm15,%ymm3,%ymm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %ymm7,%ymm3,%ymm3 + vextracti32x4 $1,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %ymm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %ymm29,%ymm7,%ymm7 + vextracti32x4 $1,%zmm7,%xmm13 + subq $16 * (6 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_972 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 
$1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_972 +.L_small_initial_partial_block_972: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm7,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm7,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm7,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm7,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_972: + + orq %r8,%r8 + je .L_after_reduction_972 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_972: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_7_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + 
vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $2,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vextracti32x4 $2,%zmm7,%xmm13 + subq $16 * (7 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_973 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_973 +.L_small_initial_partial_block_973: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm7,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm7,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm7,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm7,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_973: + + orq %r8,%r8 + je .L_after_reduction_973 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_973: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_8_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 
$0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $64,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm3,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vextracti32x4 $3,%zmm3,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1){%k1} + vmovdqu8 %zmm3,%zmm3{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vextracti32x4 $3,%zmm7,%xmm13 + subq $16 * (8 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_974 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_974 +.L_small_initial_partial_block_974: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 
240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_974: + + orq %r8,%r8 + je .L_after_reduction_974 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_974: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_9_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %xmm29,%xmm4,%xmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%xmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %xmm15,%xmm4,%xmm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %xmm15,%xmm4,%xmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %xmm10,%xmm4,%xmm4 + vextracti32x4 $0,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %xmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %xmm29,%xmm10,%xmm10 + vextracti32x4 $0,%zmm10,%xmm13 + subq $16 * (9 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_975 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_975 +.L_small_initial_partial_block_975: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + 
+.L_small_initial_compute_done_975: + + orq %r8,%r8 + je .L_after_reduction_975 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_975: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_10_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %ymm29,%ymm4,%ymm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%ymm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %ymm15,%ymm4,%ymm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %ymm15,%ymm4,%ymm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %ymm10,%ymm4,%ymm4 + vextracti32x4 $1,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %ymm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %ymm29,%ymm10,%ymm10 + vextracti32x4 $1,%zmm10,%xmm13 + subq $16 * (10 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_976 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq 
%zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_976 +.L_small_initial_partial_block_976: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm10,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm10,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm10,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm10,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_976: + + orq %r8,%r8 + je .L_after_reduction_976 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_976: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_11_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $2,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vextracti32x4 $2,%zmm10,%xmm13 + subq $16 * (11 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_977 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq 
$0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_977 +.L_small_initial_partial_block_977: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm10,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm10,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm10,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm10,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_977: + + orq %r8,%r8 + je .L_after_reduction_977 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_977: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_12_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $128,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm4,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vextracti32x4 $3,%zmm4,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1){%k1} + vmovdqu8 %zmm4,%zmm4{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vextracti32x4 $3,%zmm10,%xmm13 + subq $16 * (12 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_978 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_978 +.L_small_initial_partial_block_978: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vpxorq %zmm15,%zmm0,%zmm15 + vpxorq %zmm16,%zmm3,%zmm16 + vpxorq %zmm17,%zmm4,%zmm17 + vpxorq %zmm19,%zmm5,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm0 + 
vpclmulqdq $0x00,%zmm20,%zmm10,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_978: + + orq %r8,%r8 + je .L_after_reduction_978 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_978: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_13_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $0,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %xmm29,%xmm5,%xmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%xmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc 
%zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %xmm15,%xmm5,%xmm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %xmm15,%xmm5,%xmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %xmm11,%xmm5,%xmm5 + vextracti32x4 $0,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %xmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %xmm29,%xmm11,%xmm11 + vextracti32x4 $0,%zmm11,%xmm13 + subq $16 * (13 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_979 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq $0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_979 +.L_small_initial_partial_block_979: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 160(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 224(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 288(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq 
$0x96,%zmm5,%zmm7,%zmm19 + + vpxorq %zmm19,%zmm17,%zmm17 + vpsrldq $8,%zmm17,%zmm4 + vpslldq $8,%zmm17,%zmm5 + vpxorq %zmm4,%zmm15,%zmm0 + vpxorq %zmm5,%zmm16,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_979: + + orq %r8,%r8 + je .L_after_reduction_979 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_979: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_14_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $1,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %ymm29,%ymm5,%ymm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%ymm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc 
%zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %ymm15,%ymm5,%ymm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %ymm15,%ymm5,%ymm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %ymm11,%ymm5,%ymm5 + vextracti32x4 $1,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %ymm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %ymm29,%ymm11,%ymm11 + vextracti32x4 $1,%zmm11,%xmm13 + subq $16 * (14 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_980 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_980 +.L_small_initial_partial_block_980: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 144(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 208(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 272(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 336(%rsi),%xmm20 + vpclmulqdq $0x01,%xmm20,%xmm11,%xmm4 + vpclmulqdq 
$0x10,%xmm20,%xmm11,%xmm5 + vpclmulqdq $0x11,%xmm20,%xmm11,%xmm0 + vpclmulqdq $0x00,%xmm20,%xmm11,%xmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_980: + + orq %r8,%r8 + je .L_after_reduction_980 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_980: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_15_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $2,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 
192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $2,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vextracti32x4 $2,%zmm11,%xmm13 + subq $16 * (15 - 1),%r8 + + + cmpq $16,%r8 + jl .L_small_initial_partial_block_981 + + + + + + subq $16,%r8 + movq $0,(%rdx) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + + jmp .L_small_initial_compute_done_981 +.L_small_initial_partial_block_981: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 128(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 192(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 256(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq 
$0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 320(%rsi),%ymm20 + vpclmulqdq $0x01,%ymm20,%ymm11,%ymm4 + vpclmulqdq $0x10,%ymm20,%ymm11,%ymm5 + vpclmulqdq $0x11,%ymm20,%ymm11,%ymm0 + vpclmulqdq $0x00,%ymm20,%ymm11,%ymm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_981: + + orq %r8,%r8 + je .L_after_reduction_981 + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_981: + jmp .L_small_initial_blocks_encrypted_966 +.L_small_initial_num_blocks_is_16_966: + vmovdqa64 SHUF_MASK(%rip),%zmm29 + vshufi64x2 $0,%zmm2,%zmm2,%zmm2 + vpaddd ddq_add_1234(%rip),%zmm2,%zmm0 + vpaddd ddq_add_5678(%rip),%zmm2,%zmm3 + vpaddd ddq_add_8888(%rip),%zmm0,%zmm4 + vpaddd ddq_add_8888(%rip),%zmm3,%zmm5 + leaq byte64_len_to_mask_table(%rip),%r10 + movq %r8,%r15 + subq $192,%r15 + kmovq (%r10,%r15,8),%k1 + vextracti32x4 $3,%zmm5,%xmm2 + vpshufb %zmm29,%zmm0,%zmm0 + vpshufb %zmm29,%zmm3,%zmm3 + vpshufb %zmm29,%zmm4,%zmm4 + vpshufb %zmm29,%zmm5,%zmm5 + vmovdqu8 0(%rcx,%r11,1),%zmm6 + vmovdqu8 64(%rcx,%r11,1),%zmm7 + vmovdqu8 128(%rcx,%r11,1),%zmm10 + vmovdqu8 192(%rcx,%r11,1),%zmm11{%k1}{z} + vbroadcastf64x2 0(%rdi),%zmm15 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm15,%zmm3,%zmm3 + vpxorq %zmm15,%zmm4,%zmm4 + vpxorq %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 16(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 32(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 48(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 64(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 80(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 96(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 112(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 128(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 144(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 160(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + 
vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 176(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 192(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 208(%rdi),%zmm15 + vaesenc %zmm15,%zmm0,%zmm0 + vaesenc %zmm15,%zmm3,%zmm3 + vaesenc %zmm15,%zmm4,%zmm4 + vaesenc %zmm15,%zmm5,%zmm5 + vbroadcastf64x2 224(%rdi),%zmm15 + vaesenclast %zmm15,%zmm0,%zmm0 + vaesenclast %zmm15,%zmm3,%zmm3 + vaesenclast %zmm15,%zmm4,%zmm4 + vaesenclast %zmm15,%zmm5,%zmm5 + vpxorq %zmm6,%zmm0,%zmm0 + vpxorq %zmm7,%zmm3,%zmm3 + vpxorq %zmm10,%zmm4,%zmm4 + vpxorq %zmm11,%zmm5,%zmm5 + vextracti32x4 $3,%zmm5,%xmm12 + movq %r9,%r10 + vmovdqu8 %zmm0,0(%r10,%r11,1) + vmovdqu8 %zmm3,64(%r10,%r11,1) + vmovdqu8 %zmm4,128(%r10,%r11,1) + vmovdqu8 %zmm5,192(%r10,%r11,1){%k1} + vmovdqu8 %zmm5,%zmm5{%k1}{z} + vpshufb %zmm29,%zmm6,%zmm6 + vpshufb %zmm29,%zmm7,%zmm7 + vpshufb %zmm29,%zmm10,%zmm10 + vpshufb %zmm29,%zmm11,%zmm11 + vextracti32x4 $3,%zmm11,%xmm13 + subq $16 * (16 - 1),%r8 +.L_small_initial_partial_block_982: + + + + + + + + + movq %r8,(%rdx) + vmovdqu64 %xmm12,16(%rsi) + vpxorq %zmm14,%zmm6,%zmm6 + vmovdqu64 112(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm6,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm6,%zmm3 + vpclmulqdq $0x01,%zmm20,%zmm6,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm6,%zmm5 + vmovdqu64 176(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm7,%zmm15 + vpclmulqdq $0x00,%zmm20,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm7,%zmm17 + vpclmulqdq $0x10,%zmm20,%zmm7,%zmm19 + vmovdqu64 240(%rsi),%zmm20 + vpclmulqdq $0x11,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x00,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm0,%zmm6,%zmm15 + vpternlogq $0x96,%zmm3,%zmm7,%zmm16 + vpclmulqdq $0x01,%zmm20,%zmm10,%zmm6 + vpclmulqdq $0x10,%zmm20,%zmm10,%zmm7 + vpternlogq $0x96,%zmm4,%zmm6,%zmm17 + vpternlogq $0x96,%zmm5,%zmm7,%zmm19 + vmovdqu64 304(%rsi),%ymm20 + vinserti64x2 $2,336(%rsi),%zmm20,%zmm20 + vpclmulqdq $0x01,%zmm20,%zmm11,%zmm4 + vpclmulqdq $0x10,%zmm20,%zmm11,%zmm5 + vpclmulqdq $0x11,%zmm20,%zmm11,%zmm0 + vpclmulqdq $0x00,%zmm20,%zmm11,%zmm3 + + vpxorq %zmm17,%zmm4,%zmm4 + vpxorq %zmm19,%zmm5,%zmm5 + vpxorq %zmm15,%zmm0,%zmm0 + vpxorq %zmm16,%zmm3,%zmm3 + + vpxorq %zmm5,%zmm4,%zmm4 + vpsrldq $8,%zmm4,%zmm17 + vpslldq $8,%zmm4,%zmm19 + vpxorq %zmm17,%zmm0,%zmm0 + vpxorq %zmm19,%zmm3,%zmm3 + vextracti64x4 $1,%zmm0,%ymm17 + vpxorq %ymm17,%ymm0,%ymm0 + vextracti32x4 $1,%ymm0,%xmm17 + vpxorq %xmm17,%xmm0,%xmm0 + vextracti64x4 $1,%zmm3,%ymm19 + vpxorq %ymm19,%ymm3,%ymm3 + vextracti32x4 $1,%ymm3,%xmm19 + vpxorq %xmm19,%xmm3,%xmm3 + vmovdqa64 POLY2(%rip),%xmm20 + + + vpclmulqdq $0x01,%xmm3,%xmm20,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm3,%xmm4 + + + vpclmulqdq $0x00,%xmm4,%xmm20,%xmm5 + vpsrldq $4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm4,%xmm20,%xmm14 + vpslldq $4,%xmm14,%xmm14 + vpternlogq $0x96,%xmm0,%xmm5,%xmm14 + +.L_small_initial_compute_done_982: + vpxorq %xmm13,%xmm14,%xmm14 +.L_after_reduction_982: +.L_small_initial_blocks_encrypted_966: +.L_ghash_done_821: + vmovdqu64 %xmm2,0(%rsi) + vmovdqu64 %xmm14,64(%rsi) +.L_enc_dec_done_821: + jmp .Lexit_gcm_decrypt +.Lexit_gcm_decrypt: + cmpq $256,%r8 + jbe .Lskip_hkeys_cleanup_983 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa64 %zmm0,0(%rsp) + vmovdqa64 %zmm0,64(%rsp) + vmovdqa64 %zmm0,128(%rsp) + vmovdqa64 %zmm0,192(%rsp) + vmovdqa64 %zmm0,256(%rsp) + vmovdqa64 %zmm0,320(%rsp) + vmovdqa64 
%zmm0,384(%rsp) + vmovdqa64 %zmm0,448(%rsp) + vmovdqa64 %zmm0,512(%rsp) + vmovdqa64 %zmm0,576(%rsp) + vmovdqa64 %zmm0,640(%rsp) + vmovdqa64 %zmm0,704(%rsp) +.Lskip_hkeys_cleanup_983: + vzeroupper + leaq (%rbp),%rsp +.cfi_def_cfa_register %rsp + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + .byte 0xf3,0xc3 +.Ldecrypt_seh_end: +.cfi_endproc +.size ossl_aes_gcm_decrypt_avx512, .-ossl_aes_gcm_decrypt_avx512 +.globl ossl_aes_gcm_finalize_avx512 +.type ossl_aes_gcm_finalize_avx512,@function +.align 32 +ossl_aes_gcm_finalize_avx512: +.cfi_startproc +.byte 243,15,30,250 + vmovdqu 336(%rdi),%xmm2 + vmovdqu 32(%rdi),%xmm3 + vmovdqu 64(%rdi),%xmm4 + + + cmpq $0,%rsi + je .L_partial_done_984 + + vpclmulqdq $0x11,%xmm2,%xmm4,%xmm0 + vpclmulqdq $0x00,%xmm2,%xmm4,%xmm16 + vpclmulqdq $0x01,%xmm2,%xmm4,%xmm17 + vpclmulqdq $0x10,%xmm2,%xmm4,%xmm4 + vpxorq %xmm17,%xmm4,%xmm4 + + vpsrldq $8,%xmm4,%xmm17 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm17,%xmm0,%xmm0 + vpxorq %xmm16,%xmm4,%xmm4 + + + + vmovdqu64 POLY2(%rip),%xmm17 + + vpclmulqdq $0x01,%xmm4,%xmm17,%xmm16 + vpslldq $8,%xmm16,%xmm16 + vpxorq %xmm16,%xmm4,%xmm4 + + + + vpclmulqdq $0x00,%xmm4,%xmm17,%xmm16 + vpsrldq $4,%xmm16,%xmm16 + vpclmulqdq $0x10,%xmm4,%xmm17,%xmm4 + vpslldq $4,%xmm4,%xmm4 + + vpternlogq $0x96,%xmm16,%xmm0,%xmm4 + +.L_partial_done_984: + vmovq 56(%rdi),%xmm5 + vpinsrq $1,48(%rdi),%xmm5,%xmm5 + vpsllq $3,%xmm5,%xmm5 + + vpxor %xmm5,%xmm4,%xmm4 + + vpclmulqdq $0x11,%xmm2,%xmm4,%xmm0 + vpclmulqdq $0x00,%xmm2,%xmm4,%xmm16 + vpclmulqdq $0x01,%xmm2,%xmm4,%xmm17 + vpclmulqdq $0x10,%xmm2,%xmm4,%xmm4 + vpxorq %xmm17,%xmm4,%xmm4 + + vpsrldq $8,%xmm4,%xmm17 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm17,%xmm0,%xmm0 + vpxorq %xmm16,%xmm4,%xmm4 + + + + vmovdqu64 POLY2(%rip),%xmm17 + + vpclmulqdq $0x01,%xmm4,%xmm17,%xmm16 + vpslldq $8,%xmm16,%xmm16 + vpxorq %xmm16,%xmm4,%xmm4 + + + + vpclmulqdq $0x00,%xmm4,%xmm17,%xmm16 + vpsrldq $4,%xmm16,%xmm16 + vpclmulqdq $0x10,%xmm4,%xmm17,%xmm4 + vpslldq $4,%xmm4,%xmm4 + + vpternlogq $0x96,%xmm16,%xmm0,%xmm4 + + vpshufb SHUF_MASK(%rip),%xmm4,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + +.L_return_T_984: + vmovdqu %xmm3,64(%rdi) +.Labort_finalize: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_aes_gcm_finalize_avx512, .-ossl_aes_gcm_finalize_avx512 +.globl ossl_gcm_gmult_avx512 +.hidden ossl_gcm_gmult_avx512 +.type ossl_gcm_gmult_avx512,@function +.align 32 +ossl_gcm_gmult_avx512: +.cfi_startproc +.byte 243,15,30,250 + vmovdqu64 (%rdi),%xmm1 + vmovdqu64 336(%rsi),%xmm2 + + vpclmulqdq $0x11,%xmm2,%xmm1,%xmm3 + vpclmulqdq $0x00,%xmm2,%xmm1,%xmm4 + vpclmulqdq $0x01,%xmm2,%xmm1,%xmm5 + vpclmulqdq $0x10,%xmm2,%xmm1,%xmm1 + vpxorq %xmm5,%xmm1,%xmm1 + + vpsrldq $8,%xmm1,%xmm5 + vpslldq $8,%xmm1,%xmm1 + vpxorq %xmm5,%xmm3,%xmm3 + vpxorq %xmm4,%xmm1,%xmm1 + + + + vmovdqu64 POLY2(%rip),%xmm5 + + vpclmulqdq $0x01,%xmm1,%xmm5,%xmm4 + vpslldq $8,%xmm4,%xmm4 + vpxorq %xmm4,%xmm1,%xmm1 + + + + vpclmulqdq $0x00,%xmm1,%xmm5,%xmm4 + vpsrldq $4,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm1,%xmm5,%xmm1 + vpslldq $4,%xmm1,%xmm1 + + vpternlogq $0x96,%xmm4,%xmm3,%xmm1 + + vmovdqu64 %xmm1,(%rdi) + vzeroupper +.Labort_gmult: + .byte 0xf3,0xc3 +.cfi_endproc +.size ossl_gcm_gmult_avx512, .-ossl_gcm_gmult_avx512 +.section .rodata +.align 16 +POLY:.quad 0x0000000000000001, 
0xC200000000000000 + +.align 64 +POLY2: +.quad 0x00000001C2000000, 0xC200000000000000 +.quad 0x00000001C2000000, 0xC200000000000000 +.quad 0x00000001C2000000, 0xC200000000000000 +.quad 0x00000001C2000000, 0xC200000000000000 + +.align 16 +TWOONE:.quad 0x0000000000000001, 0x0000000100000000 + + + +.align 64 +SHUF_MASK: +.quad 0x08090A0B0C0D0E0F, 0x0001020304050607 +.quad 0x08090A0B0C0D0E0F, 0x0001020304050607 +.quad 0x08090A0B0C0D0E0F, 0x0001020304050607 +.quad 0x08090A0B0C0D0E0F, 0x0001020304050607 + +.align 16 +SHIFT_MASK: +.quad 0x0706050403020100, 0x0f0e0d0c0b0a0908 + +ALL_F: +.quad 0xffffffffffffffff, 0xffffffffffffffff + +ZERO: +.quad 0x0000000000000000, 0x0000000000000000 + +.align 16 +ONE: +.quad 0x0000000000000001, 0x0000000000000000 + +.align 16 +ONEf: +.quad 0x0000000000000000, 0x0100000000000000 + +.align 64 +ddq_add_1234: +.quad 0x0000000000000001, 0x0000000000000000 +.quad 0x0000000000000002, 0x0000000000000000 +.quad 0x0000000000000003, 0x0000000000000000 +.quad 0x0000000000000004, 0x0000000000000000 + +.align 64 +ddq_add_5678: +.quad 0x0000000000000005, 0x0000000000000000 +.quad 0x0000000000000006, 0x0000000000000000 +.quad 0x0000000000000007, 0x0000000000000000 +.quad 0x0000000000000008, 0x0000000000000000 + +.align 64 +ddq_add_4444: +.quad 0x0000000000000004, 0x0000000000000000 +.quad 0x0000000000000004, 0x0000000000000000 +.quad 0x0000000000000004, 0x0000000000000000 +.quad 0x0000000000000004, 0x0000000000000000 + +.align 64 +ddq_add_8888: +.quad 0x0000000000000008, 0x0000000000000000 +.quad 0x0000000000000008, 0x0000000000000000 +.quad 0x0000000000000008, 0x0000000000000000 +.quad 0x0000000000000008, 0x0000000000000000 + +.align 64 +ddq_addbe_1234: +.quad 0x0000000000000000, 0x0100000000000000 +.quad 0x0000000000000000, 0x0200000000000000 +.quad 0x0000000000000000, 0x0300000000000000 +.quad 0x0000000000000000, 0x0400000000000000 + +.align 64 +ddq_addbe_4444: +.quad 0x0000000000000000, 0x0400000000000000 +.quad 0x0000000000000000, 0x0400000000000000 +.quad 0x0000000000000000, 0x0400000000000000 +.quad 0x0000000000000000, 0x0400000000000000 + +.align 64 +byte_len_to_mask_table: +.value 0x0000, 0x0001, 0x0003, 0x0007 +.value 0x000f, 0x001f, 0x003f, 0x007f +.value 0x00ff, 0x01ff, 0x03ff, 0x07ff +.value 0x0fff, 0x1fff, 0x3fff, 0x7fff +.value 0xffff + +.align 64 +byte64_len_to_mask_table: +.quad 0x0000000000000000, 0x0000000000000001 +.quad 0x0000000000000003, 0x0000000000000007 +.quad 0x000000000000000f, 0x000000000000001f +.quad 0x000000000000003f, 0x000000000000007f +.quad 0x00000000000000ff, 0x00000000000001ff +.quad 0x00000000000003ff, 0x00000000000007ff +.quad 0x0000000000000fff, 0x0000000000001fff +.quad 0x0000000000003fff, 0x0000000000007fff +.quad 0x000000000000ffff, 0x000000000001ffff +.quad 0x000000000003ffff, 0x000000000007ffff +.quad 0x00000000000fffff, 0x00000000001fffff +.quad 0x00000000003fffff, 0x00000000007fffff +.quad 0x0000000000ffffff, 0x0000000001ffffff +.quad 0x0000000003ffffff, 0x0000000007ffffff +.quad 0x000000000fffffff, 0x000000001fffffff +.quad 0x000000003fffffff, 0x000000007fffffff +.quad 0x00000000ffffffff, 0x00000001ffffffff +.quad 0x00000003ffffffff, 0x00000007ffffffff +.quad 0x0000000fffffffff, 0x0000001fffffffff +.quad 0x0000003fffffffff, 0x0000007fffffffff +.quad 0x000000ffffffffff, 0x000001ffffffffff +.quad 0x000003ffffffffff, 0x000007ffffffffff +.quad 0x00000fffffffffff, 0x00001fffffffffff +.quad 0x00003fffffffffff, 0x00007fffffffffff +.quad 0x0000ffffffffffff, 0x0001ffffffffffff +.quad 0x0003ffffffffffff, 0x0007ffffffffffff +.quad 
0x000fffffffffffff, 0x001fffffffffffff +.quad 0x003fffffffffffff, 0x007fffffffffffff +.quad 0x00ffffffffffffff, 0x01ffffffffffffff +.quad 0x03ffffffffffffff, 0x07ffffffffffffff +.quad 0x0fffffffffffffff, 0x1fffffffffffffff +.quad 0x3fffffffffffffff, 0x7fffffffffffffff +.quad 0xffffffffffffffff diff --git a/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-ppc.s b/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-ppc.s new file mode 100644 index 000000000000..e933689d63a9 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-ppc.s @@ -0,0 +1,1339 @@ +.machine "any" +.abiversion 2 +.text + + + + + +.macro .Loop_aes_middle4x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 +.endm + + + + + +.macro .Loop_aes_middle8x + xxlor 23+32, 1, 1 + xxlor 24+32, 2, 2 + xxlor 25+32, 3, 3 + xxlor 26+32, 4, 4 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 5, 5 + xxlor 24+32, 6, 6 + xxlor 25+32, 7, 7 + xxlor 26+32, 8, 8 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 +.endm + + + + +ppc_aes_gcm_ghash: + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + vsldoi 26, 24, 29, 8 + vsldoi 
29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + + blr + + + + + +.macro ppc_aes_gcm_ghash2_4x + + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 27, 23, 27 + + + .long 0x1309A4C8 + .long 0x1326ACC8 + .long 0x1343B4C8 + vxor 19, 19, 27 + .long 0x12EC9CC8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D9CC8 + .long 0x132AA4C8 + .long 0x1347ACC8 + .long 0x1364B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E9CC8 + .long 0x132BA4C8 + .long 0x1348ACC8 + .long 0x1365B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + +.endm + + + + +.macro ppc_update_hash_1x + vxor 28, 28, 0 + + vxor 19, 19, 19 + + .long 0x12C3E4C8 + .long 0x12E4E4C8 + .long 0x1305E4C8 + + .long 0x137614C8 + + vsldoi 25, 23, 19, 8 + vsldoi 26, 19, 23, 8 + vxor 22, 22, 25 + vxor 24, 24, 26 + + vsldoi 22, 22, 22, 8 + vxor 22, 22, 27 + + vsldoi 20, 22, 22, 8 + .long 0x12D614C8 + vxor 20, 20, 24 + vxor 22, 22, 20 + + vor 0,22,22 + +.endm + + + + + + + + + + + + + +.global ppc_aes_gcm_encrypt +.align 5 +ppc_aes_gcm_encrypt: +_ppc_aes_gcm_encrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 
0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + + + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x: + mr 14, 3 + mr 9, 4 + + + li 10, 128 + divdu 10, 5, 10 + cmpdi 10, 0 + beq .Loop_last_block + + .long 0x13DEF8C0 + vxor 16, 30, 29 + .long 0x13DEF8C0 + vxor 17, 30, 29 + .long 0x13DEF8C0 + vxor 18, 30, 29 + .long 0x13DEF8C0 + vxor 19, 30, 29 + .long 0x13DEF8C0 + vxor 20, 30, 29 + .long 0x13DEF8C0 + vxor 21, 30, 29 + .long 0x13DEF8C0 + vxor 22, 30, 29 + + mtctr 10 + + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 + + lwz 10, 240(6) + +.Loop_8x_block: + + lxvb16x 15, 0, 14 + lxvb16x 16, 15, 14 + lxvb16x 17, 16, 14 + lxvb16x 18, 17, 14 + lxvb16x 19, 18, 14 + lxvb16x 20, 19, 14 + lxvb16x 21, 20, 14 + lxvb16x 22, 21, 14 + addi 14, 14, 128 + +.Loop_aes_middle8x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_ghash + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_ghash + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_ghash + b aes_gcm_out + +Do_next_ghash: + + + + .long 0x11EFBD09 + .long 0x1210BD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + xxlxor 48, 48, 16 + stxvb16x 48, 15, 9 + + .long 0x1231BD09 + .long 0x1252BD09 + + xxlxor 49, 49, 17 + stxvb16x 49, 16, 9 + xxlxor 50, 50, 18 + stxvb16x 50, 17, 9 + + .long 0x1273BD09 + .long 0x1294BD09 + + xxlxor 51, 51, 19 + stxvb16x 51, 18, 9 + xxlxor 52, 52, 20 + stxvb16x 52, 19, 9 + + .long 0x12B5BD09 + .long 0x12D6BD09 + + xxlxor 53, 53, 21 + stxvb16x 53, 20, 9 + xxlxor 54, 54, 22 + stxvb16x 54, 21, 9 + + addi 9, 9, 128 + + + ppc_aes_gcm_ghash2_4x + + xxlor 27+32, 0, 0 + .long 0x13DEF8C0 + vor 29,30,30 + vxor 15, 30, 27 + .long 0x13DEF8C0 + vxor 16, 30, 27 + .long 0x13DEF8C0 + vxor 17, 30, 27 + .long 0x13DEF8C0 + vxor 18, 30, 27 + .long 0x13DEF8C0 + vxor 19, 30, 27 + .long 0x13DEF8C0 + vxor 20, 30, 27 + .long 0x13DEF8C0 + vxor 21, 30, 27 + .long 0x13DEF8C0 + vxor 22, 30, 27 + + addi 12, 12, -128 + addi 11, 11, 128 + + bdnz .Loop_8x_block + + vor 30,29,29 + +.Loop_last_block: + cmpdi 12, 0 + beq aes_gcm_out + + + li 10, 16 + divdu 10, 12, 10 + + mtctr 10 + + lwz 10, 240(6) + + cmpdi 12, 16 + blt Final_block + +.macro .Loop_aes_middle_1x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x11EFA508 + .long 0x11EFAD08 + .long 0x11EFB508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x11EFA508 + .long 0x11EFAD08 + .long 0x11EFB508 + + xxlor 19+32, 9, 9 + .long 0x11EF9D08 +.endm + +Next_rem_block: + lxvb16x 15, 0, 14 + +.Loop_aes_middle_1x + + xxlor 
23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_1x + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_1x + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_1x + +Do_next_1x: + .long 0x11EFBD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + addi 14, 14, 16 + addi 9, 9, 16 + + vor 28,15,15 + ppc_update_hash_1x + + addi 12, 12, -16 + addi 11, 11, 16 + xxlor 19+32, 0, 0 + .long 0x13DEF8C0 + vxor 15, 30, 19 + + bdnz Next_rem_block + + cmpdi 12, 0 + beq aes_gcm_out + +Final_block: +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_final_1x + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_final_1x + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_final_1x + +Do_final_1x: + .long 0x11EFBD09 + + lxvb16x 15, 0, 14 + xxlxor 47, 47, 15 + + + li 15, 16 + sub 15, 15, 12 + + vspltisb 16,-1 + vspltisb 17,0 + li 10, 192 + stvx 16, 10, 1 + addi 10, 10, 16 + stvx 17, 10, 1 + + addi 10, 1, 192 + lxvb16x 16, 15, 10 + xxland 47, 47, 16 + + vor 28,15,15 + ppc_update_hash_1x + + + bl Write_partial_block + + b aes_gcm_out + + + + + + + +Write_partial_block: + li 10, 192 + stxvb16x 15+32, 10, 1 + + + addi 10, 9, -1 + addi 16, 1, 191 + + mtctr 12 + li 15, 0 + +Write_last_byte: + lbzu 14, 1(16) + stbu 14, 1(10) + bdnz Write_last_byte + blr + +aes_gcm_out: + + stxvb16x 32, 0, 8 + add 3, 11, 12 + + li 9, 256 + lvx 20, 9, 1 + addi 9, 9, 16 + lvx 21, 9, 1 + addi 9, 9, 16 + lvx 22, 9, 1 + addi 9, 9, 16 + lvx 23, 9, 1 + addi 9, 9, 16 + lvx 24, 9, 1 + addi 9, 9, 16 + lvx 25, 9, 1 + addi 9, 9, 16 + lvx 26, 9, 1 + addi 9, 9, 16 + lvx 27, 9, 1 + addi 9, 9, 16 + lvx 28, 9, 1 + addi 9, 9, 16 + lvx 29, 9, 1 + addi 9, 9, 16 + lvx 30, 9, 1 + addi 9, 9, 16 + lvx 31, 9, 1 + + ld 0, 528(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + + mtlr 0 + addi 1, 1, 512 + blr + + + + +.global ppc_aes_gcm_decrypt +.align 5 +ppc_aes_gcm_decrypt: +_ppc_aes_gcm_decrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + 
+ + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x_dec + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x_dec + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x_dec + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x_dec: + mr 14, 3 + mr 9, 4 + + + li 10, 128 + divdu 10, 5, 10 + cmpdi 10, 0 + beq .Loop_last_block_dec + + .long 0x13DEF8C0 + vxor 16, 30, 29 + .long 0x13DEF8C0 + vxor 17, 30, 29 + .long 0x13DEF8C0 + vxor 18, 30, 29 + .long 0x13DEF8C0 + vxor 19, 30, 29 + .long 0x13DEF8C0 + vxor 20, 30, 29 + .long 0x13DEF8C0 + vxor 21, 30, 29 + .long 0x13DEF8C0 + vxor 22, 30, 29 + + mtctr 10 + + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 + + lwz 10, 240(6) + +.Loop_8x_block_dec: + + lxvb16x 15, 0, 14 + lxvb16x 16, 15, 14 + lxvb16x 17, 16, 14 + lxvb16x 18, 17, 14 + lxvb16x 19, 18, 14 + lxvb16x 20, 19, 14 + lxvb16x 21, 20, 14 + lxvb16x 22, 21, 14 + addi 14, 14, 128 + +.Loop_aes_middle8x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_last_aes_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_last_aes_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_last_aes_dec + b aes_gcm_out + +Do_last_aes_dec: + + + + .long 0x11EFBD09 + .long 0x1210BD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + xxlxor 48, 48, 16 + stxvb16x 48, 15, 9 + + .long 0x1231BD09 + .long 0x1252BD09 + + xxlxor 49, 49, 17 + stxvb16x 49, 16, 9 + xxlxor 50, 50, 18 + stxvb16x 50, 17, 9 + + .long 0x1273BD09 + .long 0x1294BD09 + + xxlxor 51, 51, 19 + stxvb16x 51, 18, 9 + xxlxor 52, 52, 20 + stxvb16x 52, 19, 9 + + .long 0x12B5BD09 + .long 0x12D6BD09 + + xxlxor 53, 53, 21 + stxvb16x 53, 20, 9 + xxlxor 54, 54, 22 + stxvb16x 54, 21, 9 + + addi 9, 9, 128 + + xxlor 15+32, 15, 15 + xxlor 16+32, 16, 16 + xxlor 17+32, 17, 17 + xxlor 18+32, 18, 18 + xxlor 19+32, 19, 19 + xxlor 20+32, 20, 20 + xxlor 21+32, 21, 21 + xxlor 22+32, 22, 22 + + + ppc_aes_gcm_ghash2_4x + + xxlor 27+32, 0, 0 + .long 0x13DEF8C0 + vor 29,30,30 + vxor 15, 30, 27 + .long 0x13DEF8C0 + vxor 16, 30, 27 + .long 0x13DEF8C0 + vxor 17, 30, 27 + .long 0x13DEF8C0 + vxor 18, 30, 27 + .long 0x13DEF8C0 + vxor 19, 30, 27 + .long 0x13DEF8C0 + vxor 20, 30, 27 + .long 0x13DEF8C0 + vxor 21, 30, 27 + .long 0x13DEF8C0 + vxor 22, 30, 27 + addi 12, 12, -128 + addi 11, 11, 128 + + bdnz .Loop_8x_block_dec + + vor 30,29,29 + +.Loop_last_block_dec: + cmpdi 12, 0 + beq aes_gcm_out + + + li 10, 16 + divdu 10, 12, 10 + + mtctr 10 + + lwz 10,240(6) + + cmpdi 12, 16 + blt Final_block_dec + +Next_rem_block_dec: + lxvb16x 15, 0, 14 + +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_1x_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_1x_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 
+ + cmpdi 10, 14 + beq Do_next_1x_dec + +Do_next_1x_dec: + .long 0x11EFBD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + addi 14, 14, 16 + addi 9, 9, 16 + + xxlor 28+32, 15, 15 + ppc_update_hash_1x + + addi 12, 12, -16 + addi 11, 11, 16 + xxlor 19+32, 0, 0 + .long 0x13DEF8C0 + vxor 15, 30, 19 + + bdnz Next_rem_block_dec + + cmpdi 12, 0 + beq aes_gcm_out + +Final_block_dec: +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_final_1x_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_final_1x_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_final_1x_dec + +Do_final_1x_dec: + .long 0x11EFBD09 + + lxvb16x 15, 0, 14 + xxlxor 47, 47, 15 + + + li 15, 16 + sub 15, 15, 12 + + vspltisb 16,-1 + vspltisb 17,0 + li 10, 192 + stvx 16, 10, 1 + addi 10, 10, 16 + stvx 17, 10, 1 + + addi 10, 1, 192 + lxvb16x 16, 15, 10 + xxland 47, 47, 16 + + xxlor 28+32, 15, 15 + ppc_update_hash_1x + + + bl Write_partial_block + + b aes_gcm_out diff --git a/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-riscv64-zvkb-zvkg-zvkned.S b/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-riscv64-zvkb-zvkg-zvkned.S new file mode 100644 index 000000000000..6c1c5e1c7e28 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/aes-gcm-riscv64-zvkb-zvkg-zvkned.S @@ -0,0 +1,1540 @@ +.text +.p2align 3 +.globl rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt +.type rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,@function +rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt: + srli t0, a2, 4 + beqz t0, .Lenc_end + slli t5, t0, 2 + + mv a7, t5 + + # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle + # the multiple AES-GCM blocks at the same time within `LMUL=4` register. + # The AES-GCM's SEW is e32 and EGW is 128 bits. + # FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4 + # = (VLEN*4)/32 + # We could get the block_num using the VL value of `vsetvli with e32, m4`. + .word 220231767 + # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the + # zero padding data to make sure we could always handle FULL_BLOCK_LEN32 + # blocks for all iterations. + + ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm + ## block number in a LMUL=4 register group. + ## n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4)) + ## = (VLEN/32) + ## We could use vsetvli with `e32, m1` to compute the `n` number. + .word 218133207 + + # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2). + addi t1, a5, 32 + .word 3439489111 + .word 33779591 + + # Compute the H^n + li t1, 1 +1: + .word 2750984183 + slli t1, t1, 1 + bltu t1, t0, 1b + + .word 220754007 + .word 1577072727 + .word 2817763447 + + #### Load plaintext into v24 and handle padding. We also load the init tag + #### data into v20 and prepare the AES ctr input data into v12 and v28. + .word 1577073239 + + ## Prepare the AES ctr input data into v12. + # Setup ctr input mask. + # ctr mask : [000100010001....] + # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use + # `FULL_BLOCK_LEN32` here. + .word 201879639 + li t0, 0b10001000 + .word 1577238615 + # Load IV. + .word 3439489111 + .word 34041735 + # Convert the big-endian counter into little-endian. + .word 3305271383 + .word 1240772567 + # Splat the `single block of IV` to v12 + .word 220754007 + .word 1577072215 + .word 2817762935 + # Prepare the ctr counter into v8 + # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] 
+ .word 1342710871 + # Merge IV and ctr counter into v12. + # v12:[x, x, x, count+0, x, x, x, count+1, ...] + .word 86536279 + .word 12846679 + + li t4, 0 + # Get the SEW32 size in the first round. + # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then + # we will have the leading padding zero. + addi t0, a6, -1 + and t0, t0, t5 + beqz t0, 1f + + ## with padding + sub t5, t5, t0 + sub t4, a6, t0 + # padding block size + srli t1, t4, 2 + # padding byte size + slli t2, t4, 2 + + # Adjust the ctr counter to make the counter start from `counter+0` for the + # first non-padding block. + .word 86536279 + .word 147015255 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + .word 1237626455 + + # Prepare the mask for input loading in the first round. We use + # `VL=FULL_BLOCK_LEN32` with the mask in the first round. + # Adjust input ptr. + sub a0, a0, t2 + # Adjust output ptr. + sub a1, a1, t2 + .word 211316823 + .word 1376297303 + # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead. + # The original code is: + # vmsgeu.vx v0, v2, t4 + addi t0, t4, -1 + .word 2049097815 + .word 220754007 + .word 1577073751 + # Load the input for length FULL_BLOCK_LEN32 with mask. + .word 86536279 + .word 355335 + + # Load the init `Xi` data to v20 with preceding zero padding. + # Adjust Xi ptr. + sub t0, a5, t2 + # Load for length `zero-padding-e32-length + 4`. + addi t1, t4, 4 + .word 19099735 + .word 190983 + j 2f + +1: + ## without padding + sub t5, t5, a6 + + .word 220754007 + .word 33909767 + + # Load the init Xi data to v20. + .word 3372380247 + .word 34073095 + + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 86536279 + .word 1577455191 + .word 1237626455 +2: + + + # Load number of rounds + lwu t0, 240(a3) + li t1, 14 + li t2, 12 + li t3, 10 + + beq t0, t1, aes_gcm_enc_blocks_256 + beq t0, t2, aes_gcm_enc_blocks_192 + beq t0, t3, aes_gcm_enc_blocks_128 + +.Lenc_end: + li a0, 0 + ret + +.size rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_encrypt +.p2align 3 +aes_gcm_enc_blocks_128: + srli t6, a6, 2 + slli t0, a6, 2 + + # Load all 11 aes round keys to v1-v11 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + # We already have the ciphertext/plaintext and ctr data for the first round. + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr result. + .word 801902167 + + bnez t4, 1f + + ## without padding + # Store ciphertext/plaintext + .word 33943079 + j 2f + + ## with padding +1: + # Store ciphertext/plaintext using mask + .word 388647 + + # Fill zero for the padding blocks + .word 154071127 + .word 1577074263 + + # We have used mask register for `INPUT_PADDING_MASK` before. We need to + # setup the ctr mask back. + # ctr mask : [000100010001....] + .word 201879639 + li t1, 0b10001000 + .word 1577271383 +2: + + + + add a0, a0, t0 + add a1, a1, t0 + + + .word 220754007 + +.Lenc_blocks_128: + # Compute the partial tags. 
+ # The partial tags will multiply with [H^n, H^n, ..., H^n] + # [tag0, tag1, ...] = + # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n] + # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round. + beqz t5, .Lenc_blocks_128_end + .word 3004050039 + + .word 86536279 + # Increase ctr in v12. + .word 13616727 + sub t5, t5, a6 + # Load plaintext into v24 + .word 220229719 + .word 33909767 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + add a0, a0, t0 + .word 86011991 + .word 1237626455 + + + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr ciphertext result. + .word 801902167 + + # Store ciphertext + .word 33943079 + add a1, a1, t0 + + j .Lenc_blocks_128 +.Lenc_blocks_128_end: + + # Add ciphertext into partial tag + .word 793643607 + + .word 3441586263 + # Update current ctr value to v12 + .word 13616727 + # Convert ctr to big-endian counter. + .word 1220847191 + .word 484903 + + + # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2). + # Load H to v1 + addi t1, a5, 32 + .word 3439489111 + .word 33775751 + # Multiply H for each partial tag and XOR them together. + # Handle 1st partial tag + .word 1577713751 + .word 2719522935 + # Handle 2nd to N-th partial tags + li t1, 4 +1: + .word 3441586263 + .word 1061372503 + .word 3439489111 + .word 2987532407 + addi t1, t1, 4 + blt t1, a6, 1b + + + # Save the final tag + .word 34070567 + + # return the processed size. + slli a0, a7, 2 + ret +.size aes_gcm_enc_blocks_128,.-aes_gcm_enc_blocks_128 +.p2align 3 +aes_gcm_enc_blocks_192: + srli t6, a6, 2 + slli t0, a6, 2 + + # We run out of 32 vector registers, so we just preserve some round keys + # and load the remaining round keys inside the aes body. + # We keep the round keys for: + # 1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys. + # The following keys will be loaded in the aes body: + # 4, 8 and 13th keys. + .word 3439489111 + # key 1 + .word 34005127 + # key 2 + addi t1, a3, 16 + .word 33775879 + # key 3 + addi t1, a3, 32 + .word 33776007 + # key 5 + addi t1, a3, 64 + .word 33776135 + # key 6 + addi t1, a3, 80 + .word 33776263 + # key 7 + addi t1, a3, 96 + .word 33776391 + # key 9 + addi t1, a3, 128 + .word 33776519 + # key 10 + addi t1, a3, 144 + .word 33776647 + # key 11 + addi t1, a3, 160 + .word 33776775 + # key 12 + addi t1, a3, 176 + .word 33776903 + + # We already have the ciphertext/plaintext and ctr data for the first round. + # Load key 4 + .word 3439489111 + addi t1, a3, 48 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2796629623 + # Load key 8 + .word 3439489111 + addi t1, a3, 112 + .word 33777031 + .word 220754007 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 13 + .word 3439489111 + addi t1, a3, 192 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr result. + .word 801902167 + + bnez t4, 1f + + ## without padding + # Store ciphertext/plaintext + .word 33943079 + j 2f + + ## with padding +1: + # Store ciphertext/plaintext using mask + .word 388647 + + # Fill zero for the padding blocks + .word 154071127 + .word 1577074263 + + # We have used mask register for `INPUT_PADDING_MASK` before. 
We need to + # setup the ctr mask back. + # ctr mask : [000100010001....] + .word 201879639 + li t1, 0b10001000 + .word 1577271383 +2: + + + + add a0, a0, t0 + add a1, a1, t0 + + + .word 220754007 + +.Lenc_blocks_192: + # Compute the partial tags. + # The partial tags will multiply with [H^n, H^n, ..., H^n] + # [tag0, tag1, ...] = + # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n] + # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round. + beqz t5, .Lenc_blocks_192_end + .word 3004050039 + + .word 86536279 + # Increase ctr in v12. + .word 13616727 + sub t5, t5, a6 + # Load plaintext into v24 + .word 220229719 + .word 33909767 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + add a0, a0, t0 + .word 86011991 + .word 1237626455 + + + # Load key 4 + .word 3439489111 + addi t1, a3, 48 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2796629623 + # Load key 8 + .word 3439489111 + addi t1, a3, 112 + .word 33777031 + .word 220754007 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 13 + .word 3439489111 + addi t1, a3, 192 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr ciphertext result. + .word 801902167 + + # Store ciphertext + .word 33943079 + add a1, a1, t0 + + j .Lenc_blocks_192 +.Lenc_blocks_192_end: + + # Add ciphertext into partial tag + .word 793643607 + + .word 3441586263 + # Update current ctr value to v12 + .word 13616727 + # Convert ctr to big-endian counter. + .word 1220847191 + .word 484903 + + + # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2). + # Load H to v1 + addi t1, a5, 32 + .word 3439489111 + .word 33775751 + # Multiply H for each partial tag and XOR them together. + # Handle 1st partial tag + .word 1577713751 + .word 2719522935 + # Handle 2nd to N-th partial tags + li t1, 4 +1: + .word 3441586263 + .word 1061372503 + .word 3439489111 + .word 2987532407 + addi t1, t1, 4 + blt t1, a6, 1b + + + # Save the final tag + .word 34070567 + + # return the processed size. + slli a0, a7, 2 + ret +.size aes_gcm_enc_blocks_192,.-aes_gcm_enc_blocks_192 +.p2align 3 +aes_gcm_enc_blocks_256: + srli t6, a6, 2 + slli t0, a6, 2 + + # We run out of 32 vector registers, so we just preserve some round keys + # and load the remaining round keys inside the aes body. + # We keep the round keys for: + # 1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys. + # The following keys will be loaded in the aes body: + # 3, 6, 9, 12 and 15th keys. + .word 3439489111 + # key 1 + .word 34005127 + # key 2 + addi t1, a3, 16 + .word 33775879 + # key 4 + addi t1, a3, 48 + .word 33776007 + # key 5 + addi t1, a3, 64 + .word 33776135 + # key 7 + addi t1, a3, 96 + .word 33776263 + # key 8 + addi t1, a3, 112 + .word 33776391 + # key 10 + addi t1, a3, 144 + .word 33776519 + # key 11 + addi t1, a3, 160 + .word 33776647 + # key 13 + addi t1, a3, 192 + .word 33776775 + # key 14 + addi t1, a3, 208 + .word 33776903 + + # We already have the ciphertext/plaintext and ctr data for the first round. 
+ # Load key 3 + .word 3439489111 + addi t1, a3, 32 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2796629623 + # Load key 6 + .word 3439489111 + addi t1, a3, 80 + .word 33777031 + .word 220754007 + .word 2788241015 + .word 2789289591 + .word 2796629623 + # Load key 9 + .word 3439489111 + addi t1, a3, 128 + .word 33777031 + .word 220754007 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 12 + .word 3439489111 + addi t1, a3, 176 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2796629623 + # Load key 15 + .word 3439489111 + addi t1, a3, 224 + .word 33777031 + .word 220754007 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr result. + .word 801902167 + + bnez t4, 1f + + ## without padding + # Store ciphertext/plaintext + .word 33943079 + j 2f + + ## with padding +1: + # Store ciphertext/plaintext using mask + .word 388647 + + # Fill zero for the padding blocks + .word 154071127 + .word 1577074263 + + # We have used mask register for `INPUT_PADDING_MASK` before. We need to + # setup the ctr mask back. + # ctr mask : [000100010001....] + .word 201879639 + li t1, 0b10001000 + .word 1577271383 +2: + + + + add a0, a0, t0 + add a1, a1, t0 + + + .word 220754007 + +.Lenc_blocks_256: + # Compute the partial tags. + # The partial tags will multiply with [H^n, H^n, ..., H^n] + # [tag0, tag1, ...] = + # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n] + # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round. + beqz t5, .Lenc_blocks_256_end + .word 3004050039 + + .word 86536279 + # Increase ctr in v12. + .word 13616727 + sub t5, t5, a6 + # Load plaintext into v24 + .word 220229719 + .word 33909767 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + add a0, a0, t0 + .word 86011991 + .word 1237626455 + + + # Load key 3 + .word 3439489111 + addi t1, a3, 32 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2796629623 + # Load key 6 + .word 3439489111 + addi t1, a3, 80 + .word 33777031 + .word 220754007 + .word 2788241015 + .word 2789289591 + .word 2796629623 + # Load key 9 + .word 3439489111 + addi t1, a3, 128 + .word 33777031 + .word 220754007 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 12 + .word 3439489111 + addi t1, a3, 176 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2796629623 + # Load key 15 + .word 3439489111 + addi t1, a3, 224 + .word 33777031 + .word 220754007 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr ciphertext result. + .word 801902167 + + # Store ciphertext + .word 33943079 + add a1, a1, t0 + + j .Lenc_blocks_256 +.Lenc_blocks_256_end: + + # Add ciphertext into partial tag + .word 793643607 + + .word 3441586263 + # Update current ctr value to v12 + .word 13616727 + # Convert ctr to big-endian counter. + .word 1220847191 + .word 484903 + + + # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2). + # Load H to v1 + addi t1, a5, 32 + .word 3439489111 + .word 33775751 + # Multiply H for each partial tag and XOR them together. + # Handle 1st partial tag + .word 1577713751 + .word 2719522935 + # Handle 2nd to N-th partial tags + li t1, 4 +1: + .word 3441586263 + .word 1061372503 + .word 3439489111 + .word 2987532407 + addi t1, t1, 4 + blt t1, a6, 1b + + + # Save the final tag + .word 34070567 + + # return the processed size. 
+ slli a0, a7, 2 + ret +.size aes_gcm_enc_blocks_256,.-aes_gcm_enc_blocks_256 +.p2align 3 +.globl rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt +.type rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,@function +rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt: + srli t0, a2, 4 + beqz t0, .Ldec_end + slli t5, t0, 2 + + mv a7, t5 + + # Compute the AES-GCM full-block e32 length for `LMUL=4`. We will handle + # the multiple AES-GCM blocks at the same time within `LMUL=4` register. + # The AES-GCM's SEW is e32 and EGW is 128 bits. + # FULL_BLOCK_LEN32 = (VLEN*LMUL)/(EGW) * (EGW/SEW) = (VLEN*4)/(32*4) * 4 + # = (VLEN*4)/32 + # We could get the block_num using the VL value of `vsetvli with e32, m4`. + .word 220231767 + # If `LEN32 % FULL_BLOCK_LEN32` is not equal to zero, we could fill the + # zero padding data to make sure we could always handle FULL_BLOCK_LEN32 + # blocks for all iterations. + + ## Prepare the H^n multiplier in v16 for GCM multiplier. The `n` is the gcm + ## block number in a LMUL=4 register group. + ## n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4)) + ## = (VLEN/32) + ## We could use vsetvli with `e32, m1` to compute the `n` number. + .word 218133207 + + # The H is at `gcm128_context.Htable[0]`(addr(Xi)+16*2). + addi t1, a5, 32 + .word 3439489111 + .word 33779591 + + # Compute the H^n + li t1, 1 +1: + .word 2750984183 + slli t1, t1, 1 + bltu t1, t0, 1b + + .word 220754007 + .word 1577072727 + .word 2817763447 + + #### Load plaintext into v24 and handle padding. We also load the init tag + #### data into v20 and prepare the AES ctr input data into v12 and v28. + .word 1577073239 + + ## Prepare the AES ctr input data into v12. + # Setup ctr input mask. + # ctr mask : [000100010001....] + # Note: The actual vl should be `FULL_BLOCK_LEN32/4 * 2`, but we just use + # `FULL_BLOCK_LEN32` here. + .word 201879639 + li t0, 0b10001000 + .word 1577238615 + # Load IV. + .word 3439489111 + .word 34041735 + # Convert the big-endian counter into little-endian. + .word 3305271383 + .word 1240772567 + # Splat the `single block of IV` to v12 + .word 220754007 + .word 1577072215 + .word 2817762935 + # Prepare the ctr counter into v8 + # v8: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] + .word 1342710871 + # Merge IV and ctr counter into v12. + # v12:[x, x, x, count+0, x, x, x, count+1, ...] + .word 86536279 + .word 12846679 + + li t4, 0 + # Get the SEW32 size in the first round. + # If we have the non-zero value for `LEN32&(FULL_BLOCK_LEN32-1)`, then + # we will have the leading padding zero. + addi t0, a6, -1 + and t0, t0, t5 + beqz t0, 1f + + ## with padding + sub t5, t5, t0 + sub t4, a6, t0 + # padding block size + srli t1, t4, 2 + # padding byte size + slli t2, t4, 2 + + # Adjust the ctr counter to make the counter start from `counter+0` for the + # first non-padding block. + .word 86536279 + .word 147015255 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + .word 1237626455 + + # Prepare the mask for input loading in the first round. We use + # `VL=FULL_BLOCK_LEN32` with the mask in the first round. + # Adjust input ptr. + sub a0, a0, t2 + # Adjust output ptr. + sub a1, a1, t2 + .word 211316823 + .word 1376297303 + # We don't use the pseudo instruction `vmsgeu` here. Use `vmsgtu` instead. + # The original code is: + # vmsgeu.vx v0, v2, t4 + addi t0, t4, -1 + .word 2049097815 + .word 220754007 + .word 1577073751 + # Load the input for length FULL_BLOCK_LEN32 with mask. + .word 86536279 + .word 355335 + + # Load the init `Xi` data to v20 with preceding zero padding. 
+ # Adjust Xi ptr. + sub t0, a5, t2 + # Load for length `zero-padding-e32-length + 4`. + addi t1, t4, 4 + .word 19099735 + .word 190983 + j 2f + +1: + ## without padding + sub t5, t5, a6 + + .word 220754007 + .word 33909767 + + # Load the init Xi data to v20. + .word 3372380247 + .word 34073095 + + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 86536279 + .word 1577455191 + .word 1237626455 +2: + + + # Load number of rounds + lwu t0, 240(a3) + li t1, 14 + li t2, 12 + li t3, 10 + + beq t0, t1, aes_gcm_dec_blocks_256 + beq t0, t2, aes_gcm_dec_blocks_192 + beq t0, t3, aes_gcm_dec_blocks_128 + +.Ldec_end: + li a0, 0 + ret +.size rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt,.-rv64i_zvkb_zvkg_zvkned_aes_gcm_decrypt +.p2align 3 +aes_gcm_dec_blocks_128: + srli t6, a6, 2 + slli t0, a6, 2 + + # Load all 11 aes round keys to v1-v11 registers. + .word 3439489111 + .word 34005127 + addi a3, a3, 16 + .word 34005255 + addi a3, a3, 16 + .word 34005383 + addi a3, a3, 16 + .word 34005511 + addi a3, a3, 16 + .word 34005639 + addi a3, a3, 16 + .word 34005767 + addi a3, a3, 16 + .word 34005895 + addi a3, a3, 16 + .word 34006023 + addi a3, a3, 16 + .word 34006151 + addi a3, a3, 16 + .word 34006279 + addi a3, a3, 16 + .word 34006407 + + # We already have the ciphertext/plaintext and ctr data for the first round. + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr result. + .word 801902167 + + bnez t4, 1f + + ## without padding + # Store ciphertext/plaintext + .word 33943079 + j 2f + + ## with padding +1: + # Store ciphertext/plaintext using mask + .word 388647 + + # Fill zero for the padding blocks + .word 154071127 + .word 1577074263 + + # We have used mask register for `INPUT_PADDING_MASK` before. We need to + # setup the ctr mask back. + # ctr mask : [000100010001....] + .word 201879639 + li t1, 0b10001000 + .word 1577271383 +2: + + + + add a0, a0, t0 + add a1, a1, t0 + + + .word 220754007 + +.Ldec_blocks_128: + # Compute the partial tags. + # The partial tags will multiply with [H^n, H^n, ..., H^n] + # [tag0, tag1, ...] = + # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n] + # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round. + beqz t5, .Ldec_blocks_256_end + .word 3003918967 + + .word 86536279 + # Increase ctr in v12. + .word 13616727 + sub t5, t5, a6 + # Load plaintext into v24 + .word 220229719 + .word 33909767 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + add a0, a0, t0 + .word 86011991 + .word 1237626455 + + + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr plaintext result. + .word 801902167 + + # Store plaintext + .word 33943079 + add a1, a1, t0 + + j .Ldec_blocks_128 +.Ldec_blocks_128_end: + + # Add ciphertext into partial tag + .word 793512535 + + .word 3441586263 + # Update current ctr value to v12 + .word 13616727 + # Convert ctr to big-endian counter. + .word 1220847191 + .word 484903 + + + # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2). 
+ # Load H to v1 + addi t1, a5, 32 + .word 3439489111 + .word 33775751 + # Multiply H for each partial tag and XOR them together. + # Handle 1st partial tag + .word 1577713751 + .word 2719522935 + # Handle 2nd to N-th partial tags + li t1, 4 +1: + .word 3441586263 + .word 1061372503 + .word 3439489111 + .word 2987532407 + addi t1, t1, 4 + blt t1, a6, 1b + + + # Save the final tag + .word 34070567 + + # return the processed size. + slli a0, a7, 2 + ret +.size aes_gcm_dec_blocks_128,.-aes_gcm_dec_blocks_128 +.p2align 3 +aes_gcm_dec_blocks_192: + srli t6, a6, 2 + slli t0, a6, 2 + + # We run out of 32 vector registers, so we just preserve some round keys + # and load the remaining round keys inside the aes body. + # We keep the round keys for: + # 1, 2, 3, 5, 6, 7, 9, 10, 11 and 12th keys. + # The following keys will be loaded in the aes body: + # 4, 8 and 13th keys. + .word 3439489111 + # key 1 + .word 34005127 + # key 2 + addi t1, a3, 16 + .word 33775879 + # key 3 + addi t1, a3, 32 + .word 33776007 + # key 5 + addi t1, a3, 64 + .word 33776135 + # key 6 + addi t1, a3, 80 + .word 33776263 + # key 7 + addi t1, a3, 96 + .word 33776391 + # key 9 + addi t1, a3, 128 + .word 33776519 + # key 10 + addi t1, a3, 144 + .word 33776647 + # key 11 + addi t1, a3, 160 + .word 33776775 + # key 12 + addi t1, a3, 176 + .word 33776903 + + # We already have the ciphertext/plaintext and ctr data for the first round. + # Load key 4 + .word 3439489111 + addi t1, a3, 48 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2796629623 + # Load key 8 + .word 3439489111 + addi t1, a3, 112 + .word 33777031 + .word 220754007 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 13 + .word 3439489111 + addi t1, a3, 192 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr result. + .word 801902167 + + bnez t4, 1f + + ## without padding + # Store ciphertext/plaintext + .word 33943079 + j 2f + + ## with padding +1: + # Store ciphertext/plaintext using mask + .word 388647 + + # Fill zero for the padding blocks + .word 154071127 + .word 1577074263 + + # We have used mask register for `INPUT_PADDING_MASK` before. We need to + # setup the ctr mask back. + # ctr mask : [000100010001....] + .word 201879639 + li t1, 0b10001000 + .word 1577271383 +2: + + + + add a0, a0, t0 + add a1, a1, t0 + + + .word 220754007 + +.Ldec_blocks_192: + # Compute the partial tags. + # The partial tags will multiply with [H^n, H^n, ..., H^n] + # [tag0, tag1, ...] = + # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n] + # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round. + beqz t5, .Ldec_blocks_192_end + .word 3003918967 + + .word 86536279 + # Increase ctr in v12. + .word 13616727 + sub t5, t5, a6 + # Load plaintext into v24 + .word 220229719 + .word 33909767 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. 
+ .word 1577455191 + add a0, a0, t0 + .word 86011991 + .word 1237626455 + + + # Load key 4 + .word 3439489111 + addi t1, a3, 48 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2788241015 + .word 2796629623 + # Load key 8 + .word 3439489111 + addi t1, a3, 112 + .word 33777031 + .word 220754007 + .word 2789289591 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 13 + .word 3439489111 + addi t1, a3, 192 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr plaintext result. + .word 801902167 + + # Store plaintext + .word 33943079 + add a1, a1, t0 + + j .Ldec_blocks_192 +.Ldec_blocks_192_end: + + # Add ciphertext into partial tag + .word 793512535 + + .word 3441586263 + # Update current ctr value to v12 + .word 13616727 + # Convert ctr to big-endian counter. + .word 1220847191 + .word 484903 + + + # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2). + # Load H to v1 + addi t1, a5, 32 + .word 3439489111 + .word 33775751 + # Multiply H for each partial tag and XOR them together. + # Handle 1st partial tag + .word 1577713751 + .word 2719522935 + # Handle 2nd to N-th partial tags + li t1, 4 +1: + .word 3441586263 + .word 1061372503 + .word 3439489111 + .word 2987532407 + addi t1, t1, 4 + blt t1, a6, 1b + + + # Save the final tag + .word 34070567 + + # return the processed size. + slli a0, a7, 2 + ret +.size aes_gcm_dec_blocks_192,.-aes_gcm_dec_blocks_192 +.p2align 3 +aes_gcm_dec_blocks_256: + srli t6, a6, 2 + slli t0, a6, 2 + + # We run out of 32 vector registers, so we just preserve some round keys + # and load the remaining round keys inside the aes body. + # We keep the round keys for: + # 1, 2, 4, 5, 7, 8, 10, 11, 13 and 14th keys. + # The following keys will be loaded in the aes body: + # 3, 6, 9, 12 and 15th keys. + .word 3439489111 + # key 1 + .word 34005127 + # key 2 + addi t1, a3, 16 + .word 33775879 + # key 4 + addi t1, a3, 48 + .word 33776007 + # key 5 + addi t1, a3, 64 + .word 33776135 + # key 7 + addi t1, a3, 96 + .word 33776263 + # key 8 + addi t1, a3, 112 + .word 33776391 + # key 10 + addi t1, a3, 144 + .word 33776519 + # key 11 + addi t1, a3, 160 + .word 33776647 + # key 13 + addi t1, a3, 192 + .word 33776775 + # key 14 + addi t1, a3, 208 + .word 33776903 + + # We already have the ciphertext/plaintext and ctr data for the first round. + # Load key 3 + .word 3439489111 + addi t1, a3, 32 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2796629623 + # Load key 6 + .word 3439489111 + addi t1, a3, 80 + .word 33777031 + .word 220754007 + .word 2788241015 + .word 2789289591 + .word 2796629623 + # Load key 9 + .word 3439489111 + addi t1, a3, 128 + .word 33777031 + .word 220754007 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 12 + .word 3439489111 + addi t1, a3, 176 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2796629623 + # Load key 15 + .word 3439489111 + addi t1, a3, 224 + .word 33777031 + .word 220754007 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr result. + .word 801902167 + + bnez t4, 1f + + ## without padding + # Store ciphertext/plaintext + .word 33943079 + j 2f + + ## with padding +1: + # Store ciphertext/plaintext using mask + .word 388647 + + # Fill zero for the padding blocks + .word 154071127 + .word 1577074263 + + # We have used mask register for `INPUT_PADDING_MASK` before. 
We need to + # setup the ctr mask back. + # ctr mask : [000100010001....] + .word 201879639 + li t1, 0b10001000 + .word 1577271383 +2: + + + + add a0, a0, t0 + add a1, a1, t0 + + + .word 220754007 + +.Ldec_blocks_256: + # Compute the partial tags. + # The partial tags will multiply with [H^n, H^n, ..., H^n] + # [tag0, tag1, ...] = + # ([tag0, tag1, ...] + [ciphertext0, ciphertext1, ...] * [H^n, H^n, ..., H^n] + # We will skip the [H^n, H^n, ..., H^n] multiplication for the last round. + beqz t5, .Ldec_blocks_256_end + .word 3003918967 + + .word 86536279 + # Increase ctr in v12. + .word 13616727 + sub t5, t5, a6 + # Load plaintext into v24 + .word 220229719 + .word 33909767 + # Prepare the AES ctr input into v28. + # The ctr data uses big-endian form. + .word 1577455191 + add a0, a0, t0 + .word 86011991 + .word 1237626455 + + + # Load key 3 + .word 3439489111 + addi t1, a3, 32 + .word 33777031 + .word 220754007 + .word 2786307703 + .word 2787192439 + .word 2796629623 + # Load key 6 + .word 3439489111 + addi t1, a3, 80 + .word 33777031 + .word 220754007 + .word 2788241015 + .word 2789289591 + .word 2796629623 + # Load key 9 + .word 3439489111 + addi t1, a3, 128 + .word 33777031 + .word 220754007 + .word 2790338167 + .word 2791386743 + .word 2796629623 + # Load key 12 + .word 3439489111 + addi t1, a3, 176 + .word 33777031 + .word 220754007 + .word 2792435319 + .word 2793483895 + .word 2796629623 + # Load key 15 + .word 3439489111 + addi t1, a3, 224 + .word 33777031 + .word 220754007 + .word 2794532471 + .word 2795581047 + .word 2796662391 + + + # Compute AES ctr plaintext result. + .word 801902167 + + # Store plaintext + .word 33943079 + add a1, a1, t0 + + j .Ldec_blocks_256 +.Ldec_blocks_256_end: + + # Add ciphertext into partial tag + .word 793512535 + + .word 3441586263 + # Update current ctr value to v12 + .word 13616727 + # Convert ctr to big-endian counter. + .word 1220847191 + .word 484903 + + + # The H is at `gcm128_context.Htable[0]` (addr(Xi)+16*2). + # Load H to v1 + addi t1, a5, 32 + .word 3439489111 + .word 33775751 + # Multiply H for each partial tag and XOR them together. + # Handle 1st partial tag + .word 1577713751 + .word 2719522935 + # Handle 2nd to N-th partial tags + li t1, 4 +1: + .word 3441586263 + .word 1061372503 + .word 3439489111 + .word 2987532407 + addi t1, t1, 4 + blt t1, a6, 1b + + + # Save the final tag + .word 34070567 + + # return the processed size. 
+ slli a0, a7, 2 + ret +.size aes_gcm_dec_blocks_256,.-aes_gcm_dec_blocks_256 diff --git a/contrib/openssl-cmake/asm/crypto/modes/aesni-gcm-x86_64.s b/contrib/openssl-cmake/asm/crypto/modes/aesni-gcm-x86_64.s new file mode 100644 index 000000000000..694432a92990 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/aesni-gcm-x86_64.s @@ -0,0 +1,791 @@ +.text + +.type _aesni_ctr32_ghash_6x,@function +.align 32 +_aesni_ctr32_ghash_6x: +.cfi_startproc + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp .Loop6x + +.align 32 +.Loop6x: + addl $100663296,%ebx + jc .Lhandle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +.Lresume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $0x60,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 + vaesenc 
%xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $11,%ebp + jb .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + je .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp .Lenc_tail + +.align 32 +.Lhandle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp .Lresume_ctr32 + +.align 32 +.Lenc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq 
$0x10,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $0x60,%r10 + subq $0x6,%rdx + jc .L6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp .Loop6x + +.L6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + .byte 0xf3,0xc3 +.cfi_endproc +.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +.align 32 +aesni_gcm_decrypt: +.cfi_startproc + xorq %r10,%r10 + cmpq $0x60,%rdx + jb .Lgcm_dec_abort + + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + vmovdqu (%r9),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32+32(%r9),%r9 + movl 240-128(%rcx),%ebp + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Ldec_no_key_aliasing + cmpq $768,%r15 + jnc .Ldec_no_key_aliasing + subq %r15,%rsp +.Ldec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + leaq (%rdi),%r14 + vmovdqu 64(%rdi),%xmm4 + leaq -192(%rdi,%rdx,1),%r15 + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %r10,%r10 + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups %xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lgcm_dec_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +.type _aesni_ctr32_6x,@function +.align 32 +_aesni_ctr32_6x: +.cfi_startproc + 
vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 32(%r11),%xmm2 + leaq -1(%rbp),%r13 + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc .Lhandle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 + +.align 16 +.Loop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz .Loop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + .byte 0xf3,0xc3 +.align 32 +.Lhandle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 +.cfi_endproc +.size _aesni_ctr32_6x,.-_aesni_ctr32_6x + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +.align 32 +aesni_gcm_encrypt: +.cfi_startproc + xorq %r10,%r10 + cmpq $288,%rdx + jb .Lgcm_enc_abort + + leaq (%rsp),%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $0xf80,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 240-128(%rcx),%ebp + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Lenc_no_key_aliasing + cmpq $768,%r15 + jnc .Lenc_no_key_aliasing + subq %r15,%rsp +.Lenc_no_key_aliasing: + + leaq (%rsi),%r14 + leaq -192(%rsi,%rdx,1),%r15 + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + vmovdqu (%r9),%xmm8 + leaq 32+32(%r9),%r9 + 
subq $12,%rdx + movq $192,%r10 + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 + vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 + 
vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 + vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lgcm_enc_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +.section .rodata +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lpoly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lone_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Ltwo_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lone_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous +.align 64 diff --git a/contrib/openssl-cmake/asm/crypto/modes/asm/aes-gcm-armv8-unroll8_64.S b/contrib/openssl-cmake/asm/crypto/modes/asm/aes-gcm-armv8-unroll8_64.S new file mode 100644 index 000000000000..a53d209f2cda --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/asm/aes-gcm-armv8-unroll8_64.S @@ -0,0 +1,8487 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=8 +.arch armv8-a+crypto +.text +.globl unroll8_eor3_aes_gcm_enc_128_kernel +.type unroll8_eor3_aes_gcm_enc_128_kernel,%function +.align 4 +unroll8_eor3_aes_gcm_enc_128_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L128_enc_ret + stp d8, d9, [sp, #-80]! 
+ lsr x9, x1, #3 + mov x16, x4 + mov x8, x5 + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov x5, #0xc200000000000000 + stp x5, xzr, [sp, #64] + add x10, sp, #64 + + mov x15, #0x100000000 //set up counter increment + movi v31.16b, #0x0 + mov v31.d[1], x15 + mov x5, x9 + ld1 { v0.16b}, [x16] //CTR block 0 + + sub x5, x5, #1 //byte_len - 1 + + and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + rev32 v30.16b, v0.16b //set up reversed counter + + add v30.4s, v30.4s, v31.4s //CTR block 0 + + rev32 v1.16b, v30.16b //CTR block 1 + add v30.4s, v30.4s, v31.4s //CTR block 1 + + rev32 v2.16b, v30.16b //CTR block 2 + add v30.4s, v30.4s, v31.4s //CTR block 2 + + rev32 v3.16b, v30.16b //CTR block 3 + add v30.4s, v30.4s, v31.4s //CTR block 3 + + rev32 v4.16b, v30.16b //CTR block 4 + add v30.4s, v30.4s, v31.4s //CTR block 4 + + rev32 v5.16b, v30.16b //CTR block 5 + add v30.4s, v30.4s, v31.4s //CTR block 5 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + rev32 v6.16b, v30.16b //CTR block 6 + add v30.4s, v30.4s, v31.4s //CTR block 6 + + rev32 v7.16b, v30.16b //CTR block 7 + add v30.4s, v30.4s, v31.4s //CTR block 7 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 0 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 1 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 1 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 2 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 2 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 2 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 3 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 3 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 3 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES 
block 6 - round 4 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 4 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 5 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 5 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 6 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 7 + + ld1 { v19.16b}, [x3] + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 7 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 7 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + ldr q27, [x8, #160] //load rk10 + + aese v3.16b, v26.16b //AES block 8k+11 - round 9 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + aese v2.16b, v26.16b //AES block 8k+10 - round 9 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + aese v6.16b, v26.16b //AES block 8k+14 - round 9 + + aese v4.16b, v26.16b //AES block 8k+12 - round 9 + add x5, x5, x0 + aese v0.16b, v26.16b //AES block 8k+8 - round 9 + + aese v7.16b, v26.16b //AES block 8k+15 - round 9 + aese v5.16b, v26.16b //AES block 8k+13 - round 9 + aese v1.16b, v26.16b //AES block 8k+9 - round 9 + + add x4, x0, x1, lsr #3 //end_input_ptr 
+ cmp x0, x5 //check if we have <= 8 blocks + b.ge .L128_enc_tail //handle tail + + ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext + + ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext + + ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext + + ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext + cmp x0, x5 //check if we have <= 8 blocks + +.inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result + rev32 v0.16b, v30.16b //CTR block 8 + add v30.4s, v30.4s, v31.4s //CTR block 8 + +.inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result + stp q8, q9, [x2], #32 //AES block 0, 1 - store result + + rev32 v1.16b, v30.16b //CTR block 9 +.inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result + add v30.4s, v30.4s, v31.4s //CTR block 9 + +.inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result +.inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result +.inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result + + rev32 v2.16b, v30.16b //CTR block 10 + add v30.4s, v30.4s, v31.4s //CTR block 10 + +.inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result +.inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b,v27.16b //AES block 7 - result + stp q10, q11, [x2], #32 //AES block 2, 3 - store result + + rev32 v3.16b, v30.16b //CTR block 11 + add v30.4s, v30.4s, v31.4s //CTR block 11 + stp q12, q13, [x2], #32 //AES block 4, 5 - store result + + stp q14, q15, [x2], #32 //AES block 6, 7 - store result + + rev32 v4.16b, v30.16b //CTR block 12 + add v30.4s, v30.4s, v31.4s //CTR block 12 + b.ge .L128_enc_prepretail //do prepretail + +.L128_enc_main_loop: //main loop start + rev32 v5.16b, v30.16b //CTR block 8k+13 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + rev64 v9.16b, v9.16b //GHASH block 8k+1 + rev64 v8.16b, v8.16b //GHASH block 8k + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) + rev64 v11.16b, v11.16b //GHASH block 8k+3 + + ldp q26, q27, [x8, #0] //load rk0, rk1 + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev32 v7.16b, v30.16b //CTR block 8k+15 + + rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) + + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + rev64 v10.16b, v10.16b //GHASH block 8k+2 + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h3l | h3h + ext v25.16b, v25.16b, v25.16b, #8 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + + aese 
v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b,v9.16b //GHASH block 8k+2, 8k+3 - high + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + ldp q28, q26, [x8, #32] //load rk2, rk3 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + + rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) + + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h1l | h1h + ext v22.16b, v22.16b, v22.16b, #8 + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 
8k+4, 8k+5 - mid + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + ldp q26, q27, [x8, #96] //load rk6, rk7 + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + ldr d16, [x10] //MODULO - load modulo constant + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + rev32 v20.16b, v30.16b //CTR block 8k+16 + add v30.4s, v30.4s, v31.4s //CTR block 8k+16 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + aese v0.16b, 
v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + ldp q28, q26, [x8, #128] //load rk8, rk9 +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + + rev32 v22.16b, v30.16b //CTR block 8k+17 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext + add v30.4s, v30.4s, v31.4s //CTR block 8k+17 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + ldr q27, [x8, #160] //load rk10 + + ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + rev32 v23.16b, v30.16b //CTR block 8k+18 + add v30.4s, v30.4s, v31.4s //CTR block 8k+18 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + + aese v2.16b, v26.16b //AES block 8k+10 - round 9 + aese v4.16b, v26.16b //AES block 8k+12 - round 9 + aese v1.16b, v26.16b //AES block 8k+9 - round 9 + + ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext + rev32 v25.16b, v30.16b //CTR block 8k+19 + add v30.4s, v30.4s, v31.4s //CTR block 8k+19 + + cmp x0, x5 //.LOOP CONTROL +.inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result + aese v7.16b, v26.16b //AES block 8k+15 - round 9 + + aese v6.16b, v26.16b //AES block 8k+14 - round 9 + aese v3.16b, v26.16b //AES block 8k+11 - round 9 + +.inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result + + mov v2.16b, v23.16b //CTR block 8k+18 + aese v0.16b, v26.16b //AES block 8k+8 - round 9 + + rev32 v4.16b, v30.16b //CTR block 8k+20 + add v30.4s, v30.4s, v31.4s //CTR block 8k+20 + +.inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result + aese v5.16b, v26.16b //AES block 8k+13 - round 9 + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + +.inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result +.inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result + mov v3.16b, v25.16b //CTR block 8k+19 + + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment +.inst 
0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result + mov v1.16b, v22.16b //CTR block 8k+17 + +.inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result + mov v0.16b, v20.16b //CTR block 8k+16 + stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result + + stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result +.inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result + + stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + + stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result + b.lt .L128_enc_main_loop + +.L128_enc_prepretail: //PREPRETAIL + rev32 v5.16b, v30.16b //CTR block 8k+13 + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + rev64 v8.16b, v8.16b //GHASH block 8k + rev64 v9.16b, v9.16b //GHASH block 8k+1 + + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h6k | h5k + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + rev64 v11.16b, v11.16b //GHASH block 8k+3 + + rev64 v10.16b, v10.16b //GHASH block 8k+2 + eor v8.16b, v8.16b, v19.16b //PRE 1 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + + ldp q26, q27, [x8, #0] //load rk0, rk1 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + + rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) + rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) + + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + + rev32 v7.16b, v30.16b //CTR block 8k+15 + + rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b 
//AES block 8k+12 - round 0 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + + ldp q28, q26, [x8, #32] //load rk2, rk3 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + ldp q27, q28, [x8, #64] //load rk4, rk5 + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h1l | h1h + ext v22.16b, v22.16b, v22.16b, #8 + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - 
round 4 +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + ldp q26, q27, [x8, #96] //load rk6, rk7 + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + ldr d16, [x10] //MODULO - load modulo constant + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 
8k+15 - round 7 + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 +.inst 0xce114a73 //eor3 v19.16b, v19.16b, v17.16b, v18.16b //MODULO - fold into low + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + + ldr q27, [x8, #160] //load rk10 + aese v6.16b, v26.16b //AES block 8k+14 - round 9 + aese v2.16b, v26.16b //AES block 8k+10 - round 9 + + aese v0.16b, v26.16b //AES block 8k+8 - round 9 + aese v1.16b, v26.16b //AES block 8k+9 - round 9 + + aese v3.16b, v26.16b //AES block 8k+11 - round 9 + aese v5.16b, v26.16b //AES block 8k+13 - round 9 + + aese v4.16b, v26.16b //AES block 8k+12 - round 9 + aese v7.16b, v26.16b //AES block 8k+15 - round 9 +.L128_enc_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ldr q8, [x0], #16 //AES block 8k+8 - load plaintext + + mov v29.16b, v27.16b + ldp q20, q21, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + +.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result + ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag + ldp q22, q23, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + ext v23.16b, v23.16b, v23.16b, #8 + + ldp q24, q25, [x3, #192] //load h8k | h7k + ext v25.16b, v25.16b, v25.16b, #8 + cmp x5, #112 + b.gt .L128_enc_blocks_more_than_7 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + movi v17.8b, #0 + + cmp x5, #96 + sub v30.4s, v30.4s, v31.4s + mov v5.16b, v4.16b + + mov v4.16b, v3.16b + mov v3.16b, v2.16b + mov v2.16b, v1.16b + + movi v19.8b, #0 + movi v18.8b, #0 + b.gt .L128_enc_blocks_more_than_6 + + mov v7.16b, v6.16b + cmp x5, #80 + + sub v30.4s, v30.4s, v31.4s + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + mov v4.16b, v3.16b + mov v3.16b, v1.16b + b.gt .L128_enc_blocks_more_than_5 + + cmp x5, #64 + sub v30.4s, v30.4s, v31.4s + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + + mov v5.16b, v4.16b + mov v4.16b, v1.16b + b.gt .L128_enc_blocks_more_than_4 + + mov v7.16b, v6.16b + sub v30.4s, v30.4s, v31.4s + mov v6.16b, v5.16b + + mov v5.16b, v1.16b + cmp x5, #48 + b.gt .L128_enc_blocks_more_than_3 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + mov v6.16b, v1.16b + + cmp x5, #32 + ldr q24, [x3, #96] //load h4k | h3k + b.gt .L128_enc_blocks_more_than_2 + + cmp x5, #16 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v1.16b + b.gt .L128_enc_blocks_more_than_1 + + ldr q21, [x3, #48] //load h2k | h1k + sub v30.4s, v30.4s, v31.4s + b .L128_enc_blocks_less_than_1 +.L128_enc_blocks_more_than_7: //blocks left > 7 + st1 { v9.16b}, [x2], #16 //AES final-7 block - store result + + rev64 v8.16b, v9.16b //GHASH final-7 block + ldr q9, [x0], #16 //AES final-6 block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-7 block - mid + + pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high + + ins v18.d[0], 
v24.d[1] //GHASH final-7 block - mid + + eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + +.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result + + pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid + pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low +.L128_enc_blocks_more_than_6: //blocks left > 6 + + st1 { v9.16b}, [x2], #16 //AES final-6 block - store result + + rev64 v8.16b, v9.16b //GHASH final-6 block + ldr q9, [x0], #16 //AES final-5 block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-6 block - mid + +.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result + pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low + + eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + + pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high + + eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high +.L128_enc_blocks_more_than_5: //blocks left > 5 + + st1 { v9.16b}, [x2], #16 //AES final-5 block - store result + + rev64 v8.16b, v9.16b //GHASH final-5 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-5 block - mid + ldr q9, [x0], #16 //AES final-4 block - load plaintext + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high + + eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-5 block - mid + +.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result + pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low + movi v16.8b, #0 //suppress further partial tag feed in + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid +.L128_enc_blocks_more_than_4: //blocks left > 4 + + st1 { v9.16b}, [x2], #16 //AES final-4 block - store result + + rev64 v8.16b, v9.16b //GHASH final-4 block + + ldr q9, [x0], #16 //AES final-3 block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-4 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid + + pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low + + eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high + pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low + +.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result + eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid +.L128_enc_blocks_more_than_3: //blocks left > 3 + + st1 { v9.16b}, [x2], #16 //AES final-3 block - store result + + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + + rev64 v8.16b, v9.16b //GHASH final-3 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + movi v16.8b, #0 //suppress further partial tag feed in + + ins v27.d[0], v8.d[1] //GHASH final-3 block - mid + ldr q24, [x3, #96] //load h4k | h3k + pmull v26.1q, 
v8.1d, v25.1d //GHASH final-3 block - low + + ldr q9, [x0], #16 //AES final-2 block - load plaintext + + eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-3 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low + +.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result + + pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid + pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high +.L128_enc_blocks_more_than_2: //blocks left > 2 + + st1 { v9.16b}, [x2], #16 //AES final-2 block - store result + + rev64 v8.16b, v9.16b //GHASH final-2 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ldr q9, [x0], #16 //AES final-1 block - load plaintext + + ins v27.d[0], v8.d[1] //GHASH final-2 block - mid + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + movi v16.8b, #0 //suppress further partial tag feed in + + eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid +.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result + + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high + + pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low + pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low +.L128_enc_blocks_more_than_1: //blocks left > 1 + + st1 { v9.16b}, [x2], #16 //AES final-1 block - store result + + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + rev64 v8.16b, v9.16b //GHASH final-1 block + ldr q9, [x0], #16 //AES final block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + movi v16.8b, #0 //suppress further partial tag feed in + ins v27.d[0], v8.d[1] //GHASH final-1 block - mid +.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result + + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid + + ldr q21, [x3, #48] //load h2k | h1k + + ins v27.d[1], v27.d[0] //GHASH final-1 block - mid + + pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low +.L128_enc_blocks_less_than_1: //blocks left <= 1 + + rev32 v30.16b, v30.16b + str q30, [x16] //store the updated counter + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + mvn x6, xzr //temp0_x = 0xffffffffffffffff + ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + and x1, x1, #127 //bit_length %= 128 + + lsr x6, x6, x1 //temp0_x is mask for top 64b of last block + mvn x7, xzr //temp1_x = 0xffffffffffffffff + cmp x1, #64 + + csel x13, x7, x6, lt + csel x14, x6, xzr, lt + + mov v0.d[1], x14 + mov v0.d[0], x13 //ctr0b is mask for last block + + and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v8.16b, v9.16b //GHASH final block + + bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before 
storing + st1 { v9.16b}, [x2] //store all 16B + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v16.d[0], v8.d[1] //GHASH final block - mid + + eor v16.8b, v16.8b, v8.8b //GHASH final block - mid + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + + pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid + + pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high + eor v18.16b, v18.16b, v16.16b //GHASH final block - mid + ldr d16, [x10] //MODULO - load modulo constant + + pmull v26.1q, v8.1d, v20.1d //GHASH final block - low + + eor v17.16b, v17.16b, v28.16b //GHASH final block - high + + eor v19.16b, v19.16b, v26.16b //GHASH final block - low + + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + st1 { v19.16b }, [x3] + mov x0, x9 + + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + ldp d8, d9, [sp], #80 + ret + +.L128_enc_ret: + mov w0, #0x0 + ret +.size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel +.globl unroll8_eor3_aes_gcm_dec_128_kernel +.type unroll8_eor3_aes_gcm_dec_128_kernel,%function +.align 4 +unroll8_eor3_aes_gcm_dec_128_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L128_dec_ret + stp d8, d9, [sp, #-80]! + lsr x9, x1, #3 + mov x16, x4 + mov x8, x5 + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov x5, #0xc200000000000000 + stp x5, xzr, [sp, #64] + add x10, sp, #64 + + mov x5, x9 + ld1 { v0.16b}, [x16] //CTR block 0 + + ldp q26, q27, [x8, #0] //load rk0, rk1 + sub x5, x5, #1 //byte_len - 1 + + mov x15, #0x100000000 //set up counter increment + movi v31.16b, #0x0 + mov v31.d[1], x15 + ld1 { v19.16b}, [x3] + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + + rev32 v30.16b, v0.16b //set up reversed counter + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + + add v30.4s, v30.4s, v31.4s //CTR block 0 + + rev32 v1.16b, v30.16b //CTR block 1 + add v30.4s, v30.4s, v31.4s //CTR block 1 + + and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + rev32 v2.16b, v30.16b //CTR block 2 + add v30.4s, v30.4s, v31.4s //CTR block 2 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + + rev32 v3.16b, v30.16b //CTR block 3 + add v30.4s, v30.4s, v31.4s //CTR block 3 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + + rev32 v4.16b, v30.16b //CTR block 4 + add v30.4s, v30.4s, v31.4s //CTR block 4 + + rev32 v5.16b, v30.16b //CTR block 5 + add v30.4s, v30.4s, v31.4s //CTR block 5 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + + rev32 v6.16b, v30.16b //CTR block 6 + add v30.4s, v30.4s, v31.4s //CTR block 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - 
round 0 + + rev32 v7.16b, v30.16b //CTR block 7 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 0 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 0 + + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 1 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 2 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 3 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 3 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 4 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 3 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 5 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 5 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 5 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 6 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b 
//AES block 6 - round 6 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 7 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 7 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 7 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + add x5, x5, x0 + add v30.4s, v30.4s, v31.4s //CTR block 7 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 8 + + aese v0.16b, v26.16b //AES block 0 - round 9 + aese v1.16b, v26.16b //AES block 1 - round 9 + aese v6.16b, v26.16b //AES block 6 - round 9 + + ldr q27, [x8, #160] //load rk10 + aese v4.16b, v26.16b //AES block 4 - round 9 + aese v3.16b, v26.16b //AES block 3 - round 9 + + aese v2.16b, v26.16b //AES block 2 - round 9 + aese v5.16b, v26.16b //AES block 5 - round 9 + aese v7.16b, v26.16b //AES block 7 - round 9 + + add x4, x0, x1, lsr #3 //end_input_ptr + cmp x0, x5 //check if we have <= 8 blocks + b.ge .L128_dec_tail //handle tail + + ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext + +.inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result +.inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result + stp q0, q1, [x2], #32 //AES block 0, 1 - store result + + rev32 v0.16b, v30.16b //CTR block 8 + add v30.4s, v30.4s, v31.4s //CTR block 8 + ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext + + ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext + + rev32 v1.16b, v30.16b //CTR block 9 + add v30.4s, v30.4s, v31.4s //CTR block 9 + ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext + +.inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result +.inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result + stp q2, q3, [x2], #32 //AES block 2, 3 - store result + + rev32 v2.16b, v30.16b //CTR block 10 + add v30.4s, v30.4s, v31.4s //CTR block 10 + +.inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result + + rev32 v3.16b, v30.16b //CTR block 11 + add v30.4s, v30.4s, v31.4s //CTR block 11 + +.inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result +.inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result + stp q4, q5, [x2], #32 //AES block 4, 5 - store result + +.inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result + stp q6, q7, [x2], #32 //AES block 6, 7 - store result + rev32 v4.16b, v30.16b //CTR block 12 + + cmp x0, x5 //check if we have <= 8 blocks + add v30.4s, v30.4s, v31.4s //CTR block 12 + b.ge .L128_dec_prepretail //do prepretail + +.L128_dec_main_loop: //main loop start + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + 
ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + + rev64 v9.16b, v9.16b //GHASH block 8k+1 + rev64 v8.16b, v8.16b //GHASH block 8k + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + + rev64 v14.16b, v14.16b //GHASH block 8k+6 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev32 v5.16b, v30.16b //CTR block 8k+13 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + rev64 v10.16b, v10.16b //GHASH block 8k+2 + rev64 v12.16b, v12.16b //GHASH block 8k+4 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + rev64 v11.16b, v11.16b //GHASH block 8k+3 + + rev32 v7.16b, v30.16b //CTR block 8k+15 + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + rev64 v13.16b, v13.16b //GHASH block 8k+5 + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + + ldp q28, q26, [x8, #32] //load rk2, rk3 + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - 
round 2 +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + + ldp q27, q28, [x8, #64] //load rk4, rk5 + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + 
aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + ldr d16, [x10] //MODULO - load modulo constant +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + + rev32 v20.16b, v30.16b //CTR block 8k+16 +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + add v30.4s, v30.4s, v31.4s //CTR block 8k+16 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + rev32 v22.16b, v30.16b //CTR block 8k+17 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + add v30.4s, v30.4s, v31.4s //CTR block 8k+17 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext + + ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + rev32 v23.16b, v30.16b //CTR block 8k+18 + + ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext + aese v3.16b, v28.16b + aesmc v3.16b, 
v3.16b //AES block 8k+11 - round 8 + add v30.4s, v30.4s, v31.4s //CTR block 8k+18 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + + aese v0.16b, v26.16b //AES block 8k+8 - round 9 + aese v1.16b, v26.16b //AES block 8k+9 - round 9 + ldr q27, [x8, #160] //load rk10 + + aese v6.16b, v26.16b //AES block 8k+14 - round 9 + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v2.16b, v26.16b //AES block 8k+10 - round 9 + + aese v7.16b, v26.16b //AES block 8k+15 - round 9 + aese v4.16b, v26.16b //AES block 8k+12 - round 9 + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + + rev32 v25.16b, v30.16b //CTR block 8k+19 + add v30.4s, v30.4s, v31.4s //CTR block 8k+19 + + aese v3.16b, v26.16b //AES block 8k+11 - round 9 + aese v5.16b, v26.16b //AES block 8k+13 - round 9 +.inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result + +.inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result +.inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 8k+15 - result +.inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 8k+14 - result + +.inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result + stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result + mov v1.16b, v22.16b //CTR block 8k+17 + +.inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 8k+12 - result +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + mov v0.16b, v20.16b //CTR block 8k+16 + +.inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result + cmp x0, x5 //.LOOP CONTROL + stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result + +.inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 8k+13 - result + mov v2.16b, v23.16b //CTR block 8k+18 + + stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result + rev32 v4.16b, v30.16b //CTR block 8k+20 + add v30.4s, v30.4s, v31.4s //CTR block 8k+20 + + stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result + mov v3.16b, v25.16b //CTR block 8k+19 + b.lt .L128_dec_main_loop + +.L128_dec_prepretail: //PREPRETAIL + rev64 v11.16b, v11.16b //GHASH block 8k+3 + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + rev64 v8.16b, v8.16b //GHASH block 8k + + rev64 v10.16b, v10.16b //GHASH block 8k+2 + rev32 v5.16b, v30.16b //CTR block 8k+13 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev64 v9.16b, v9.16b //GHASH block 8k+1 + + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + rev64 v13.16b, v13.16b //GHASH block 8k+5 + + rev64 v12.16b, v12.16b //GHASH block 8k+4 + + rev64 v14.16b, v14.16b //GHASH block 8k+6 + + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + trn1 v18.2d, v9.2d, v8.2d //GHASH block 
8k, 8k+1 - mid + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + rev32 v7.16b, v30.16b //CTR block 8k+15 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + + ldp q28, q26, [x8, #32] //load rk2, rk3 +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 
8k+4 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + ldp q27, q28, [x8, #64] //load rk4, rk5 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + + ldp q26, q27, [x8, #96] //load rk6, rk7 +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + + ldr d16, [x10] //MODULO - load modulo constant + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + aese v1.16b, v26.16b 
+ aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + ldp q28, q26, [x8, #128] //load rk8, rk9 + + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + ldr q27, [x8, #160] //load rk10 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + + aese v6.16b, v26.16b //AES block 8k+14 - round 9 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + aese v2.16b, v26.16b //AES block 8k+10 - round 9 + + aese v3.16b, v26.16b //AES block 8k+11 - round 9 + aese v5.16b, v26.16b //AES block 8k+13 - round 9 + aese v0.16b, v26.16b //AES block 8k+8 - round 9 + + aese v4.16b, v26.16b //AES block 8k+12 - round 9 + aese v1.16b, v26.16b //AES block 8k+9 - round 9 + aese v7.16b, v26.16b //AES block 8k+15 - round 9 + +.L128_dec_tail: //TAIL + + mov v29.16b, v27.16b + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + + cmp x5, #112 + + ldp q24, q25, [x3, #192] //load h8k | h7k + ext v25.16b, v25.16b, v25.16b, #8 + ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext + + ldp q20, q21, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag + + ldp q22, q23, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + ext v23.16b, v23.16b, v23.16b, #8 + +.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result + b.gt .L128_dec_blocks_more_than_7 + + cmp x5, #96 + mov v7.16b, v6.16b + movi 
v19.8b, #0 + + movi v17.8b, #0 + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + mov v4.16b, v3.16b + mov v3.16b, v2.16b + mov v2.16b, v1.16b + + movi v18.8b, #0 + sub v30.4s, v30.4s, v31.4s + b.gt .L128_dec_blocks_more_than_6 + + cmp x5, #80 + sub v30.4s, v30.4s, v31.4s + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + mov v4.16b, v3.16b + mov v3.16b, v1.16b + b.gt .L128_dec_blocks_more_than_5 + + cmp x5, #64 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + mov v4.16b, v1.16b + sub v30.4s, v30.4s, v31.4s + b.gt .L128_dec_blocks_more_than_4 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + mov v6.16b, v5.16b + + mov v5.16b, v1.16b + cmp x5, #48 + b.gt .L128_dec_blocks_more_than_3 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + cmp x5, #32 + + ldr q24, [x3, #96] //load h4k | h3k + mov v6.16b, v1.16b + b.gt .L128_dec_blocks_more_than_2 + + cmp x5, #16 + + mov v7.16b, v1.16b + sub v30.4s, v30.4s, v31.4s + b.gt .L128_dec_blocks_more_than_1 + + sub v30.4s, v30.4s, v31.4s + ldr q21, [x3, #48] //load h2k | h1k + b .L128_dec_blocks_less_than_1 +.L128_dec_blocks_more_than_7: //blocks left > 7 + rev64 v8.16b, v9.16b //GHASH final-7 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v18.d[0], v24.d[1] //GHASH final-7 block - mid + + pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low + ins v27.d[0], v8.d[1] //GHASH final-7 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + ldr q9, [x0], #16 //AES final-6 block - load ciphertext + + eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid + + pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high + st1 { v12.16b}, [x2], #16 //AES final-7 block - store result +.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result + + pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid +.L128_dec_blocks_more_than_6: //blocks left > 6 + + rev64 v8.16b, v9.16b //GHASH final-6 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-6 block - mid + + eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid + + pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low + ldr q9, [x0], #16 //AES final-5 block - load ciphertext + movi v16.8b, #0 //suppress further partial tag feed in + + pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid + st1 { v12.16b}, [x2], #16 //AES final-6 block - store result + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high + + eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low + eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid +.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result +.L128_dec_blocks_more_than_5: //blocks left > 5 + + rev64 v8.16b, v9.16b //GHASH final-5 block + + ldr q9, [x0], #16 //AES final-4 block - load ciphertext + st1 { v12.16b}, [x2], #16 //AES final-5 block - store result + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-5 block - mid + +.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result + + eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-5 block - mid + pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low + movi v16.8b, #0 //suppress further partial tag feed in + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high + eor v19.16b, v19.16b, v26.16b 
//GHASH final-5 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high +.L128_dec_blocks_more_than_4: //blocks left > 4 + + rev64 v8.16b, v9.16b //GHASH final-4 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + ldr q9, [x0], #16 //AES final-3 block - load ciphertext + + ins v27.d[0], v8.d[1] //GHASH final-4 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high + + pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low + + eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high + + st1 { v12.16b}, [x2], #16 //AES final-4 block - store result + eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid + +.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result + eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low + + pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid + + eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid +.L128_dec_blocks_more_than_3: //blocks left > 3 + + st1 { v12.16b}, [x2], #16 //AES final-3 block - store result + rev64 v8.16b, v9.16b //GHASH final-3 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-3 block - mid + + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + ldr q24, [x3, #96] //load h4k | h3k + + eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid + + ldr q9, [x0], #16 //AES final-2 block - load ciphertext + + ins v27.d[1], v27.d[0] //GHASH final-3 block - mid + pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low + pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high + + movi v16.8b, #0 //suppress further partial tag feed in +.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result + eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low + + pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high + eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid +.L128_dec_blocks_more_than_2: //blocks left > 2 + + rev64 v8.16b, v9.16b //GHASH final-2 block + + st1 { v12.16b}, [x2], #16 //AES final-2 block - store result + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + movi v16.8b, #0 //suppress further partial tag feed in + + ins v27.d[0], v8.d[1] //GHASH final-2 block - mid + + eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid + + pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low + + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high + pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid + ldr q9, [x0], #16 //AES final-1 block - load ciphertext + + eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low + +.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result + eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high +.L128_dec_blocks_more_than_1: //blocks left > 1 + + st1 { v12.16b}, [x2], #16 //AES final-1 block - store result + rev64 v8.16b, v9.16b //GHASH final-1 block + + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + movi v16.8b, #0 //suppress further partial tag feed in + + ins v27.d[0], v8.d[1] //GHASH final-1 block - mid + + ldr q9, [x0], #16 //AES final block - load ciphertext + pmull2 
v28.1q, v8.2d, v22.2d //GHASH final-1 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high + ldr q21, [x3, #48] //load h2k | h1k + + ins v27.d[1], v27.d[0] //GHASH final-1 block - mid +.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result + + pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid +.L128_dec_blocks_less_than_1: //blocks left <= 1 + + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + mvn x6, xzr //temp0_x = 0xffffffffffffffff + and x1, x1, #127 //bit_length %= 128 + + lsr x6, x6, x1 //temp0_x is mask for top 64b of last block + cmp x1, #64 + mvn x7, xzr //temp1_x = 0xffffffffffffffff + + csel x13, x7, x6, lt + csel x14, x6, xzr, lt + + mov v0.d[1], x14 + mov v0.d[0], x13 //ctr0b is mask for last block + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + + and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v8.16b, v9.16b //GHASH final block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high + ins v16.d[0], v8.d[1] //GHASH final block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final block - high + eor v16.8b, v16.8b, v8.8b //GHASH final block - mid + + bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing + + pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid + st1 { v12.16b}, [x2] //store all 16B + + pmull v26.1q, v8.1d, v20.1d //GHASH final block - low + + eor v18.16b, v18.16b, v16.16b //GHASH final block - mid + ldr d16, [x10] //MODULO - load modulo constant + + eor v19.16b, v19.16b, v26.16b //GHASH final block - low + + eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up + +.inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + +.inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + st1 { v19.16b }, [x3] + rev32 v30.16b, v30.16b + + str q30, [x16] //store the updated counter + + mov x0, x9 + + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + ldp d8, d9, [sp], #80 + ret +.L128_dec_ret: + mov w0, #0x0 + ret +.size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel +.globl unroll8_eor3_aes_gcm_enc_192_kernel +.type unroll8_eor3_aes_gcm_enc_192_kernel,%function +.align 4 +unroll8_eor3_aes_gcm_enc_192_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L192_enc_ret + stp d8, d9, [sp, #-80]! 
+ lsr x9, x1, #3 + mov x16, x4 + mov x8, x5 + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov x5, #0xc200000000000000 + stp x5, xzr, [sp, #64] + add x10, sp, #64 + + mov x5, x9 + ld1 { v0.16b}, [x16] //CTR block 0 + + mov x15, #0x100000000 //set up counter increment + movi v31.16b, #0x0 + mov v31.d[1], x15 + + rev32 v30.16b, v0.16b //set up reversed counter + + add v30.4s, v30.4s, v31.4s //CTR block 0 + + rev32 v1.16b, v30.16b //CTR block 1 + add v30.4s, v30.4s, v31.4s //CTR block 1 + + rev32 v2.16b, v30.16b //CTR block 2 + add v30.4s, v30.4s, v31.4s //CTR block 2 + + rev32 v3.16b, v30.16b //CTR block 3 + add v30.4s, v30.4s, v31.4s //CTR block 3 + + rev32 v4.16b, v30.16b //CTR block 4 + add v30.4s, v30.4s, v31.4s //CTR block 4 + sub x5, x5, #1 //byte_len - 1 + + and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + rev32 v5.16b, v30.16b //CTR block 5 + add v30.4s, v30.4s, v31.4s //CTR block 5 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + add x5, x5, x0 + + rev32 v6.16b, v30.16b //CTR block 6 + add v30.4s, v30.4s, v31.4s //CTR block 6 + + rev32 v7.16b, v30.16b //CTR block 7 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 0 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 0 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 1 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 1 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 2 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 2 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 3 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 3 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + aese 
v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 3 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 4 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 4 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 5 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 5 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + add v30.4s, v30.4s, v31.4s //CTR block 7 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 6 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 6 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 7 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 8 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 8 + + add x4, x0, x1, lsr #3 //end_input_ptr + cmp x0, x5 //check if we have <= 8 blocks + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + ld1 { v19.16b}, [x3] + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + ldp q27, q28, [x8, #160] //load rk10, rk11 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 9 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v0.16b, v26.16b + 
aesmc v0.16b, v0.16b //AES block 0 - round 9 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 9 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 14 - round 10 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 11 - round 10 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 9 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 13 - round 10 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 12 - round 10 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 10 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 15 - round 10 + + aese v6.16b, v28.16b //AES block 14 - round 11 + aese v3.16b, v28.16b //AES block 11 - round 11 + + aese v4.16b, v28.16b //AES block 12 - round 11 + aese v7.16b, v28.16b //AES block 15 - round 11 + ldr q26, [x8, #192] //load rk12 + + aese v1.16b, v28.16b //AES block 9 - round 11 + aese v5.16b, v28.16b //AES block 13 - round 11 + + aese v2.16b, v28.16b //AES block 10 - round 11 + aese v0.16b, v28.16b //AES block 8 - round 11 + b.ge .L192_enc_tail //handle tail + + ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext + + ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext + + ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext + + ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext + +.inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result + rev32 v0.16b, v30.16b //CTR block 8 + add v30.4s, v30.4s, v31.4s //CTR block 8 + +.inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result +.inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result + + rev32 v1.16b, v30.16b //CTR block 9 + add v30.4s, v30.4s, v31.4s //CTR block 9 +.inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result + +.inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result +.inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result + stp q8, q9, [x2], #32 //AES block 0, 1 - store result + +.inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result + rev32 v2.16b, v30.16b //CTR block 10 + add v30.4s, v30.4s, v31.4s //CTR block 10 + + stp q10, q11, [x2], #32 //AES block 2, 3 - store result + cmp x0, x5 //check if we have <= 8 blocks + + rev32 v3.16b, v30.16b //CTR block 11 + add v30.4s, v30.4s, v31.4s //CTR block 11 +.inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result + + stp q12, q13, [x2], #32 //AES block 4, 5 - store result + + rev32 v4.16b, v30.16b //CTR block 12 + stp q14, q15, [x2], #32 //AES block 6, 7 - store result + add v30.4s, v30.4s, v31.4s //CTR block 12 + + b.ge .L192_enc_prepretail //do prepretail + +.L192_enc_main_loop: //main loop start + rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) + ldp q26, q27, [x8, #0] //load rk0, rk1 + rev64 v10.16b, v10.16b //GHASH block 8k+2 + + rev32 v5.16b, v30.16b //CTR block 8k+13 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + rev64 v8.16b, v8.16b //GHASH block 8k + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + + 
rev64 v9.16b, v9.16b //GHASH block 8k+1 + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev64 v11.16b, v11.16b //GHASH block 8k+3 + rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + rev32 v7.16b, v30.16b //CTR block 8k+15 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + + ldp q28, q26, [x8, #32] //load rk2, rk3 + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + ldp q27, q28, [x8, #64] //load rk4, rk5 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr 
q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) + + rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b 
//AES block 8k+14 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + ldp q28, q26, [x8, #128] //load rk8, rk9 + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + ldr d16, [x10] //MODULO - load modulo constant +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + ldp q27, q28, [x8, #160] //load rk10, rk11 + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + rev32 v20.16b, v30.16b //CTR block 8k+16 + add v30.4s, v30.4s, v31.4s //CTR block 8k+16 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext + + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + rev32 v22.16b, v30.16b //CTR block 8k+17 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + add v30.4s, 
v30.4s, v31.4s //CTR block 8k+17 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + ldr q26, [x8, #192] //load rk12 + ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext + + aese v4.16b, v28.16b //AES block 8k+12 - round 11 +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext + + ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext + aese v2.16b, v28.16b //AES block 8k+10 - round 11 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + + rev32 v23.16b, v30.16b //CTR block 8k+18 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + aese v5.16b, v28.16b //AES block 8k+13 - round 11 + add v30.4s, v30.4s, v31.4s //CTR block 8k+18 + + aese v7.16b, v28.16b //AES block 8k+15 - round 11 + aese v0.16b, v28.16b //AES block 8k+8 - round 11 +.inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result + + aese v6.16b, v28.16b //AES block 8k+14 - round 11 + aese v3.16b, v28.16b //AES block 8k+11 - round 11 + aese v1.16b, v28.16b //AES block 8k+9 - round 11 + + rev32 v25.16b, v30.16b //CTR block 8k+19 + add v30.4s, v30.4s, v31.4s //CTR block 8k+19 +.inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result + +.inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result +.inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result + mov v2.16b, v23.16b //CTR block 8k+18 + +.inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result + mov v1.16b, v22.16b //CTR block 8k+17 + stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + +.inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result + mov v0.16b, v20.16b //CTR block 8k+16 + rev32 v4.16b, v30.16b //CTR block 8k+20 + + add v30.4s, v30.4s, v31.4s //CTR block 8k+20 +.inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + +.inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result + mov v3.16b, v25.16b //CTR block 8k+19 + + stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result + + stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result + + cmp x0, x5 //.LOOP CONTROL + stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result + b.lt .L192_enc_main_loop + +.L192_enc_prepretail: //PREPRETAIL + rev32 v5.16b, v30.16b //CTR block 8k+13 + ldp q26, q27, [x8, #0] //load rk0, rk1 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v8.16b, v8.16b //GHASH block 8k + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + add 
v30.4s, v30.4s, v31.4s //CTR block 8k+14 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + + rev64 v11.16b, v11.16b //GHASH block 8k+3 + rev64 v10.16b, v10.16b //GHASH block 8k+2 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev32 v7.16b, v30.16b //CTR block 8k+15 + rev64 v9.16b, v9.16b //GHASH block 8k+1 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) + rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + + eor v10.16b, 
v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + ldp q27, q28, [x8, #64] //load rk4, rk5 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - 
round 5 + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + ldr d16, [x10] //MODULO - load modulo constant + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + ldp q27, q28, [x8, #160] //load rk10, rk11 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b 
//AES block 8k+14 - round 9 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ldr q26, [x8, #192] //load rk12 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + + aese v1.16b, v28.16b //AES block 8k+9 - round 11 + aese v7.16b, v28.16b //AES block 8k+15 - round 11 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + aese v3.16b, v28.16b //AES block 8k+11 - round 11 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + aese v2.16b, v28.16b //AES block 8k+10 - round 11 + aese v0.16b, v28.16b //AES block 8k+8 - round 11 + + aese v6.16b, v28.16b //AES block 8k+14 - round 11 + aese v4.16b, v28.16b //AES block 8k+12 - round 11 + aese v5.16b, v28.16b //AES block 8k+13 - round 11 + +.L192_enc_tail: //TAIL + + ldp q20, q21, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + + ldr q8, [x0], #16 //AES block 8k+8 - l3ad plaintext + + ldp q24, q25, [x3, #192] //load h8k | h7k + ext v25.16b, v25.16b, v25.16b, #8 + + mov v29.16b, v26.16b + + ldp q22, q23, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + ext v23.16b, v23.16b, v23.16b, #8 + cmp x5, #112 + +.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result + ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag + b.gt .L192_enc_blocks_more_than_7 + + cmp x5, #96 + mov v7.16b, v6.16b + movi v17.8b, #0 + + mov v6.16b, v5.16b + movi v19.8b, #0 + sub v30.4s, v30.4s, v31.4s + + mov v5.16b, v4.16b + mov v4.16b, v3.16b + mov v3.16b, v2.16b + + mov v2.16b, v1.16b + movi v18.8b, #0 + b.gt .L192_enc_blocks_more_than_6 + + mov v7.16b, v6.16b + cmp x5, #80 + + mov v6.16b, v5.16b + mov v5.16b, v4.16b + mov v4.16b, v3.16b + + mov v3.16b, v1.16b + sub v30.4s, v30.4s, v31.4s + b.gt .L192_enc_blocks_more_than_5 + + cmp x5, #64 + sub v30.4s, v30.4s, v31.4s + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + mov v4.16b, v1.16b + b.gt .L192_enc_blocks_more_than_4 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v1.16b + + sub v30.4s, v30.4s, v31.4s + cmp x5, #48 + b.gt .L192_enc_blocks_more_than_3 + + mov v7.16b, v6.16b + mov v6.16b, v1.16b + sub v30.4s, v30.4s, v31.4s + + ldr q24, [x3, #96] //load h4k | h3k + cmp x5, #32 + b.gt .L192_enc_blocks_more_than_2 + + sub v30.4s, v30.4s, v31.4s + + cmp x5, #16 + mov v7.16b, v1.16b + b.gt .L192_enc_blocks_more_than_1 + + sub v30.4s, v30.4s, v31.4s + ldr q21, [x3, #48] //load h2k | h1k + b .L192_enc_blocks_less_than_1 +.L192_enc_blocks_more_than_7: //blocks left > 7 + st1 { v9.16b}, [x2], #16 //AES final-7 block - store result + + rev64 v8.16b, v9.16b //GHASH final-7 block + ins v18.d[0], v24.d[1] //GHASH final-7 block - mid + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 
+ + ldr q9, [x0], #16 //AES final-6 block - load plaintext + + eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low + + pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high + + pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid +.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result +.L192_enc_blocks_more_than_6: //blocks left > 6 + + st1 { v9.16b}, [x2], #16 //AES final-6 block - store result + + rev64 v8.16b, v9.16b //GHASH final-6 block + + ldr q9, [x0], #16 //AES final-5 block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-6 block - mid + + pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low +.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result + + movi v16.8b, #0 //suppress further partial tag feed in + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high + eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid + + pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high + eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid +.L192_enc_blocks_more_than_5: //blocks left > 5 + + st1 { v9.16b}, [x2], #16 //AES final-5 block - store result + + rev64 v8.16b, v9.16b //GHASH final-5 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-5 block - mid + + ldr q9, [x0], #16 //AES final-4 block - load plaintext + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high + + ins v27.d[1], v27.d[0] //GHASH final-5 block - mid + pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low + + eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid + +.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result + movi v16.8b, #0 //suppress further partial tag feed in + + eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid +.L192_enc_blocks_more_than_4: //blocks left > 4 + + st1 { v9.16b}, [x2], #16 //AES final-4 block - store result + + rev64 v8.16b, v9.16b //GHASH final-4 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ldr q9, [x0], #16 //AES final-3 block - load plaintext + pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high + ins v27.d[0], v8.d[1] //GHASH final-4 block - mid + + pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low + eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low + + pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid + + eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid +.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result +.L192_enc_blocks_more_than_3: //blocks left > 3 + + ldr q24, [x3, #96] //load h4k | h3k + st1 { v9.16b}, [x2], #16 //AES final-3 block - store result + + rev64 v8.16b, v9.16b //GHASH final-3 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + movi v16.8b, #0 //suppress further partial tag feed in + + ldr q9, [x0], #16 //AES final-2 block - load plaintext + ldr q25, 
[x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + + ins v27.d[0], v8.d[1] //GHASH final-3 block - mid + +.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result + eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-3 block - mid + pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low + + pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high + pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high +.L192_enc_blocks_more_than_2: //blocks left > 2 + + st1 { v9.16b}, [x2], #16 //AES final-2 block - store result + + rev64 v8.16b, v9.16b //GHASH final-2 block + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ldr q9, [x0], #16 //AES final-1 block - load plaintext + ins v27.d[0], v8.d[1] //GHASH final-2 block - mid + + eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid + + pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high + movi v16.8b, #0 //suppress further partial tag feed in + + pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low + eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid +.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result +.L192_enc_blocks_more_than_1: //blocks left > 1 + + ldr q22, [x3, #64] //load h1l | h1h + ext v22.16b, v22.16b, v22.16b, #8 + st1 { v9.16b}, [x2], #16 //AES final-1 block - store result + + rev64 v8.16b, v9.16b //GHASH final-1 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-1 block - mid + pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low + + eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high + eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid + + ldr q9, [x0], #16 //AES final block - load plaintext + ldr q21, [x3, #48] //load h2k | h1k + + ins v27.d[1], v27.d[0] //GHASH final-1 block - mid + +.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + + eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high +.L192_enc_blocks_less_than_1: //blocks left <= 1 + + mvn x6, xzr //temp0_x = 0xffffffffffffffff + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + and x1, x1, #127 //bit_length %= 128 + + lsr x6, x6, x1 //temp0_x is mask for top 64b of last block + cmp x1, #64 + mvn x7, xzr //temp1_x = 0xffffffffffffffff + + csel x13, x7, x6, lt + csel x14, x6, xzr, lt + + mov v0.d[1], x14 + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + + ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + mov v0.d[0], x13 //ctr0b is mask for last block + + and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v8.16b, v9.16b //GHASH final block + bif 
v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing + + st1 { v9.16b}, [x2] //store all 16B + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v16.d[0], v8.d[1] //GHASH final block - mid + pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high + + eor v17.16b, v17.16b, v28.16b //GHASH final block - high + pmull v26.1q, v8.1d, v20.1d //GHASH final block - low + + eor v16.8b, v16.8b, v8.8b //GHASH final block - mid + + pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid + + eor v18.16b, v18.16b, v16.16b //GHASH final block - mid + ldr d16, [x10] //MODULO - load modulo constant + + eor v19.16b, v19.16b, v26.16b //GHASH final block - low + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + rev32 v30.16b, v30.16b + + str q30, [x16] //store the updated counter +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + st1 { v19.16b }, [x3] + + mov x0, x9 //return sizes + + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + ldp d8, d9, [sp], #80 + ret + +.L192_enc_ret: + mov w0, #0x0 + ret +.size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel +.globl unroll8_eor3_aes_gcm_dec_192_kernel +.type unroll8_eor3_aes_gcm_dec_192_kernel,%function +.align 4 +unroll8_eor3_aes_gcm_dec_192_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L192_dec_ret + stp d8, d9, [sp, #-80]! 
+ lsr x9, x1, #3 + mov x16, x4 + mov x8, x5 + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov x5, #0xc200000000000000 + stp x5, xzr, [sp, #64] + add x10, sp, #64 + + mov x5, x9 + ld1 { v0.16b}, [x16] //CTR block 0 + ld1 { v19.16b}, [x3] + + mov x15, #0x100000000 //set up counter increment + movi v31.16b, #0x0 + mov v31.d[1], x15 + + rev32 v30.16b, v0.16b //set up reversed counter + + add v30.4s, v30.4s, v31.4s //CTR block 0 + + rev32 v1.16b, v30.16b //CTR block 1 + add v30.4s, v30.4s, v31.4s //CTR block 1 + + rev32 v2.16b, v30.16b //CTR block 2 + add v30.4s, v30.4s, v31.4s //CTR block 2 + + rev32 v3.16b, v30.16b //CTR block 3 + add v30.4s, v30.4s, v31.4s //CTR block 3 + + rev32 v4.16b, v30.16b //CTR block 4 + add v30.4s, v30.4s, v31.4s //CTR block 4 + + rev32 v5.16b, v30.16b //CTR block 5 + add v30.4s, v30.4s, v31.4s //CTR block 5 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + rev32 v6.16b, v30.16b //CTR block 6 + add v30.4s, v30.4s, v31.4s //CTR block 6 + + rev32 v7.16b, v30.16b //CTR block 7 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 0 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 0 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 1 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 1 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 1 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 1 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 2 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 2 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 3 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 3 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 3 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + aese v5.16b, 
v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 4 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 4 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 5 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 4 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 5 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 5 + + sub x5, x5, #1 //byte_len - 1 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 6 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + add v30.4s, v30.4s, v31.4s //CTR block 7 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 7 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 7 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 7 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 8 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + add x4, x0, x1, lsr #3 //end_input_ptr + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 9 + + ld1 { v19.16b}, [x3] + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + + ldp q27, q28, [x8, #160] //load rk10, rk11 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 9 + add x5, x5, x0 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 9 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 9 + + cmp x0, x5 //check if we have <= 8 blocks + 
aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 9 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 10 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 10 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 10 + ldr q26, [x8, #192] //load rk12 + + aese v0.16b, v28.16b //AES block 0 - round 11 + aese v1.16b, v28.16b //AES block 1 - round 11 + aese v4.16b, v28.16b //AES block 4 - round 11 + + aese v6.16b, v28.16b //AES block 6 - round 11 + aese v5.16b, v28.16b //AES block 5 - round 11 + aese v7.16b, v28.16b //AES block 7 - round 11 + + aese v2.16b, v28.16b //AES block 2 - round 11 + aese v3.16b, v28.16b //AES block 3 - round 11 + b.ge .L192_dec_tail //handle tail + + ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext + + ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext + + ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext + +.inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result +.inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result + stp q0, q1, [x2], #32 //AES block 0, 1 - store result + + rev32 v0.16b, v30.16b //CTR block 8 + add v30.4s, v30.4s, v31.4s //CTR block 8 + + rev32 v1.16b, v30.16b //CTR block 9 + add v30.4s, v30.4s, v31.4s //CTR block 9 +.inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result + +.inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result + stp q2, q3, [x2], #32 //AES block 2, 3 - store result + ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext + + rev32 v2.16b, v30.16b //CTR block 10 + add v30.4s, v30.4s, v31.4s //CTR block 10 + +.inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result + + rev32 v3.16b, v30.16b //CTR block 11 + add v30.4s, v30.4s, v31.4s //CTR block 11 + +.inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result + stp q4, q5, [x2], #32 //AES block 4, 5 - store result + cmp x0, x5 //check if we have <= 8 blocks + +.inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result +.inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result + rev32 v4.16b, v30.16b //CTR block 12 + + add v30.4s, v30.4s, v31.4s //CTR block 12 + stp q6, q7, [x2], #32 //AES block 6, 7 - store result + b.ge .L192_dec_prepretail //do prepretail + +.L192_dec_main_loop: //main loop start + rev64 v9.16b, v9.16b //GHASH block 8k+1 + ldp q26, q27, [x8, #0] //load rk0, rk1 + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + + rev64 v8.16b, v8.16b //GHASH block 8k + rev32 v5.16b, v30.16b //CTR block 8k+13 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v12.16b, v12.16b //GHASH block 8k+4 + rev64 v11.16b, v11.16b //GHASH block 8k+3 + + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + + 
rev64 v13.16b, v13.16b //GHASH block 8k+5 + + rev32 v7.16b, v30.16b //CTR block 8k+15 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + rev64 v10.16b, v10.16b //GHASH block 8k+2 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + aese v4.16b, v26.16b + aesmc 
v4.16b, v4.16b //AES block 8k+12 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + ldp q27, q28, [x8, #64] //load rk4, rk5 + + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + rev64 v14.16b, v14.16b //GHASH block 8k+6 + + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 
8k+10 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + ldp q28, q26, [x8, #128] //load rk8, rk9 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + ldr d16, [x10] //MODULO - load modulo constant + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + rev32 v20.16b, v30.16b //CTR block 8k+16 + add v30.4s, v30.4s, v31.4s //CTR block 8k+16 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + ldp q27, q28, [x8, #160] //load rk10, rk11 + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext + + rev32 v22.16b, v30.16b //CTR block 8k+17 + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + add v30.4s, v30.4s, v31.4s //CTR block 8k+17 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 
8k+12 - round 9 + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext + + rev32 v23.16b, v30.16b //CTR block 8k+18 + add v30.4s, v30.4s, v31.4s //CTR block 8k+18 +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + ldr q26, [x8, #192] //load rk12 + + ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + + aese v0.16b, v28.16b //AES block 8k+8 - round 11 + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + aese v1.16b, v28.16b //AES block 8k+9 - round 11 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + aese v6.16b, v28.16b //AES block 8k+14 - round 11 + aese v3.16b, v28.16b //AES block 8k+11 - round 11 + +.inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result + rev32 v25.16b, v30.16b //CTR block 8k+19 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + + aese v4.16b, v28.16b //AES block 8k+12 - round 11 + aese v2.16b, v28.16b //AES block 8k+10 - round 11 + add v30.4s, v30.4s, v31.4s //CTR block 8k+19 + + aese v7.16b, v28.16b //AES block 8k+15 - round 11 + aese v5.16b, v28.16b //AES block 8k+13 - round 11 + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + +.inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result + stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result +.inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result + +.inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result +.inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 8k+15 - result + stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result + +.inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 8k+13 - result +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + mov v3.16b, v25.16b //CTR block 8k+19 + +.inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 8k+12 - result + stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result + cmp x0, x5 //.LOOP CONTROL + +.inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 8k+14 - result + stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result + mov v0.16b, v20.16b //CTR block 8k+16 + + mov v1.16b, v22.16b //CTR block 8k+17 + mov v2.16b, v23.16b //CTR block 8k+18 + + rev32 v4.16b, v30.16b //CTR block 8k+20 + add v30.4s, v30.4s, v31.4s //CTR block 8k+20 + b.lt .L192_dec_main_loop + +.L192_dec_prepretail: //PREPRETAIL + ldp q26, q27, [x8, #0] //load rk0, rk1 + rev32 v5.16b, v30.16b //CTR block 8k+13 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v8.16b, v8.16b //GHASH block 8k + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + + rev64 v11.16b, v11.16b //GHASH block 8k+3 + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 
+ + eor v8.16b, v8.16b, v19.16b //PRE 1 + rev64 v10.16b, v10.16b //GHASH block 8k+2 + rev64 v9.16b, v9.16b //GHASH block 8k+1 + + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + rev32 v7.16b, v30.16b //CTR block 8k+15 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + rev64 v13.16b, v13.16b //GHASH block 8k+5 + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + 
+ aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + + rev64 v15.16b, v15.16b //GHASH block 8k+7 + +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + rev64 v12.16b, v12.16b //GHASH block 8k+4 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + + rev64 v14.16b, v14.16b //GHASH block 8k+6 + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v7.16b, v26.16b 
+ aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + + ldp q28, q26, [x8, #128] //load rk8, rk9 + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + + ldr d16, [x10] //MODULO - load modulo constant +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + ldp q27, q28, [x8, #160] //load rk10, rk11 + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ldr 
q26, [x8, #192] //load rk12 + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + + aese v0.16b, v28.16b //AES block 8k+8 - round 11 +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + aese v5.16b, v28.16b //AES block 8k+13 - round 11 + + aese v2.16b, v28.16b //AES block 8k+10 - round 11 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + + aese v6.16b, v28.16b //AES block 8k+14 - round 11 + aese v4.16b, v28.16b //AES block 8k+12 - round 11 + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + aese v3.16b, v28.16b //AES block 8k+11 - round 11 + aese v1.16b, v28.16b //AES block 8k+9 - round 11 + aese v7.16b, v28.16b //AES block 8k+15 - round 11 + +.L192_dec_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + + ldp q20, q21, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext + + ldp q24, q25, [x3, #192] //load h8k | h7k + ext v25.16b, v25.16b, v25.16b, #8 + + mov v29.16b, v26.16b + + ldp q22, q23, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + ext v23.16b, v23.16b, v23.16b, #8 + ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag + +.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result + cmp x5, #112 + b.gt .L192_dec_blocks_more_than_7 + + mov v7.16b, v6.16b + movi v17.8b, #0 + sub v30.4s, v30.4s, v31.4s + + mov v6.16b, v5.16b + mov v5.16b, v4.16b + mov v4.16b, v3.16b + + cmp x5, #96 + movi v19.8b, #0 + mov v3.16b, v2.16b + + mov v2.16b, v1.16b + movi v18.8b, #0 + b.gt .L192_dec_blocks_more_than_6 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + mov v4.16b, v3.16b + mov v3.16b, v1.16b + + sub v30.4s, v30.4s, v31.4s + cmp x5, #80 + b.gt .L192_dec_blocks_more_than_5 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + + mov v5.16b, v4.16b + mov v4.16b, v1.16b + cmp x5, #64 + + sub v30.4s, v30.4s, v31.4s + b.gt .L192_dec_blocks_more_than_4 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + mov v6.16b, v5.16b + + mov v5.16b, v1.16b + cmp x5, #48 + b.gt .L192_dec_blocks_more_than_3 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + cmp x5, #32 + + mov v6.16b, v1.16b + ldr q24, [x3, #96] //load h4k | h3k + b.gt .L192_dec_blocks_more_than_2 + + sub v30.4s, v30.4s, v31.4s + + mov v7.16b, v1.16b + cmp x5, #16 + b.gt .L192_dec_blocks_more_than_1 + + sub v30.4s, v30.4s, v31.4s + ldr q21, [x3, #48] //load h2k | h1k + b .L192_dec_blocks_less_than_1 +.L192_dec_blocks_more_than_7: //blocks left > 7 + rev64 v8.16b, v9.16b //GHASH final-7 block + + ins v18.d[0], v24.d[1] //GHASH final-7 block - mid + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high + ins v27.d[0], v8.d[1] //GHASH final-7 block - mid + ldr q9, [x0], #16 //AES final-6 block - load ciphertext + + pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low + + eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid + st1 { 
v12.16b}, [x2], #16 //AES final-7 block - store result + +.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result + + pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid + movi v16.8b, #0 //suppress further partial tag feed in +.L192_dec_blocks_more_than_6: //blocks left > 6 + + rev64 v8.16b, v9.16b //GHASH final-6 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ldr q9, [x0], #16 //AES final-5 block - load ciphertext + ins v27.d[0], v8.d[1] //GHASH final-6 block - mid + + eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high + + st1 { v12.16b}, [x2], #16 //AES final-6 block - store result +.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result + + eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high + pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid + pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low +.L192_dec_blocks_more_than_5: //blocks left > 5 + + rev64 v8.16b, v9.16b //GHASH final-5 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-5 block - mid + + eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-5 block - mid + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high + + ldr q9, [x0], #16 //AES final-4 block - load ciphertext + + eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high + pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low + movi v16.8b, #0 //suppress further partial tag feed in + st1 { v12.16b}, [x2], #16 //AES final-5 block - store result + + eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid +.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result +.L192_dec_blocks_more_than_4: //blocks left > 4 + + rev64 v8.16b, v9.16b //GHASH final-4 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + movi v16.8b, #0 //suppress further partial tag feed in + + ldr q9, [x0], #16 //AES final-3 block - load ciphertext + ins v27.d[0], v8.d[1] //GHASH final-4 block - mid + pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low + + eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low + + pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid + st1 { v12.16b}, [x2], #16 //AES final-4 block - store result + pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high + +.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result + + eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high +.L192_dec_blocks_more_than_3: //blocks left > 3 + + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v8.16b, v9.16b //GHASH final-3 block + ldr q9, [x0], #16 //AES final-2 block - load ciphertext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-3 block - mid + pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high + + eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high + movi v16.8b, #0 //suppress further partial tag feed in + pmull v26.1q, v8.1d, v25.1d //GHASH 
final-3 block - low + + st1 { v12.16b}, [x2], #16 //AES final-3 block - store result + eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid +.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result + + eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low + ldr q24, [x3, #96] //load h4k | h3k + + ins v27.d[1], v27.d[0] //GHASH final-3 block - mid + + pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid + + eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid +.L192_dec_blocks_more_than_2: //blocks left > 2 + + rev64 v8.16b, v9.16b //GHASH final-2 block + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-2 block - mid + ldr q9, [x0], #16 //AES final-1 block - load ciphertext + + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high + pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low + + pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + + eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low + st1 { v12.16b}, [x2], #16 //AES final-2 block - store result + + eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid +.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result +.L192_dec_blocks_more_than_1: //blocks left > 1 + + rev64 v8.16b, v9.16b //GHASH final-1 block + ldr q9, [x0], #16 //AES final block - load ciphertext + ldr q22, [x3, #64] //load h1l | h1h + ext v22.16b, v22.16b, v22.16b, #8 + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + movi v16.8b, #0 //suppress further partial tag feed in + ldr q21, [x3, #48] //load h2k | h1k + + pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low + ins v27.d[0], v8.d[1] //GHASH final-1 block - mid + st1 { v12.16b}, [x2], #16 //AES final-1 block - store result + + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high + +.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result + + eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-1 block - mid + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high +.L192_dec_blocks_less_than_1: //blocks left <= 1 + + rev32 v30.16b, v30.16b + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + str q30, [x16] //store the updated counter + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + mvn x6, xzr //temp0_x = 0xffffffffffffffff + + and x1, x1, #127 //bit_length %= 128 + + mvn x7, xzr //temp1_x = 0xffffffffffffffff + lsr x6, x6, x1 //temp0_x is mask for top 64b of last block + cmp x1, #64 + + csel x13, x7, x6, lt + csel x14, x6, xzr, lt + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + + mov v0.d[1], x14 + ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + + mov v0.d[0], x13 //ctr0b is mask for last block + + and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits + bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing + + rev64 v8.16b, v9.16b //GHASH final block + + st1 { v12.16b}, 
[x2] //store all 16B + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v16.d[0], v8.d[1] //GHASH final block - mid + pmull v26.1q, v8.1d, v20.1d //GHASH final block - low + + eor v16.8b, v16.8b, v8.8b //GHASH final block - mid + pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high + eor v19.16b, v19.16b, v26.16b //GHASH final block - low + + pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final block - high + + eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + eor v18.16b, v18.16b, v16.16b //GHASH final block - mid + ldr d16, [x10] //MODULO - load modulo constant + + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up + +.inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + +.inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + st1 { v19.16b }, [x3] + + mov x0, x9 + + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + ldp d8, d9, [sp], #80 + ret + +.L192_dec_ret: + mov w0, #0x0 + ret +.size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel +.globl unroll8_eor3_aes_gcm_enc_256_kernel +.type unroll8_eor3_aes_gcm_enc_256_kernel,%function +.align 4 +unroll8_eor3_aes_gcm_enc_256_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L256_enc_ret + stp d8, d9, [sp, #-80]! + lsr x9, x1, #3 + mov x16, x4 + mov x8, x5 + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov x5, #0xc200000000000000 + stp x5, xzr, [sp, #64] + add x10, sp, #64 + + ld1 { v0.16b}, [x16] //CTR block 0 + + mov x5, x9 + + mov x15, #0x100000000 //set up counter increment + movi v31.16b, #0x0 + mov v31.d[1], x15 + sub x5, x5, #1 //byte_len - 1 + + and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + add x5, x5, x0 + + rev32 v30.16b, v0.16b //set up reversed counter + + add v30.4s, v30.4s, v31.4s //CTR block 0 + + rev32 v1.16b, v30.16b //CTR block 1 + add v30.4s, v30.4s, v31.4s //CTR block 1 + + rev32 v2.16b, v30.16b //CTR block 2 + add v30.4s, v30.4s, v31.4s //CTR block 2 + + rev32 v3.16b, v30.16b //CTR block 3 + add v30.4s, v30.4s, v31.4s //CTR block 3 + + rev32 v4.16b, v30.16b //CTR block 4 + add v30.4s, v30.4s, v31.4s //CTR block 4 + + rev32 v5.16b, v30.16b //CTR block 5 + add v30.4s, v30.4s, v31.4s //CTR block 5 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + rev32 v6.16b, v30.16b //CTR block 6 + add v30.4s, v30.4s, v31.4s //CTR block 6 + + rev32 v7.16b, v30.16b //CTR block 7 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 0 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b 
//AES block 4 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 1 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 1 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 2 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 2 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + ldp q27, q28, [x8, #64] //load rk4, rk5 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 3 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 3 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 3 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 4 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 5 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 6 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 6 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + 
aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 7 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 8 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 8 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + ld1 { v19.16b}, [x3] + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + ldp q27, q28, [x8, #160] //load rk10, rk11 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 9 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 9 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 9 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 10 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 10 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 9 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 10 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 10 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 11 + ldp q26, q27, [x8, #192] //load rk12, rk13 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 11 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 11 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 11 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 11 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 11 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 11 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 11 + + add v30.4s, v30.4s, v31.4s //CTR block 7 + ldr q28, [x8, #224] //load rk14 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 12 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 12 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 12 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 12 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 12 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 12 + + aese v2.16b, v27.16b //AES block 2 - round 13 + aese v1.16b, v27.16b //AES block 1 - round 
13 + aese v4.16b, v27.16b //AES block 4 - round 13 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 12 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 12 + + aese v0.16b, v27.16b //AES block 0 - round 13 + aese v5.16b, v27.16b //AES block 5 - round 13 + + aese v6.16b, v27.16b //AES block 6 - round 13 + aese v7.16b, v27.16b //AES block 7 - round 13 + aese v3.16b, v27.16b //AES block 3 - round 13 + + add x4, x0, x1, lsr #3 //end_input_ptr + cmp x0, x5 //check if we have <= 8 blocks + b.ge .L256_enc_tail //handle tail + + ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext + + ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext + +.inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result + rev32 v0.16b, v30.16b //CTR block 8 + add v30.4s, v30.4s, v31.4s //CTR block 8 + +.inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result +.inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result + + rev32 v1.16b, v30.16b //CTR block 9 + add v30.4s, v30.4s, v31.4s //CTR block 9 + ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext + + ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext +.inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result + cmp x0, x5 //check if we have <= 8 blocks + + rev32 v2.16b, v30.16b //CTR block 10 + add v30.4s, v30.4s, v31.4s //CTR block 10 + stp q8, q9, [x2], #32 //AES block 0, 1 - store result + + stp q10, q11, [x2], #32 //AES block 2, 3 - store result + + rev32 v3.16b, v30.16b //CTR block 11 + add v30.4s, v30.4s, v31.4s //CTR block 11 + +.inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result + +.inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result +.inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result +.inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result + + stp q12, q13, [x2], #32 //AES block 4, 5 - store result + rev32 v4.16b, v30.16b //CTR block 12 + + stp q14, q15, [x2], #32 //AES block 6, 7 - store result + add v30.4s, v30.4s, v31.4s //CTR block 12 + b.ge .L256_enc_prepretail //do prepretail + +.L256_enc_main_loop: //main loop start + ldp q26, q27, [x8, #0] //load rk0, rk1 + + rev32 v5.16b, v30.16b //CTR block 8k+13 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + + rev64 v11.16b, v11.16b //GHASH block 8k+3 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + rev64 v9.16b, v9.16b //GHASH block 8k+1 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + rev64 v8.16b, v8.16b //GHASH block 8k + + rev64 v12.16b, v12.16b //GHASH block 8k+4 + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + rev32 v7.16b, v30.16b //CTR block 8k+15 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - 
round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + + ldp q28, q26, [x8, #32] //load rk2, rk3 + eor v8.16b, v8.16b, v19.16b //PRE 1 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + rev64 v14.16b, v14.16b //GHASH block 8k+6 + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + ldp q27, q28, [x8, #64] //load rk4, rk5 + rev64 v10.16b, v10.16b //GHASH block 8k+2 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + rev64 v13.16b, v13.16b //GHASH block 8k+5 + + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - 
round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + ldp q26, q27, [x8, #96] //load rk6, rk7 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + + ldp q28, q26, [x8, #128] //load rk8, rk9 + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + trn1 v13.2d, v15.2d, 
v14.2d //GHASH block 8k+6, 8k+7 - mid + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 + + ldp q27, q28, [x8, #160] //load rk10, rk11 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + ldr d16, [x10] //MODULO - load modulo constant + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + + ldp q26, q27, [x8, #192] //load rk12, rk13 + rev32 v20.16b, v30.16b //CTR block 8k+16 + + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 + add v30.4s, v30.4s, v31.4s //CTR block 8k+16 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 + + aese v0.16b, v28.16b + 
aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 + + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 + rev32 v22.16b, v30.16b //CTR block 8k+17 + + add v30.4s, v30.4s, v31.4s //CTR block 8k+17 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 + ldr q28, [x8, #224] //load rk14 + aese v7.16b, v27.16b //AES block 8k+15 - round 13 + + ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 + ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext + + ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext + aese v2.16b, v27.16b //AES block 8k+10 - round 13 + aese v4.16b, v27.16b //AES block 8k+12 - round 13 + + rev32 v23.16b, v30.16b //CTR block 8k+18 + add v30.4s, v30.4s, v31.4s //CTR block 8k+18 + aese v5.16b, v27.16b //AES block 8k+13 - round 13 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 + aese v3.16b, v27.16b //AES block 8k+11 - round 13 + cmp x0, x5 //.LOOP CONTROL + +.inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result + rev32 v25.16b, v30.16b //CTR block 8k+19 + add v30.4s, v30.4s, v31.4s //CTR block 8k+19 + + aese v0.16b, v27.16b //AES block 8k+8 - round 13 + aese v6.16b, v27.16b //AES block 8k+14 - round 13 +.inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result + + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v1.16b, v27.16b //AES block 8k+9 - round 13 + +.inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result + rev32 v4.16b, v30.16b //CTR block 8k+20 +.inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result + + mov v3.16b, v25.16b //CTR block 8k+19 +.inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result +.inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result + + add v30.4s, v30.4s, v31.4s //CTR block 8k+20 + stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result + mov v2.16b, v23.16b //CTR block 8k+18 + +.inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result +.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low + stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result + +.inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result + mov v1.16b, v22.16b //CTR block 8k+17 + stp q12, q13, [x2], #32 //AES block 4, 5 - store result + + stp q14, q15, [x2], #32 //AES block 6, 7 - store result + mov v0.16b, v20.16b //CTR block 8k+16 + b.lt 
.L256_enc_main_loop + +.L256_enc_prepretail: //PREPRETAIL + rev32 v5.16b, v30.16b //CTR block 8k+13 + ldp q26, q27, [x8, #0] //load rk0, rk1 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + rev64 v10.16b, v10.16b //GHASH block 8k+2 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + + rev64 v13.16b, v13.16b //GHASH block 8k+5 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + + rev32 v7.16b, v30.16b //CTR block 8k+15 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + rev64 v8.16b, v8.16b //GHASH block 8k + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + + rev64 v9.16b, v9.16b //GHASH block 8k+1 + ldp q28, q26, [x8, #32] //load rk2, rk3 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + eor v8.16b, v8.16b, v19.16b //PRE 1 + + rev64 v11.16b, v11.16b //GHASH block 8k+3 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + rev64 v14.16b, v14.16b //GHASH block 8k+6 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 
- round 3 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + + rev64 v12.16b, v12.16b //GHASH block 8k+4 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, 
v1.16b //AES block 8k+9 - round 6 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + + ldp q28, q26, [x8, #128] //load rk8, rk9 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + + ldp q27, q28, [x8, #160] //load rk10, rk11 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + ldr d16, [x10] //MODULO - load modulo constant + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + + aese v2.16b, v26.16b + aesmc v2.16b, 
v2.16b //AES block 8k+10 - round 9 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 + + ldp q26, q27, [x8, #192] //load rk12, rk13 + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 + ldr q28, [x8, #224] //load rk14 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 + aese v0.16b, v27.16b //AES block 8k+8 - round 13 + +.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low + aese v5.16b, v27.16b //AES block 8k+13 - round 13 + aese v1.16b, v27.16b //AES block 8k+9 - round 13 + + aese v3.16b, v27.16b //AES block 8k+11 - round 13 + aese v4.16b, v27.16b //AES block 8k+12 - round 13 + aese v7.16b, v27.16b //AES block 8k+15 - round 13 + + aese v2.16b, v27.16b //AES block 8k+10 - round 13 + aese v6.16b, v27.16b //AES block 8k+14 - round 13 +.L256_enc_tail: //TAIL + + ldp q24, q25, [x3, #192] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + + ldr q8, [x0], #16 //AES block 8k+8 - load plaintext + + ldp q20, q21, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + + ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag + ldp q22, q23, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + ext v23.16b, 
v23.16b, v23.16b, #8 + mov v29.16b, v28.16b + + cmp x5, #112 +.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result + b.gt .L256_enc_blocks_more_than_7 + + movi v19.8b, #0 + mov v7.16b, v6.16b + movi v17.8b, #0 + + mov v6.16b, v5.16b + mov v5.16b, v4.16b + mov v4.16b, v3.16b + + mov v3.16b, v2.16b + sub v30.4s, v30.4s, v31.4s + mov v2.16b, v1.16b + + movi v18.8b, #0 + cmp x5, #96 + b.gt .L256_enc_blocks_more_than_6 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + cmp x5, #80 + + mov v5.16b, v4.16b + mov v4.16b, v3.16b + mov v3.16b, v1.16b + + sub v30.4s, v30.4s, v31.4s + b.gt .L256_enc_blocks_more_than_5 + + mov v7.16b, v6.16b + sub v30.4s, v30.4s, v31.4s + + mov v6.16b, v5.16b + mov v5.16b, v4.16b + + cmp x5, #64 + mov v4.16b, v1.16b + b.gt .L256_enc_blocks_more_than_4 + + cmp x5, #48 + mov v7.16b, v6.16b + mov v6.16b, v5.16b + + mov v5.16b, v1.16b + sub v30.4s, v30.4s, v31.4s + b.gt .L256_enc_blocks_more_than_3 + + cmp x5, #32 + mov v7.16b, v6.16b + ldr q24, [x3, #96] //load h4k | h3k + + mov v6.16b, v1.16b + sub v30.4s, v30.4s, v31.4s + b.gt .L256_enc_blocks_more_than_2 + + mov v7.16b, v1.16b + + sub v30.4s, v30.4s, v31.4s + cmp x5, #16 + b.gt .L256_enc_blocks_more_than_1 + + sub v30.4s, v30.4s, v31.4s + ldr q21, [x3, #48] //load h2k | h1k + b .L256_enc_blocks_less_than_1 +.L256_enc_blocks_more_than_7: //blocks left > 7 + st1 { v9.16b}, [x2], #16 //AES final-7 block - store result + + rev64 v8.16b, v9.16b //GHASH final-7 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ldr q9, [x0], #16 //AES final-6 block - load plaintext + + pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high + ins v27.d[0], v8.d[1] //GHASH final-7 block - mid + ins v18.d[0], v24.d[1] //GHASH final-7 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + + eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid +.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result + + pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid + pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low +.L256_enc_blocks_more_than_6: //blocks left > 6 + + st1 { v9.16b}, [x2], #16 //AES final-6 block - store result + + rev64 v8.16b, v9.16b //GHASH final-6 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low + ins v27.d[0], v8.d[1] //GHASH final-6 block - mid + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high + + ldr q9, [x0], #16 //AES final-5 block - load plaintext + + eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low + + eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid + + pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid +.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result + + movi v16.8b, #0 //suppress further partial tag feed in + + eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high +.L256_enc_blocks_more_than_5: //blocks left > 5 + + st1 { v9.16b}, [x2], #16 //AES final-5 block - store result + + rev64 v8.16b, v9.16b //GHASH final-5 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-5 block - mid + + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high + + eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high + eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-5 block - mid + + ldr q9, [x0], #16 //AES final-4 block - load plaintext + pmull v26.1q, 
v8.1d, v22.1d //GHASH final-5 block - low + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid +.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result +.L256_enc_blocks_more_than_4: //blocks left > 4 + + st1 { v9.16b}, [x2], #16 //AES final-4 block - store result + + rev64 v8.16b, v9.16b //GHASH final-4 block + + ldr q9, [x0], #16 //AES final-3 block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-4 block - mid + pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high + +.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result + pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low + + eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low + + pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + + eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high +.L256_enc_blocks_more_than_3: //blocks left > 3 + + st1 { v9.16b}, [x2], #16 //AES final-3 block - store result + + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v8.16b, v9.16b //GHASH final-3 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-3 block - mid + pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high + + eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high + eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid + ldr q24, [x3, #96] //load h4k | h3k + + ins v27.d[1], v27.d[0] //GHASH final-3 block - mid + ldr q9, [x0], #16 //AES final-2 block - load plaintext + + pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid + pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low + +.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result + movi v16.8b, #0 //suppress further partial tag feed in + + eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low +.L256_enc_blocks_more_than_2: //blocks left > 2 + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + + st1 { v9.16b}, [x2], #16 //AES final-2 block - store result + + rev64 v8.16b, v9.16b //GHASH final-2 block + ldr q9, [x0], #16 //AES final-1 block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-2 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high +.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result + + eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high + + pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid + pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low +.L256_enc_blocks_more_than_1: //blocks left > 1 + + st1 { v9.16b}, [x2], #16 //AES final-1 block - store result + + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + rev64 v8.16b, v9.16b //GHASH final-1 block + ldr q9, [x0], #16 //AES 
final block - load plaintext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + movi v16.8b, #0 //suppress further partial tag feed in + + ins v27.d[0], v8.d[1] //GHASH final-1 block - mid + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high + +.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result + eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high + + pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low + eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid + + ldr q21, [x3, #48] //load h2k | h1k + + eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low + ins v27.d[1], v27.d[0] //GHASH final-1 block - mid + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid + + eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid +.L256_enc_blocks_less_than_1: //blocks left <= 1 + + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + mvn x6, xzr //temp0_x = 0xffffffffffffffff + and x1, x1, #127 //bit_length %= 128 + + lsr x6, x6, x1 //temp0_x is mask for top 64b of last block + cmp x1, #64 + mvn x7, xzr //temp1_x = 0xffffffffffffffff + + csel x14, x6, xzr, lt + csel x13, x7, x6, lt + + mov v0.d[0], x13 //ctr0b is mask for last block + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + + ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + mov v0.d[1], x14 + + and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v8.16b, v9.16b //GHASH final block + + rev32 v30.16b, v30.16b + bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing + str q30, [x16] //store the updated counter + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + st1 { v9.16b}, [x2] //store all 16B + + ins v16.d[0], v8.d[1] //GHASH final block - mid + pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high + pmull v26.1q, v8.1d, v20.1d //GHASH final block - low + + eor v17.16b, v17.16b, v28.16b //GHASH final block - high + eor v19.16b, v19.16b, v26.16b //GHASH final block - low + + eor v16.8b, v16.8b, v8.8b //GHASH final block - mid + + pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid + + eor v18.16b, v18.16b, v16.16b //GHASH final block - mid + ldr d16, [x10] //MODULO - load modulo constant + + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + +.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + st1 { v19.16b }, [x3] + mov x0, x9 //return sizes + + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + ldp d8, d9, [sp], #80 + ret + +.L256_enc_ret: + mov w0, #0x0 + ret +.size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel +.globl unroll8_eor3_aes_gcm_dec_256_kernel +.type unroll8_eor3_aes_gcm_dec_256_kernel,%function +.align 4 +unroll8_eor3_aes_gcm_dec_256_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L256_dec_ret + stp d8, d9, [sp, #-80]! 
+ lsr x9, x1, #3 + mov x16, x4 + mov x8, x5 + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov x5, #0xc200000000000000 + stp x5, xzr, [sp, #64] + add x10, sp, #64 + + ld1 { v0.16b}, [x16] //CTR block 0 + + mov x15, #0x100000000 //set up counter increment + movi v31.16b, #0x0 + mov v31.d[1], x15 + mov x5, x9 + + sub x5, x5, #1 //byte_len - 1 + + rev32 v30.16b, v0.16b //set up reversed counter + + add v30.4s, v30.4s, v31.4s //CTR block 0 + + rev32 v1.16b, v30.16b //CTR block 1 + add v30.4s, v30.4s, v31.4s //CTR block 1 + + rev32 v2.16b, v30.16b //CTR block 2 + add v30.4s, v30.4s, v31.4s //CTR block 2 + ldp q26, q27, [x8, #0] //load rk0, rk1 + + rev32 v3.16b, v30.16b //CTR block 3 + add v30.4s, v30.4s, v31.4s //CTR block 3 + + rev32 v4.16b, v30.16b //CTR block 4 + add v30.4s, v30.4s, v31.4s //CTR block 4 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + + rev32 v5.16b, v30.16b //CTR block 5 + add v30.4s, v30.4s, v31.4s //CTR block 5 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + + rev32 v6.16b, v30.16b //CTR block 6 + add v30.4s, v30.4s, v31.4s //CTR block 6 + + rev32 v7.16b, v30.16b //CTR block 7 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 0 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 1 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 1 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 1 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 2 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 2 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 2 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 2 + ldp q27, q28, [x8, #64] //load rk4, rk5 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 3 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 3 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 3 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 3 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 4 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese 
v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 4 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 4 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 5 + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 5 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 5 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 6 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 6 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 7 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 7 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 7 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 7 + + and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 8 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 8 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 8 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + ld1 { v19.16b}, [x3] + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + ldp q27, q28, [x8, #160] //load rk10, rk11 + add x4, x0, x1, lsr #3 //end_input_ptr + add x5, x5, x0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 9 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 9 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 9 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 9 + + aese v0.16b, v26.16b + aesmc v0.16b, 
v0.16b //AES block 0 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 4 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 7 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 5 - round 10 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 6 - round 10 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + ldp q26, q27, [x8, #192] //load rk12, rk13 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 11 + add v30.4s, v30.4s, v31.4s //CTR block 7 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 7 - round 11 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 11 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 11 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 5 - round 11 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 4 - round 11 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 11 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 6 - round 11 + ldr q28, [x8, #224] //load rk14 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 12 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 4 - round 12 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 5 - round 12 + + cmp x0, x5 //check if we have <= 8 blocks + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 12 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 12 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 6 - round 12 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 12 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 7 - round 12 + + aese v5.16b, v27.16b //AES block 5 - round 13 + aese v1.16b, v27.16b //AES block 1 - round 13 + aese v2.16b, v27.16b //AES block 2 - round 13 + + aese v0.16b, v27.16b //AES block 0 - round 13 + aese v4.16b, v27.16b //AES block 4 - round 13 + aese v6.16b, v27.16b //AES block 6 - round 13 + + aese v3.16b, v27.16b //AES block 3 - round 13 + aese v7.16b, v27.16b //AES block 7 - round 13 + b.ge .L256_dec_tail //handle tail + + ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext + + ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext + + ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext + + ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext + cmp x0, x5 //check if we have <= 8 blocks + +.inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result +.inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result + stp q0, q1, [x2], #32 //AES block 0, 1 - store result + + rev32 v0.16b, v30.16b //CTR block 8 + add v30.4s, v30.4s, v31.4s //CTR block 8 +.inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result + +.inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result + +.inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result + rev32 v1.16b, v30.16b //CTR block 9 + add v30.4s, v30.4s, v31.4s //CTR block 9 + +.inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result + stp q2, q3, [x2], #32 //AES block 2, 3 - store result + + rev32 v2.16b, v30.16b //CTR block 10 + add v30.4s, v30.4s, v31.4s //CTR 
block 10 + +.inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result + + rev32 v3.16b, v30.16b //CTR block 11 + add v30.4s, v30.4s, v31.4s //CTR block 11 + stp q4, q5, [x2], #32 //AES block 4, 5 - store result + +.inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result + stp q6, q7, [x2], #32 //AES block 6, 7 - store result + + rev32 v4.16b, v30.16b //CTR block 12 + add v30.4s, v30.4s, v31.4s //CTR block 12 + b.ge .L256_dec_prepretail //do prepretail + +.L256_dec_main_loop: //main loop start + rev32 v5.16b, v30.16b //CTR block 8k+13 + ldp q26, q27, [x8, #0] //load rk0, rk1 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + rev64 v9.16b, v9.16b //GHASH block 8k+1 + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + + rev32 v6.16b, v30.16b //CTR block 8k+14 + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + rev64 v8.16b, v8.16b //GHASH block 8k + + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + rev64 v12.16b, v12.16b //GHASH block 8k+4 + rev64 v11.16b, v11.16b //GHASH block 8k+3 + + rev32 v7.16b, v30.16b //CTR block 8k+15 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + ldp q28, q26, [x8, #32] //load rk2, rk3 + + eor v8.16b, v8.16b, v19.16b //PRE 1 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + rev64 v10.16b, v10.16b //GHASH block 8k+2 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 
8k+11 - round 3 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + + ldp q26, q27, [x8, #96] //load rk6, rk7 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + rev64 v13.16b, v13.16b //GHASH block 8k+5 + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + pmull2 v29.1q, v10.2d, v21.2d //GHASH 
block 8k+2 - mid + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v14.16b, v14.16b //GHASH block 8k+6 + eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + ldp q28, q26, [x8, #128] //load rk8, rk9 + + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + + ldp q27, q28, [x8, #160] //load rk10, rk11 + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + + ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + 
+ pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + rev32 v20.16b, v30.16b //CTR block 8k+16 + ldr d16, [x10] //MODULO - load modulo constant + + add v30.4s, v30.4s, v31.4s //CTR block 8k+16 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 + ldp q26, q27, [x8, #192] //load rk12, rk13 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 + +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + rev32 v22.16b, v30.16b //CTR block 8k+17 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 + + ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 + add v30.4s, v30.4s, v31.4s //CTR block 8k+17 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 + + rev32 v23.16b, v30.16b //CTR block 8k+18 + add v30.4s, v30.4s, v31.4s //CTR block 8k+18 + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 + + ldr q28, [x8, #224] //load rk14 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 + + ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext + aese v1.16b, v27.16b //AES block 8k+9 - round 13 + aese v2.16b, v27.16b //AES block 8k+10 - round 13 + + ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load 
ciphertext + aese v0.16b, v27.16b //AES block 8k+8 - round 13 + aese v5.16b, v27.16b //AES block 8k+13 - round 13 + + rev32 v25.16b, v30.16b //CTR block 8k+19 +.inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result +.inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result + + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + aese v7.16b, v27.16b //AES block 8k+15 - round 13 + + add v30.4s, v30.4s, v31.4s //CTR block 8k+19 + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v4.16b, v27.16b //AES block 8k+12 - round 13 + +.inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 8k+13 - result +.inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result + aese v3.16b, v27.16b //AES block 8k+11 - round 13 + + stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result + mov v0.16b, v20.16b //CTR block 8k+16 +.inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 8k+12 - result + +.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low +.inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result + stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result + + mov v3.16b, v25.16b //CTR block 8k+19 + mov v2.16b, v23.16b //CTR block 8k+18 + aese v6.16b, v27.16b //AES block 8k+14 - round 13 + + mov v1.16b, v22.16b //CTR block 8k+17 + stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result +.inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 8k+15 - result + +.inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 8k+14 - result + rev32 v4.16b, v30.16b //CTR block 8k+20 + add v30.4s, v30.4s, v31.4s //CTR block 8k+20 + + cmp x0, x5 //.LOOP CONTROL + stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result + b.lt .L256_dec_main_loop + +.L256_dec_prepretail: //PREPRETAIL + ldp q26, q27, [x8, #0] //load rk0, rk1 + rev32 v5.16b, v30.16b //CTR block 8k+13 + add v30.4s, v30.4s, v31.4s //CTR block 8k+13 + + rev64 v12.16b, v12.16b //GHASH block 8k+4 + ldr q21, [x3, #144] //load h6k | h5k + ldr q24, [x3, #192] //load h8k | h7k + + rev32 v6.16b, v30.16b //CTR block 8k+14 + rev64 v8.16b, v8.16b //GHASH block 8k + add v30.4s, v30.4s, v31.4s //CTR block 8k+14 + + ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 + ldr q23, [x3, #176] //load h7l | h7h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #208] //load h8l | h8h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v9.16b, v9.16b //GHASH block 8k+1 + + rev32 v7.16b, v30.16b //CTR block 8k+15 + rev64 v10.16b, v10.16b //GHASH block 8k+2 + ldr q20, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 + + ldp q28, q26, [x8, #32] //load rk2, rk3 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 
8k+8 - round 1 + eor v8.16b, v8.16b, v19.16b //PRE 1 + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 + + pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high + trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low + + rev64 v11.16b, v11.16b //GHASH block 8k+3 + pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 + pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 + rev64 v14.16b, v14.16b //GHASH block 8k+6 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 + + pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high + trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 + + ldp q27, q28, [x8, #64] //load rk4, rk5 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 + pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 + eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high + eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid + + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 + pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 + +.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high + trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid + + pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid + pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low + eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low + + pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 + +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 + eor v18.16b, v18.16b, v24.16b //GHASH block 
8k+1 - mid + + eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 + pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 + + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 + pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 + ldp q26, q27, [x8, #96] //load rk6, rk7 + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v15.16b, v15.16b //GHASH block 8k+7 + rev64 v13.16b, v13.16b //GHASH block 8k+5 + +.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid + + trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 + ldr q21, [x3, #48] //load h2k | h1k + ldr q24, [x3, #96] //load h4k | h3k + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 + + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 + + pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high + pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high + pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low + + trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid + pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low + trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 + pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 + + ldp q28, q26, [x8, #128] //load rk8, rk9 + pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 + + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 +.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 + trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 + aese v4.16b, v28.16b + 
aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 + eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 + eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 + pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 + pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid + pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high + + pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid + pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid + pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low + + ldp q27, q28, [x8, #160] //load rk10, rk11 +.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low +.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 + +.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high +.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low + ldr d16, [x10] //MODULO - load modulo constant + +.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid + + aese v4.16b, v27.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 + aese v6.16b, v27.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 + aese v5.16b, v27.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 + +.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + + aese v7.16b, v27.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 + ldp q26, q27, [x8, #192] //load rk12, rk13 + + ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 + + pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 + + aese v7.16b, v28.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 + aese v6.16b, v28.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 + aese v4.16b, v28.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 + + aese v5.16b, v28.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 + aese v3.16b, v26.16b + aesmc 
v3.16b, v3.16b //AES block 8k+11 - round 12 + +.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid + + aese v3.16b, v27.16b //AES block 8k+11 - round 13 + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 + aese v6.16b, v26.16b + aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + aese v4.16b, v26.16b + aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 + aese v7.16b, v26.16b + aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 + ldr q28, [x8, #224] //load rk14 + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 + + aese v4.16b, v27.16b //AES block 8k+12 - round 13 + ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + aese v5.16b, v26.16b + aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 + + aese v6.16b, v27.16b //AES block 8k+14 - round 13 + aese v2.16b, v27.16b //AES block 8k+10 - round 13 + aese v1.16b, v27.16b //AES block 8k+9 - round 13 + + aese v5.16b, v27.16b //AES block 8k+13 - round 13 +.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low + add v30.4s, v30.4s, v31.4s //CTR block 8k+15 + + aese v7.16b, v27.16b //AES block 8k+15 - round 13 + aese v0.16b, v27.16b //AES block 8k+8 - round 13 +.L256_dec_tail: //TAIL + + ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + cmp x5, #112 + + ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext + + ldp q24, q25, [x3, #192] //load h8k | h7k + ext v25.16b, v25.16b, v25.16b, #8 + mov v29.16b, v28.16b + + ldp q20, q21, [x3, #128] //load h5l | h5h + ext v20.16b, v20.16b, v20.16b, #8 + +.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result + ldp q22, q23, [x3, #160] //load h6l | h6h + ext v22.16b, v22.16b, v22.16b, #8 + ext v23.16b, v23.16b, v23.16b, #8 + b.gt .L256_dec_blocks_more_than_7 + + mov v7.16b, v6.16b + sub v30.4s, v30.4s, v31.4s + mov v6.16b, v5.16b + + mov v5.16b, v4.16b + mov v4.16b, v3.16b + movi v19.8b, #0 + + movi v17.8b, #0 + movi v18.8b, #0 + mov v3.16b, v2.16b + + cmp x5, #96 + mov v2.16b, v1.16b + b.gt .L256_dec_blocks_more_than_6 + + mov v7.16b, v6.16b + mov v6.16b, v5.16b + + mov v5.16b, v4.16b + cmp x5, #80 + sub v30.4s, v30.4s, v31.4s + + mov v4.16b, v3.16b + mov v3.16b, v1.16b + b.gt .L256_dec_blocks_more_than_5 + + cmp x5, #64 + mov v7.16b, v6.16b + sub v30.4s, v30.4s, v31.4s + + mov v6.16b, v5.16b + + mov v5.16b, v4.16b + mov v4.16b, v1.16b + b.gt .L256_dec_blocks_more_than_4 + + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + cmp x5, #48 + + mov v6.16b, v5.16b + mov v5.16b, v1.16b + b.gt .L256_dec_blocks_more_than_3 + + ldr q24, [x3, #96] //load h4k | h3k + sub v30.4s, v30.4s, v31.4s + mov v7.16b, v6.16b + + cmp x5, #32 + mov v6.16b, v1.16b + b.gt .L256_dec_blocks_more_than_2 + + sub v30.4s, v30.4s, v31.4s + + mov v7.16b, v1.16b + cmp x5, #16 + b.gt .L256_dec_blocks_more_than_1 + + sub v30.4s, v30.4s, v31.4s + ldr q21, [x3, #48] //load h2k | h1k + b .L256_dec_blocks_less_than_1 +.L256_dec_blocks_more_than_7: //blocks left > 7 + rev64 v8.16b, v9.16b //GHASH final-7 block + ldr q9, [x0], #16 //AES final-6 block - load ciphertext + st1 { v12.16b}, [x2], #16 //AES final-7 block - store result + + ins v18.d[0], v24.d[1] //GHASH final-7 block - mid + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-7 block - 
mid +.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result + + pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid + movi v16.8b, #0 //suppress further partial tag feed in + + pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low + pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid +.L256_dec_blocks_more_than_6: //blocks left > 6 + + rev64 v8.16b, v9.16b //GHASH final-6 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + ldr q9, [x0], #16 //AES final-5 block - load ciphertext + movi v16.8b, #0 //suppress further partial tag feed in + + ins v27.d[0], v8.d[1] //GHASH final-6 block - mid + st1 { v12.16b}, [x2], #16 //AES final-6 block - store result + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high + + pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low + +.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result + eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low + eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid + + pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid + + eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high +.L256_dec_blocks_more_than_5: //blocks left > 5 + + rev64 v8.16b, v9.16b //GHASH final-5 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high + ins v27.d[0], v8.d[1] //GHASH final-5 block - mid + + ldr q9, [x0], #16 //AES final-4 block - load ciphertext + + eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid + st1 { v12.16b}, [x2], #16 //AES final-5 block - store result + + pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low + ins v27.d[1], v27.d[0] //GHASH final-5 block - mid + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high +.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result + eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low + + eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid + movi v16.8b, #0 //suppress further partial tag feed in +.L256_dec_blocks_more_than_4: //blocks left > 4 + + rev64 v8.16b, v9.16b //GHASH final-4 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-4 block - mid + ldr q9, [x0], #16 //AES final-3 block - load ciphertext + + movi v16.8b, #0 //suppress further partial tag feed in + + pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low + pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high + + eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid + + eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high + + pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low + st1 { v12.16b}, [x2], #16 //AES final-4 block - store result + + eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid +.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result +.L256_dec_blocks_more_than_3: //blocks left > 3 + + ldr q25, [x3, #112] //load h4l | h4h + ext v25.16b, v25.16b, v25.16b, #8 + rev64 v8.16b, v9.16b //GHASH final-3 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + ldr q9, [x0], #16 //AES final-2 block - load ciphertext + ldr q24, [x3, #96] //load h4k | h3k + + ins v27.d[0], v8.d[1] //GHASH final-3 block - mid + st1 { v12.16b}, [x2], #16 //AES final-3 block - store 
result + +.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result + + eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid + + ins v27.d[1], v27.d[0] //GHASH final-3 block - mid + pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low + pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high + + movi v16.8b, #0 //suppress further partial tag feed in + pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low + + eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid +.L256_dec_blocks_more_than_2: //blocks left > 2 + + rev64 v8.16b, v9.16b //GHASH final-2 block + + ldr q23, [x3, #80] //load h3l | h3h + ext v23.16b, v23.16b, v23.16b, #8 + ldr q9, [x0], #16 //AES final-1 block - load ciphertext + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-2 block - mid + + pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low + st1 { v12.16b}, [x2], #16 //AES final-2 block - store result +.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result + + eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid + eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low + movi v16.8b, #0 //suppress further partial tag feed in + + pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid + pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid + eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high +.L256_dec_blocks_more_than_1: //blocks left > 1 + + rev64 v8.16b, v9.16b //GHASH final-1 block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v27.d[0], v8.d[1] //GHASH final-1 block - mid + ldr q22, [x3, #64] //load h2l | h2h + ext v22.16b, v22.16b, v22.16b, #8 + + eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid + ldr q9, [x0], #16 //AES final block - load ciphertext + st1 { v12.16b}, [x2], #16 //AES final-1 block - store result + + ldr q21, [x3, #48] //load h2k | h1k + pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low + + ins v27.d[1], v27.d[0] //GHASH final-1 block - mid + + eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low + +.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result + pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high + + pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid + + movi v16.8b, #0 //suppress further partial tag feed in + eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high + + eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid +.L256_dec_blocks_less_than_1: //blocks left <= 1 + + ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + mvn x6, xzr //temp0_x = 0xffffffffffffffff + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + rev32 v30.16b, v30.16b + str q30, [x16] //store the updated counter + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + and x1, x1, #127 //bit_length %= 128 + + lsr x6, x6, x1 //temp0_x is mask for top 64b of last block + cmp x1, #64 + mvn x7, xzr //temp1_x = 0xffffffffffffffff + + csel x14, x6, xzr, lt + csel x13, x7, x6, lt + + mov v0.d[0], x13 //ctr0b is mask for last block + mov v0.d[1], x14 + + and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits + ldr q20, [x3, #32] //load h1l | h1h + ext v20.16b, v20.16b, v20.16b, #8 + bif v12.16b, v26.16b, v0.16b 
//insert existing bytes in top end of result before storing + + rev64 v8.16b, v9.16b //GHASH final block + + eor v8.16b, v8.16b, v16.16b //feed in partial tag + + ins v16.d[0], v8.d[1] //GHASH final block - mid + pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high + + eor v16.8b, v16.8b, v8.8b //GHASH final block - mid + + pmull v26.1q, v8.1d, v20.1d //GHASH final block - low + eor v17.16b, v17.16b, v28.16b //GHASH final block - high + + pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid + + eor v18.16b, v18.16b, v16.16b //GHASH final block - mid + ldr d16, [x10] //MODULO - load modulo constant + eor v19.16b, v19.16b, v26.16b //GHASH final block - low + + pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid + eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up + + ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment + st1 { v12.16b}, [x2] //store all 16B + + eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up + + eor v21.16b, v17.16b, v21.16b //MODULO - fold into mid + eor v18.16b, v18.16b, v21.16b //MODULO - fold into mid + + pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low + + ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment + eor v19.16b, v19.16b, v17.16b //MODULO - fold into low + + eor v19.16b, v19.16b, v18.16b //MODULO - fold into low + ext v19.16b, v19.16b, v19.16b, #8 + rev64 v19.16b, v19.16b + st1 { v19.16b }, [x3] + mov x0, x9 + + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + ldp d8, d9, [sp], #80 + ret + +.L256_dec_ret: + mov w0, #0x0 + ret +.size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel +.byte 65,69,83,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,65,82,77,118,56,44,32,83,80,68,88,32,66,83,68,45,51,45,67,108,97,117,115,101,32,98,121,32,60,120,105,97,111,107,97,110,103,46,113,105,97,110,64,97,114,109,46,99,111,109,62,0 +.align 2 +.align 2 +#endif diff --git a/contrib/openssl-cmake/asm/crypto/modes/asm/aes-gcm-armv8_64.S b/contrib/openssl-cmake/asm/crypto/modes/asm/aes-gcm-armv8_64.S new file mode 100644 index 000000000000..def2071cf378 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/asm/aes-gcm-armv8_64.S @@ -0,0 +1,6396 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=8 +.arch armv8-a+crypto +.text +.globl aes_gcm_enc_128_kernel +.type aes_gcm_enc_128_kernel,%function +.align 4 +aes_gcm_enc_128_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L128_enc_ret + stp x19, x20, [sp, #-112]! 
+ mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #64] + stp d12, d13, [sp, #80] + stp d14, d15, [sp, #96] + + ldp x10, x11, [x16] //ctr96_b64, ctr96_t32 +#ifdef __AARCH64EB__ + rev x10, x10 + rev x11, x11 +#endif + ldp x13, x14, [x8, #160] //load rk10 +#ifdef __AARCH64EB__ + ror x13, x13, #32 + ror x14, x14, #32 +#endif + ld1 {v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + lsr x5, x1, #3 //byte_len + mov x15, x5 + + ld1 {v18.4s}, [x8], #16 //load rk0 + add x4, x0, x1, lsr #3 //end_input_ptr + sub x5, x5, #1 //byte_len - 1 + + lsr x12, x11, #32 + ldr q15, [x3, #112] //load h4l | h4h +#ifndef __AARCH64EB__ + ext v15.16b, v15.16b, v15.16b, #8 +#endif + fmov d1, x10 //CTR block 1 + rev w12, w12 //rev_ctr32 + + add w12, w12, #1 //increment rev_ctr32 + orr w11, w11, w11 + ld1 {v19.4s}, [x8], #16 //load rk1 + + rev w9, w12 //CTR block 1 + add w12, w12, #1 //CTR block 1 + fmov d3, x10 //CTR block 3 + + orr x9, x11, x9, lsl #32 //CTR block 1 + ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible + + fmov v1.d[1], x9 //CTR block 1 + rev w9, w12 //CTR block 2 + + fmov d2, x10 //CTR block 2 + orr x9, x11, x9, lsl #32 //CTR block 2 + add w12, w12, #1 //CTR block 2 + + fmov v2.d[1], x9 //CTR block 2 + rev w9, w12 //CTR block 3 + + orr x9, x11, x9, lsl #32 //CTR block 3 + ld1 {v20.4s}, [x8], #16 //load rk2 + + add w12, w12, #1 //CTR block 3 + fmov v3.d[1], x9 //CTR block 3 + + ldr q14, [x3, #80] //load h3l | h3h +#ifndef __AARCH64EB__ + ext v14.16b, v14.16b, v14.16b, #8 +#endif + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + ld1 {v21.4s}, [x8], #16 //load rk3 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ldr q12, [x3, #32] //load h1l | h1h +#ifndef __AARCH64EB__ + ext v12.16b, v12.16b, v12.16b, #8 +#endif + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + ld1 {v22.4s}, [x8], #16 //load rk4 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + ld1 {v23.4s}, [x8], #16 //load rk5 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + trn2 v17.2d, v14.2d, v15.2d //h4l | h3l + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + ld1 {v24.4s}, [x8], #16 //load rk6 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + ld1 {v25.4s}, [x8], #16 //load rk7 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + trn1 v9.2d, v14.2d, v15.2d //h4h | h3h + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + ld1 {v26.4s}, [x8], #16 //load rk8 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + ldr q13, [x3, #64] //load h2l | h2h +#ifndef __AARCH64EB__ + ext v13.16b, v13.16b, v13.16b, #8 +#endif + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + eor v17.16b, v17.16b, v9.16b //h4k | h3k + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + ld1 {v27.4s}, [x8], #16 //load rk9 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + + and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + trn2 v16.2d, v12.2d, 
v13.2d //h2l | h1l + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + add x5, x5, x0 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + cmp x0, x5 //check if we have <= 4 blocks + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + trn1 v8.2d, v12.2d, v13.2d //h2h | h1h + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + + aese v2.16b, v27.16b //AES block 2 - round 9 + + aese v0.16b, v27.16b //AES block 0 - round 9 + + eor v16.16b, v16.16b, v8.16b //h2k | h1k + + aese v1.16b, v27.16b //AES block 1 - round 9 + + aese v3.16b, v27.16b //AES block 3 - round 9 + b.ge .L128_enc_tail //handle tail + + ldp x6, x7, [x0, #0] //AES block 0 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + ldp x21, x22, [x0, #32] //AES block 2 - load plaintext +#ifdef __AARCH64EB__ + rev x21, x21 + rev x22, x22 +#endif + ldp x19, x20, [x0, #16] //AES block 1 - load plaintext +#ifdef __AARCH64EB__ + rev x19, x19 + rev x20, x20 +#endif + ldp x23, x24, [x0, #48] //AES block 3 - load plaintext +#ifdef __AARCH64EB__ + rev x23, x23 + rev x24, x24 +#endif + eor x6, x6, x13 //AES block 0 - round 10 low + eor x7, x7, x14 //AES block 0 - round 10 high + + eor x21, x21, x13 //AES block 2 - round 10 low + fmov d4, x6 //AES block 0 - mov low + + eor x19, x19, x13 //AES block 1 - round 10 low + eor x22, x22, x14 //AES block 2 - round 10 high + fmov v4.d[1], x7 //AES block 0 - mov high + + fmov d5, x19 //AES block 1 - mov low + eor x20, x20, x14 //AES block 1 - round 10 high + + eor x23, x23, x13 //AES block 3 - round 10 low + fmov v5.d[1], x20 //AES block 1 - mov high + + fmov d6, x21 //AES block 2 - mov low + eor x24, x24, x14 //AES block 3 - round 10 high + rev w9, w12 //CTR block 4 + + fmov v6.d[1], x22 //AES block 2 - mov high + orr x9, x11, x9, lsl #32 //CTR block 4 + + eor v4.16b, v4.16b, v0.16b //AES block 0 - result + fmov d0, x10 //CTR block 4 + add w12, w12, #1 //CTR block 4 + + fmov v0.d[1], x9 //CTR block 4 + rev w9, w12 //CTR block 5 + + eor v5.16b, v5.16b, v1.16b //AES block 1 - result + fmov d1, x10 //CTR block 5 + orr x9, x11, x9, lsl #32 //CTR block 5 + + add w12, w12, #1 //CTR block 5 + add x0, x0, #64 //AES input_ptr update + fmov v1.d[1], x9 //CTR block 5 + + fmov d7, x23 //AES block 3 - mov low + rev w9, w12 //CTR block 6 + st1 { v4.16b}, [x2], #16 //AES block 0 - store 
result + + fmov v7.d[1], x24 //AES block 3 - mov high + orr x9, x11, x9, lsl #32 //CTR block 6 + + add w12, w12, #1 //CTR block 6 + eor v6.16b, v6.16b, v2.16b //AES block 2 - result + st1 { v5.16b}, [x2], #16 //AES block 1 - store result + + fmov d2, x10 //CTR block 6 + cmp x0, x5 //check if we have <= 8 blocks + + fmov v2.d[1], x9 //CTR block 6 + rev w9, w12 //CTR block 7 + st1 { v6.16b}, [x2], #16 //AES block 2 - store result + + orr x9, x11, x9, lsl #32 //CTR block 7 + + eor v7.16b, v7.16b, v3.16b //AES block 3 - result + st1 { v7.16b}, [x2], #16 //AES block 3 - store result + b.ge .L128_enc_prepretail //do prepretail + +.L128_enc_main_loop: //main loop start + ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext +#ifdef __AARCH64EB__ + rev x23, x23 + rev x24, x24 +#endif + rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free) + rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free) + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + fmov d3, x10 //CTR block 4k+3 + + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free) + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + add w12, w12, #1 //CTR block 4k+3 + fmov v3.d[1], x9 //CTR block 4k+3 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + mov d30, v5.d[1] //GHASH block 4k+1 - mid + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + eor v4.16b, v4.16b, v11.16b //PRE 1 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor x24, x24, x14 //AES block 4k+3 - round 10 high + + pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + rev w9, w12 //CTR block 4k+8 + + eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid + mov d8, v4.d[1] //GHASH block 4k - mid + orr x9, x11, x9, lsl #32 //CTR block 4k+8 + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + add w12, w12, #1 //CTR block 4k+8 + mov d10, v17.d[1] //GHASH block 4k - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high + + pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free) + + pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid + + pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + eor x7, x7, x14 //AES block 4k+4 - round 10 high + + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + eor x6, x6, x13 //AES block 4k+4 - round 10 low + 
+ aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + + pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + movi v8.8b, #0xc2 + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + shl d8, d8, #56 //mod_constant + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext +#ifdef __AARCH64EB__ + rev x19, x19 + rev x20, x20 +#endif + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext +#ifdef __AARCH64EB__ + rev x21, x21 + rev x22, x22 +#endif + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + eor x19, x19, x13 //AES block 4k+5 - round 10 low + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + eor x23, x23, x13 //AES block 4k+3 - round 10 low + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + fmov d4, x6 //AES block 4k+4 - mov low + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + fmov v4.d[1], x7 //AES block 4k+4 - mov high + + add x0, x0, #64 //AES input_ptr update + fmov d7, x23 //AES block 4k+3 - mov low + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + fmov d5, x19 //AES block 4k+5 - mov low + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + eor x20, x20, x14 //AES block 4k+5 - round 10 high + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + fmov v5.d[1], x20 //AES block 4k+5 - mov high + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + fmov v7.d[1], x24 //AES block 4k+3 - mov high + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + cmp x0, x5 //.LOOP CONTROL + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + aese v0.16b, v27.16b //AES block 4k+4 - round 9 + eor x21, x21, x13 //AES block 4k+6 - round 10 low + eor x22, x22, x14 //AES block 4k+6 - round 10 high + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + fmov d6, x21 //AES block 4k+6 - mov low + + aese v1.16b, v27.16b //AES block 4k+5 - round 9 + fmov 
v6.d[1], x22 //AES block 4k+6 - mov high + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result + + fmov d0, x10 //CTR block 4k+8 + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + fmov v0.d[1], x9 //CTR block 4k+8 + rev w9, w12 //CTR block 4k+9 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result + + add w12, w12, #1 //CTR block 4k+9 + orr x9, x11, x9, lsl #32 //CTR block 4k+9 + fmov d1, x10 //CTR block 4k+9 + + pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + fmov v1.d[1], x9 //CTR block 4k+9 + rev w9, w12 //CTR block 4k+10 + + aese v2.16b, v27.16b //AES block 4k+6 - round 9 + st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result + eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result + orr x9, x11, x9, lsl #32 //CTR block 4k+10 + + aese v3.16b, v27.16b //AES block 4k+7 - round 9 + add w12, w12, #1 //CTR block 4k+10 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + fmov d2, x10 //CTR block 4k+10 + + eor v11.16b, v11.16b, v9.16b //MODULO - fold into low + st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result + + fmov v2.d[1], x9 //CTR block 4k+10 + st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result + rev w9, w12 //CTR block 4k+11 + + orr x9, x11, x9, lsl #32 //CTR block 4k+11 + eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result + b.lt .L128_enc_main_loop + +.L128_enc_prepretail: //PREPRETAIL + rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free) + fmov d3, x10 //CTR block 4k+3 + rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free) + + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + add w12, w12, #1 //CTR block 4k+3 + fmov v3.d[1], x9 //CTR block 4k+3 + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free) + + pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + + rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free) + eor v4.16b, v4.16b, v11.16b //PRE 1 + + pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + mov d30, v5.d[1] //GHASH block 4k+1 - mid + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d8, v4.d[1] //GHASH block 4k - mid + + mov d31, v6.d[1] //GHASH block 4k+2 - mid + mov d10, v17.d[1] //GHASH block 4k - mid + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid + + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + + pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 
1 + eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + + pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + movi v8.8b, #0xc2 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + shl d8, d8, #56 //mod_constant + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + + pmull v28.1q, v9.1d, v8.1d + eor v10.16b, v10.16b, v9.16b //karatsuba tidy up + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + ext v9.16b, v9.16b, v9.16b, #8 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v11.16b + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + eor v10.16b, v10.16b, v28.16b + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v9.16b + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + + pmull v28.1q, v10.1d, v8.1d + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + ext v10.16b, v10.16b, v10.16b, #8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v28.16b + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + + aese v3.16b, v27.16b //AES block 4k+7 - round 9 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + + aese v0.16b, v27.16b //AES block 4k+4 - round 9 + + aese v1.16b, v27.16b //AES block 4k+5 - round 9 + eor v11.16b, v11.16b, v10.16b + + aese v2.16b, v27.16b //AES 
block 4k+6 - round 9 +.L128_enc_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + cmp x5, #48 + + ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag + eor x6, x6, x13 //AES block 4k+4 - round 10 low + eor x7, x7, x14 //AES block 4k+4 - round 10 high + + fmov d4, x6 //AES block 4k+4 - mov low + + fmov v4.d[1], x7 //AES block 4k+4 - mov high + + eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result + + b.gt .L128_enc_blocks_more_than_3 + + sub w12, w12, #1 + movi v11.8b, #0 + mov v3.16b, v2.16b + + cmp x5, #32 + mov v2.16b, v1.16b + movi v9.8b, #0 + + movi v10.8b, #0 + b.gt .L128_enc_blocks_more_than_2 + + mov v3.16b, v1.16b + cmp x5, #16 + + sub w12, w12, #1 + b.gt .L128_enc_blocks_more_than_1 + + sub w12, w12, #1 + b .L128_enc_blocks_less_than_1 +.L128_enc_blocks_more_than_3: //blocks left > 3 + st1 { v5.16b}, [x2], #16 //AES final-3 block - store result + + ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + rev64 v4.16b, v5.16b //GHASH final-3 block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + eor x7, x7, x14 //AES final-2 block - round 10 high + eor x6, x6, x13 //AES final-2 block - round 10 low + + fmov d5, x6 //AES final-2 block - mov low + + movi v8.8b, #0 //suppress further partial tag feed in + fmov v5.d[1], x7 //AES final-2 block - mov high + + pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low + mov d22, v4.d[1] //GHASH final-3 block - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high + + mov d10, v17.d[1] //GHASH final-3 block - mid + + eor v5.16b, v5.16b, v1.16b //AES final-2 block - result + eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid + + pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid +.L128_enc_blocks_more_than_2: //blocks left > 2 + + st1 { v5.16b}, [x2], #16 //AES final-2 block - store result + + rev64 v4.16b, v5.16b //GHASH final-2 block + ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor x6, x6, x13 //AES final-1 block - round 10 low + + fmov d5, x6 //AES final-1 block - mov low + eor x7, x7, x14 //AES final-1 block - round 10 high + + pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high + fmov v5.d[1], x7 //AES final-1 block - mov high + + mov d22, v4.d[1] //GHASH final-2 block - mid + + pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high + + eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid + + eor v5.16b, v5.16b, v2.16b //AES final-1 block - result + + eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low + + pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + + eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid +.L128_enc_blocks_more_than_1: //blocks left > 1 + + st1 { v5.16b}, [x2], #16 //AES final-1 block - store result + + rev64 v4.16b, v5.16b //GHASH final-1 block + ldp x6, x7, [x0], #16 //AES final block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor x7, x7, x14 //AES final block - round 10 high + eor x6, x6, x13 //AES final block - round 10 low + + fmov d5, x6 //AES final block - mov low + + pmull2 v20.1q, v4.2d, 
v13.2d //GHASH final-1 block - high + fmov v5.d[1], x7 //AES final block - mov high + + mov d22, v4.d[1] //GHASH final-1 block - mid + + pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low + + eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid + + eor v5.16b, v5.16b, v3.16b //AES final block - result + + ins v22.d[1], v22.d[0] //GHASH final-1 block - mid + + pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high + + eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid + movi v8.8b, #0 //suppress further partial tag feed in +.L128_enc_blocks_less_than_1: //blocks left <= 1 + + and x1, x1, #127 //bit_length %= 128 + mvn x13, xzr //rk10_l = 0xffffffffffffffff + + mvn x14, xzr //rk10_h = 0xffffffffffffffff + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + and x1, x1, #127 //bit_length %= 128 + + lsr x14, x14, x1 //rk10_h is mask for top 64b of last block + cmp x1, #64 + + csel x6, x13, x14, lt + csel x7, x14, xzr, lt + + fmov d0, x6 //ctr0b is mask for last block + + fmov v0.d[1], x7 + + and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v4.16b, v5.16b //GHASH final block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + mov d8, v4.d[1] //GHASH final block - mid + + pmull v21.1q, v4.1d, v12.1d //GHASH final block - low + ld1 { v18.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + + eor v8.8b, v8.8b, v4.8b //GHASH final block - mid +#ifndef __AARCH64EB__ + rev w9, w12 +#else + mov w9, w12 +#endif + pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high + + pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final block - high + + eor v10.16b, v10.16b, v8.16b //GHASH final block - mid + movi v8.8b, #0xc2 + + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + shl d8, d8, #56 //mod_constant + + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + bif v5.16b, v18.16b, v0.16b //insert existing bytes in top end of result before storing + + eor v11.16b, v11.16b, v9.16b //MODULO - fold into low + st1 { v5.16b}, [x2] //store all 16B + + str w9, [x16, #12] //store the updated counter + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d10, d11, [sp, #64] + ldp d12, d13, [sp, #80] + ldp d14, d15, [sp, #96] + ldp x19, x20, [sp], #112 + ret + +.L128_enc_ret: + mov w0, #0x0 + ret +.size aes_gcm_enc_128_kernel,.-aes_gcm_enc_128_kernel +.globl aes_gcm_dec_128_kernel +.type aes_gcm_dec_128_kernel,%function +.align 4 +aes_gcm_dec_128_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L128_dec_ret + stp x19, x20, [sp, #-112]! 
+ mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #64] + stp d12, d13, [sp, #80] + stp d14, d15, [sp, #96] + + lsr x5, x1, #3 //byte_len + mov x15, x5 + ldp x10, x11, [x16] //ctr96_b64, ctr96_t32 +#ifdef __AARCH64EB__ + rev x10, x10 + rev x11, x11 +#endif + ldp x13, x14, [x8, #160] //load rk10 +#ifdef __AARCH64EB__ + ror x14, x14, 32 + ror x13, x13, 32 +#endif + sub x5, x5, #1 //byte_len - 1 + ld1 {v18.4s}, [x8], #16 //load rk0 + + and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible + + ldr q13, [x3, #64] //load h2l | h2h +#ifndef __AARCH64EB__ + ext v13.16b, v13.16b, v13.16b, #8 +#endif + lsr x12, x11, #32 + fmov d2, x10 //CTR block 2 + + ld1 {v19.4s}, [x8], #16 //load rk1 + orr w11, w11, w11 + rev w12, w12 //rev_ctr32 + + fmov d1, x10 //CTR block 1 + add w12, w12, #1 //increment rev_ctr32 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + rev w9, w12 //CTR block 1 + + orr x9, x11, x9, lsl #32 //CTR block 1 + ld1 {v20.4s}, [x8], #16 //load rk2 + add w12, w12, #1 //CTR block 1 + + fmov v1.d[1], x9 //CTR block 1 + rev w9, w12 //CTR block 2 + add w12, w12, #1 //CTR block 2 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + orr x9, x11, x9, lsl #32 //CTR block 2 + + fmov v2.d[1], x9 //CTR block 2 + rev w9, w12 //CTR block 3 + + fmov d3, x10 //CTR block 3 + orr x9, x11, x9, lsl #32 //CTR block 3 + add w12, w12, #1 //CTR block 3 + + fmov v3.d[1], x9 //CTR block 3 + add x4, x0, x1, lsr #3 //end_input_ptr + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + ld1 {v21.4s}, [x8], #16 //load rk3 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + ld1 {v22.4s}, [x8], #16 //load rk4 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ld1 {v23.4s}, [x8], #16 //load rk5 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + ld1 {v24.4s}, [x8], #16 //load rk6 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + ld1 {v25.4s}, [x8], #16 //load rk7 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + ld1 {v26.4s}, [x8], #16 //load rk8 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + ldr q14, [x3, #80] //load h3l | h3h +#ifndef __AARCH64EB__ + ext v14.16b, v14.16b, v14.16b, #8 +#endif + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + ld1 {v27.4s}, [x8], #16 //load rk9 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese 
v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + ldr q12, [x3, #32] //load h1l | h1h +#ifndef __AARCH64EB__ + ext v12.16b, v12.16b, v12.16b, #8 +#endif + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + trn1 v8.2d, v12.2d, v13.2d //h2h | h1h + + ldr q15, [x3, #112] //load h4l | h4h +#ifndef __AARCH64EB__ + ext v15.16b, v15.16b, v15.16b, #8 +#endif + trn2 v16.2d, v12.2d, v13.2d //h2l | h1l + add x5, x5, x0 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + eor v16.16b, v16.16b, v8.16b //h2k | h1k + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + trn2 v17.2d, v14.2d, v15.2d //h4l | h3l + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + trn1 v9.2d, v14.2d, v15.2d //h4h | h3h + + aese v2.16b, v27.16b //AES block 2 - round 9 + + aese v3.16b, v27.16b //AES block 3 - round 9 + + aese v0.16b, v27.16b //AES block 0 - round 9 + cmp x0, x5 //check if we have <= 4 blocks + + aese v1.16b, v27.16b //AES block 1 - round 9 + eor v17.16b, v17.16b, v9.16b //h4k | h3k + b.ge .L128_dec_tail //handle tail + + ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0 - load ciphertext; AES block 1 - load ciphertext + + eor v1.16b, v5.16b, v1.16b //AES block 1 - result + ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext + + eor v0.16b, v4.16b, v0.16b //AES block 0 - result + rev64 v4.16b, v4.16b //GHASH block 0 + rev w9, w12 //CTR block 4 + + orr x9, x11, x9, lsl #32 //CTR block 4 + add w12, w12, #1 //CTR block 4 + ld1 {v7.16b}, [x0], #16 //AES block 3 - load ciphertext + + rev64 v5.16b, v5.16b //GHASH block 1 + mov x19, v1.d[0] //AES block 1 - mov low + + mov x20, v1.d[1] //AES block 1 - mov high + + mov x6, v0.d[0] //AES block 0 - mov low + cmp x0, x5 //check if we have <= 8 blocks + + mov x7, v0.d[1] //AES block 0 - mov high + + fmov d0, x10 //CTR block 4 + + fmov v0.d[1], x9 //CTR block 4 + rev w9, w12 //CTR block 5 + eor x19, x19, x13 //AES block 1 - round 10 low +#ifdef __AARCH64EB__ + rev x19, x19 +#endif + fmov d1, x10 //CTR block 5 + add w12, w12, #1 //CTR block 5 + orr x9, x11, x9, lsl #32 //CTR block 5 + + fmov v1.d[1], x9 //CTR block 5 + rev w9, w12 //CTR block 6 + add w12, w12, #1 //CTR block 6 + + orr x9, x11, x9, lsl #32 //CTR block 6 + + eor x20, x20, x14 //AES block 1 - round 10 high +#ifdef __AARCH64EB__ + rev x20, x20 +#endif + eor x6, x6, x13 //AES block 0 - round 10 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v2.16b, v6.16b, v2.16b //AES block 2 - result + + eor x7, x7, x14 //AES block 0 - round 10 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + stp x6, x7, [x2], #16 //AES block 0 - store result + + stp x19, x20, [x2], #16 //AES block 1 - store result + b.ge .L128_dec_prepretail //do prepretail + +.L128_dec_main_loop: //main loop start + eor v3.16b, v7.16b, v3.16b //AES block 
4k+3 - result + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + mov x21, v2.d[0] //AES block 4k+2 - mov low + + pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + mov x22, v2.d[1] //AES block 4k+2 - mov high + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + fmov d2, x10 //CTR block 4k+6 + + rev64 v6.16b, v6.16b //GHASH block 4k+2 + fmov v2.d[1], x9 //CTR block 4k+6 + rev w9, w12 //CTR block 4k+7 + + mov x23, v3.d[0] //AES block 4k+3 - mov low + eor v4.16b, v4.16b, v11.16b //PRE 1 + mov d30, v5.d[1] //GHASH block 4k+1 - mid + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + rev64 v7.16b, v7.16b //GHASH block 4k+3 + + pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + mov x24, v3.d[1] //AES block 4k+3 - mov high + orr x9, x11, x9, lsl #32 //CTR block 4k+7 + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + fmov d3, x10 //CTR block 4k+7 + eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + fmov v3.d[1], x9 //CTR block 4k+7 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + mov d10, v17.d[1] //GHASH block 4k - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low + + pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + mov d8, v4.d[1] //GHASH block 4k - mid + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + + pmull v28.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + eor x23, x23, x13 //AES block 4k+3 - round 10 low +#ifdef __AARCH64EB__ + rev x23, x23 +#endif + pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid + eor x22, x22, x14 //AES block 4k+2 - round 10 high +#ifdef __AARCH64EB__ + rev x22, x22 +#endif + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid + + pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + eor x24, x24, x14 //AES block 4k+3 - round 10 high +#ifdef __AARCH64EB__ + rev x24, x24 +#endif + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + eor x21, x21, x13 
//AES block 4k+2 - round 10 low +#ifdef __AARCH64EB__ + rev x21, x21 +#endif + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + movi v8.8b, #0xc2 + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + stp x21, x22, [x2], #16 //AES block 4k+2 - store result + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high + ld1 {v4.16b}, [x0], #16 //AES block 4k+3 - load ciphertext + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + add w12, w12, #1 //CTR block 4k+7 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + shl d8, d8, #56 //mod_constant + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + stp x23, x24, [x2], #16 //AES block 4k+3 - store result + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + rev w9, w12 //CTR block 4k+8 + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + ld1 {v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v0.16b, v27.16b //AES block 4k+4 - round 9 + orr x9, x11, x9, lsl #32 //CTR block 4k+8 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + aese v1.16b, v27.16b //AES block 4k+5 - round 9 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + ld1 {v6.16b}, [x0], #16 //AES block 4k+5 - load ciphertext + + add w12, w12, #1 //CTR block 4k+8 + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + ld1 {v7.16b}, [x0], #16 //AES block 4k+6 - load ciphertext + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + + rev64 v5.16b, v5.16b //GHASH block 4k+5 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + mov x7, v0.d[1] //AES block 4k+4 - mov high + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + mov x6, v0.d[0] //AES block 4k+4 - mov low + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + fmov d0, x10 //CTR block 4k+8 + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + fmov v0.d[1], x9 //CTR block 4k+8 + rev w9, w12 //CTR block 4k+9 + + aese v2.16b, v27.16b //AES block 4k+6 - round 9 + orr x9, x11, x9, lsl #32 //CTR block 4k+9 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + eor x7, x7, x14 //AES block 4k+4 - round 10 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + mov x20, v1.d[1] 
//AES block 4k+5 - mov high + eor x6, x6, x13 //AES block 4k+4 - round 10 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result + mov x19, v1.d[0] //AES block 4k+5 - mov low + add w12, w12, #1 //CTR block 4k+9 + + aese v3.16b, v27.16b //AES block 4k+7 - round 9 + fmov d1, x10 //CTR block 4k+9 + cmp x0, x5 //.LOOP CONTROL + + rev64 v4.16b, v4.16b //GHASH block 4k+4 + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + fmov v1.d[1], x9 //CTR block 4k+9 + + rev w9, w12 //CTR block 4k+10 + add w12, w12, #1 //CTR block 4k+10 + + eor x20, x20, x14 //AES block 4k+5 - round 10 high +#ifdef __AARCH64EB__ + rev x20, x20 +#endif + stp x6, x7, [x2], #16 //AES block 4k+4 - store result + + eor x19, x19, x13 //AES block 4k+5 - round 10 low +#ifdef __AARCH64EB__ + rev x19, x19 +#endif + stp x19, x20, [x2], #16 //AES block 4k+5 - store result + + orr x9, x11, x9, lsl #32 //CTR block 4k+10 + b.lt .L128_dec_main_loop + +.L128_dec_prepretail: //PREPRETAIL + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + mov x21, v2.d[0] //AES block 4k+2 - mov low + mov d30, v5.d[1] //GHASH block 4k+1 - mid + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + mov x22, v2.d[1] //AES block 4k+2 - mov high + + eor v4.16b, v4.16b, v11.16b //PRE 1 + fmov d2, x10 //CTR block 4k+6 + rev64 v6.16b, v6.16b //GHASH block 4k+2 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + fmov v2.d[1], x9 //CTR block 4k+6 + + rev w9, w12 //CTR block 4k+7 + mov x23, v3.d[0] //AES block 4k+3 - mov low + eor v30.8b, v30.8b, v5.8b //GHASH block 4k+1 - mid + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d10, v17.d[1] //GHASH block 4k - mid + mov x24, v3.d[1] //AES block 4k+3 - mov high + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + orr x9, x11, x9, lsl #32 //CTR block 4k+7 + + pmull v29.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + mov d8, v4.d[1] //GHASH block 4k - mid + fmov d3, x10 //CTR block 4k+7 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + fmov v3.d[1], x9 //CTR block 4k+7 + + pmull v30.1q, v30.1d, v17.1d //GHASH block 4k+1 - mid + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + rev64 v7.16b, v7.16b //GHASH block 4k+3 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + pmull2 v28.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+1 - low + + pmull v29.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + eor v9.16b, v9.16b, v28.16b //GHASH block 4k+1 - high + + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+1 - mid + + pmull2 v4.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + pmull2 v8.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + pmull v28.1q, v6.1d, 
v13.1d //GHASH block 4k+2 - low + + eor v9.16b, v9.16b, v8.16b //GHASH block 4k+2 - high + movi v8.8b, #0xc2 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + eor v11.16b, v11.16b, v28.16b //GHASH block 4k+2 - low + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+3 - high + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + eor x23, x23, x13 //AES block 4k+3 - round 10 low +#ifdef __AARCH64EB__ + rev x23, x23 +#endif + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + eor x21, x21, x13 //AES block 4k+2 - round 10 low +#ifdef __AARCH64EB__ + rev x21, x21 +#endif + eor v11.16b, v11.16b, v29.16b //GHASH block 4k+3 - low + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + shl d8, d8, #56 //mod_constant + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + aese v1.16b, v27.16b //AES block 4k+5 - round 9 + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + eor x24, x24, x14 //AES block 4k+3 - round 10 high +#ifdef __AARCH64EB__ + rev x24, x24 +#endif + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + eor x22, x22, x14 //AES block 4k+2 - round 10 high +#ifdef __AARCH64EB__ + rev x22, x22 +#endif + aese v0.16b, v27.16b //AES block 4k+4 - round 9 + stp 
x21, x22, [x2], #16 //AES block 4k+2 - store result + + aese v2.16b, v27.16b //AES block 4k+6 - round 9 + add w12, w12, #1 //CTR block 4k+7 + stp x23, x24, [x2], #16 //AES block 4k+3 - store result + + aese v3.16b, v27.16b //AES block 4k+7 - round 9 + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low +.L128_dec_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext + + eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result + + mov x7, v0.d[1] //AES block 4k+4 - mov high + + mov x6, v0.d[0] //AES block 4k+4 - mov low + + cmp x5, #48 + + eor x7, x7, x14 //AES block 4k+4 - round 10 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag + eor x6, x6, x13 //AES block 4k+4 - round 10 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + b.gt .L128_dec_blocks_more_than_3 + + mov v3.16b, v2.16b + sub w12, w12, #1 + movi v11.8b, #0 + + movi v9.8b, #0 + mov v2.16b, v1.16b + + movi v10.8b, #0 + cmp x5, #32 + b.gt .L128_dec_blocks_more_than_2 + + cmp x5, #16 + + mov v3.16b, v1.16b + sub w12, w12, #1 + b.gt .L128_dec_blocks_more_than_1 + + sub w12, w12, #1 + b .L128_dec_blocks_less_than_1 +.L128_dec_blocks_more_than_3: //blocks left > 3 + rev64 v4.16b, v5.16b //GHASH final-3 block + ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + mov d10, v17.d[1] //GHASH final-3 block - mid + stp x6, x7, [x2], #16 //AES final-3 block - store result + eor v0.16b, v5.16b, v1.16b //AES final-2 block - result + + mov d22, v4.d[1] //GHASH final-3 block - mid + mov x7, v0.d[1] //AES final-2 block - mov high + + pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low + mov x6, v0.d[0] //AES final-2 block - mov low + + pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high + + eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + eor x7, x7, x14 //AES final-2 block - round 10 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid + eor x6, x6, x13 //AES final-2 block - round 10 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif +.L128_dec_blocks_more_than_2: //blocks left > 2 + + rev64 v4.16b, v5.16b //GHASH final-2 block + ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor v0.16b, v5.16b, v2.16b //AES final-1 block - result + stp x6, x7, [x2], #16 //AES final-2 block - store result + + mov d22, v4.d[1] //GHASH final-2 block - mid + + pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low + + pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high + mov x6, v0.d[0] //AES final-1 block - mov low + + mov x7, v0.d[1] //AES final-1 block - mov high + eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + + pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid + + eor x6, x6, x13 //AES final-1 block - round 10 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high + + eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid + eor x7, x7, x14 //AES final-1 block - round 10 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif +.L128_dec_blocks_more_than_1: //blocks left > 1 + + rev64 v4.16b, v5.16b //GHASH final-1 block + + ld1 { v5.16b}, [x0], #16 //AES final block 
- load ciphertext + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + mov d22, v4.d[1] //GHASH final-1 block - mid + + eor v0.16b, v5.16b, v3.16b //AES final block - result + + eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid + + stp x6, x7, [x2], #16 //AES final-1 block - store result + mov x6, v0.d[0] //AES final block - mov low + + mov x7, v0.d[1] //AES final block - mov high + ins v22.d[1], v22.d[0] //GHASH final-1 block - mid + + pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low + + pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high + + pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid + movi v8.8b, #0 //suppress further partial tag feed in + + eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high + eor x7, x7, x14 //AES final block - round 10 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + eor x6, x6, x13 //AES final block - round 10 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid +.L128_dec_blocks_less_than_1: //blocks left <= 1 + + mvn x14, xzr //rk10_h = 0xffffffffffffffff + and x1, x1, #127 //bit_length %= 128 + + mvn x13, xzr //rk10_l = 0xffffffffffffffff + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + and x1, x1, #127 //bit_length %= 128 + + lsr x14, x14, x1 //rk10_h is mask for top 64b of last block + cmp x1, #64 + + csel x10, x14, xzr, lt + csel x9, x13, x14, lt + + fmov d0, x9 //ctr0b is mask for last block + + mov v0.d[1], x10 + + and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v4.16b, v5.16b //GHASH final block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + ldp x4, x5, [x2] //load existing bytes we need to not overwrite + + and x7, x7, x10 + + pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high + mov d8, v4.d[1] //GHASH final block - mid + + eor v8.8b, v8.8b, v4.8b //GHASH final block - mid + eor v9.16b, v9.16b, v20.16b //GHASH final block - high + + pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid + + pmull v21.1q, v4.1d, v12.1d //GHASH final block - low + bic x4, x4, x9 //mask out low existing bytes + and x6, x6, x9 + +#ifndef __AARCH64EB__ + rev w9, w12 +#else + mov w9, w12 +#endif + + eor v10.16b, v10.16b, v8.16b //GHASH final block - mid + movi v8.8b, #0xc2 + + eor v11.16b, v11.16b, v21.16b //GHASH final block - low + + bic x5, x5, x10 //mask out high existing bytes + shl d8, d8, #56 //mod_constant + + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + orr x6, x6, x4 + str w9, [x16, #12] //store the updated counter + + orr x7, x7, x5 + stp x6, x7, [x2] + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d10, d11, [sp, #64] + ldp d12, d13, [sp, #80] + ldp d14, d15, [sp, #96] + ldp x19, x20, [sp], #112 + 
ret + +.L128_dec_ret: + mov w0, #0x0 + ret +.size aes_gcm_dec_128_kernel,.-aes_gcm_dec_128_kernel +.globl aes_gcm_enc_192_kernel +.type aes_gcm_enc_192_kernel,%function +.align 4 +aes_gcm_enc_192_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L192_enc_ret + stp x19, x20, [sp, #-112]! + mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #64] + stp d12, d13, [sp, #80] + stp d14, d15, [sp, #96] + + ldp x10, x11, [x16] //ctr96_b64, ctr96_t32 +#ifdef __AARCH64EB__ + rev x10, x10 + rev x11, x11 +#endif + ldp x13, x14, [x8, #192] //load rk12 +#ifdef __AARCH64EB__ + ror x13, x13, #32 + ror x14, x14, #32 +#endif + ld1 {v18.4s}, [x8], #16 //load rk0 + + ld1 {v19.4s}, [x8], #16 //load rk1 + + ld1 {v20.4s}, [x8], #16 //load rk2 + + lsr x12, x11, #32 + ld1 {v21.4s}, [x8], #16 //load rk3 + orr w11, w11, w11 + + ld1 {v22.4s}, [x8], #16 //load rk4 + rev w12, w12 //rev_ctr32 + + add w12, w12, #1 //increment rev_ctr32 + fmov d3, x10 //CTR block 3 + + rev w9, w12 //CTR block 1 + add w12, w12, #1 //CTR block 1 + fmov d1, x10 //CTR block 1 + + orr x9, x11, x9, lsl #32 //CTR block 1 + ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible + + fmov v1.d[1], x9 //CTR block 1 + rev w9, w12 //CTR block 2 + add w12, w12, #1 //CTR block 2 + + fmov d2, x10 //CTR block 2 + orr x9, x11, x9, lsl #32 //CTR block 2 + + fmov v2.d[1], x9 //CTR block 2 + rev w9, w12 //CTR block 3 + + orr x9, x11, x9, lsl #32 //CTR block 3 + ld1 {v23.4s}, [x8], #16 //load rk5 + + fmov v3.d[1], x9 //CTR block 3 + + ld1 {v24.4s}, [x8], #16 //load rk6 + + ld1 {v25.4s}, [x8], #16 //load rk7 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + ld1 {v26.4s}, [x8], #16 //load rk8 + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + ldr q15, [x3, #112] //load h4l | h4h +#ifndef __AARCH64EB__ + ext v15.16b, v15.16b, v15.16b, #8 +#endif + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ld1 {v27.4s}, [x8], #16 //load rk9 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + ld1 {v28.4s}, [x8], #16 //load rk10 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + ldr q12, [x3, #32] //load h1l | h1h +#ifndef __AARCH64EB__ + ext v12.16b, v12.16b, v12.16b, #8 +#endif + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + ld1 {v29.4s}, [x8], #16 //load rk11 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + ldr q14, [x3, #80] //load h3l | h3h +#ifndef __AARCH64EB__ + ext v14.16b, v14.16b, v14.16b, #8 +#endif + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + trn1 v9.2d, v14.2d, v15.2d //h4h | h3h + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + trn2 v17.2d, v14.2d, v15.2d //h4l | h3l + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + + aese 
v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + ldr q13, [x3, #64] //load h2l | h2h +#ifndef __AARCH64EB__ + ext v13.16b, v13.16b, v13.16b, #8 +#endif + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + trn2 v16.2d, v12.2d, v13.2d //h2l | h1l + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + trn1 v8.2d, v12.2d, v13.2d //h2h | h1h + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 9 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + lsr x5, x1, #3 //byte_len + mov x15, x5 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + sub x5, x5, #1 //byte_len - 1 + + eor v16.16b, v16.16b, v8.16b //h2k | h1k + and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + eor v17.16b, v17.16b, v9.16b //h4k | h3k + + aese v2.16b, v29.16b //AES block 2 - round 11 + add x4, x0, x1, lsr #3 //end_input_ptr + add x5, x5, x0 + + aese v1.16b, v29.16b //AES block 1 - round 11 + cmp x0, x5 //check if we have <= 4 blocks + + aese v0.16b, v29.16b //AES block 0 - round 11 + add w12, w12, #1 //CTR block 3 + + aese v3.16b, v29.16b //AES block 3 - round 11 + b.ge .L192_enc_tail //handle tail + + rev w9, w12 //CTR block 4 + ldp x6, x7, [x0, #0] //AES block 0 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + orr x9, x11, x9, lsl #32 //CTR block 4 + ldp x21, x22, [x0, #32] //AES block 2 - load plaintext +#ifdef __AARCH64EB__ + rev x21, x21 + rev x22, x22 +#endif + ldp x23, x24, [x0, #48] //AES block 3 - load plaintext +#ifdef __AARCH64EB__ + rev x23, x23 + rev x24, x24 +#endif + ldp x19, x20, [x0, #16] //AES block 1 - load plaintext +#ifdef __AARCH64EB__ + rev x19, x19 + rev x20, x20 +#endif + add x0, x0, #64 //AES input_ptr update + cmp x0, x5 //check if we have <= 8 blocks + + eor x6, x6, x13 //AES block 0 - round 12 low + + eor x7, x7, x14 //AES block 0 - round 12 high + eor x22, x22, x14 //AES block 2 - round 12 high + 
fmov d4, x6 //AES block 0 - mov low + + eor x24, x24, x14 //AES block 3 - round 12 high + fmov v4.d[1], x7 //AES block 0 - mov high + + eor x21, x21, x13 //AES block 2 - round 12 low + eor x19, x19, x13 //AES block 1 - round 12 low + + fmov d5, x19 //AES block 1 - mov low + eor x20, x20, x14 //AES block 1 - round 12 high + + fmov v5.d[1], x20 //AES block 1 - mov high + + eor x23, x23, x13 //AES block 3 - round 12 low + fmov d6, x21 //AES block 2 - mov low + + add w12, w12, #1 //CTR block 4 + eor v4.16b, v4.16b, v0.16b //AES block 0 - result + fmov d0, x10 //CTR block 4 + + fmov v0.d[1], x9 //CTR block 4 + rev w9, w12 //CTR block 5 + + orr x9, x11, x9, lsl #32 //CTR block 5 + add w12, w12, #1 //CTR block 5 + + fmov d7, x23 //AES block 3 - mov low + st1 { v4.16b}, [x2], #16 //AES block 0 - store result + + fmov v6.d[1], x22 //AES block 2 - mov high + + eor v5.16b, v5.16b, v1.16b //AES block 1 - result + fmov d1, x10 //CTR block 5 + st1 { v5.16b}, [x2], #16 //AES block 1 - store result + + fmov v7.d[1], x24 //AES block 3 - mov high + + fmov v1.d[1], x9 //CTR block 5 + rev w9, w12 //CTR block 6 + + orr x9, x11, x9, lsl #32 //CTR block 6 + + add w12, w12, #1 //CTR block 6 + eor v6.16b, v6.16b, v2.16b //AES block 2 - result + fmov d2, x10 //CTR block 6 + + fmov v2.d[1], x9 //CTR block 6 + rev w9, w12 //CTR block 7 + + orr x9, x11, x9, lsl #32 //CTR block 7 + st1 { v6.16b}, [x2], #16 //AES block 2 - store result + + eor v7.16b, v7.16b, v3.16b //AES block 3 - result + st1 { v7.16b}, [x2], #16 //AES block 3 - store result + b.ge .L192_enc_prepretail //do prepretail + +.L192_enc_main_loop: //main loop start + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free) + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext +#ifdef __AARCH64EB__ + rev x19, x19 + rev x20, x20 +#endif + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + fmov d3, x10 //CTR block 4k+3 + rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free) + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + fmov v3.d[1], x9 //CTR block 4k+3 + + pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free) + ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext +#ifdef __AARCH64EB__ + rev x21, x21 + rev x22, x22 +#endif + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + ldp x23, x24, [x0, #48] //AES block 4k+3 - load plaintext +#ifdef __AARCH64EB__ + rev x23, x23 + rev x24, x24 +#endif + pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + eor v4.16b, v4.16b, v11.16b //PRE 1 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free) + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor x24, x24, x14 //AES block 4k+3 - round 12 high + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d8, v4.d[1] //GHASH block 4k - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + eor x21, x21, x13 //AES block 4k+6 - round 12 low + + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + 
eor x19, x19, x13 //AES block 4k+5 - round 12 low + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + + mov d10, v17.d[1] //GHASH block 4k - mid + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + eor x20, x20, x14 //AES block 4k+5 - round 12 high + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + add w12, w12, #1 //CTR block 4k+3 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + eor x22, x22, x14 //AES block 4k+6 - round 12 high + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + eor x23, x23, x13 //AES block 4k+3 - round 12 low + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + rev w9, w12 //CTR block 4k+8 + + pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + orr x9, x11, x9, lsl #32 //CTR block 4k+8 + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + add x0, x0, #64 //AES input_ptr update + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + movi v8.8b, #0xc2 + + pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + eor x7, x7, x14 //AES block 4k+4 - round 12 high + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + eor x6, x6, x13 //AES block 4k+4 - round 12 low + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + shl d8, d8, #56 //mod_constant + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + fmov d5, x19 //AES block 4k+5 - mov low + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + fmov v5.d[1], x20 //AES block 4k+5 - mov high + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + cmp x0, x5 //.LOOP CONTROL + fmov d4, x6 //AES 
block 4k+4 - mov low + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + fmov v4.d[1], x7 //AES block 4k+4 - mov high + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + fmov d7, x23 //AES block 4k+3 - mov low + + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + add w12, w12, #1 //CTR block 4k+8 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + fmov v7.d[1], x24 //AES block 4k+3 - mov high + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + fmov d6, x21 //AES block 4k+6 - mov low + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + + aese v0.16b, v29.16b //AES block 4k+4 - round 11 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + + eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result + fmov d0, x10 //CTR block 4k+8 + + aese v1.16b, v29.16b //AES block 4k+5 - round 11 + fmov v0.d[1], x9 //CTR block 4k+8 + rev w9, w12 //CTR block 4k+9 + + pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + fmov v6.d[1], x22 //AES block 4k+6 - mov high + st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + orr x9, x11, x9, lsl #32 //CTR block 4k+9 + + eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result + add w12, w12, #1 //CTR block 4k+9 + fmov d1, x10 //CTR block 4k+9 + + aese v2.16b, v29.16b //AES block 4k+6 - round 11 + fmov v1.d[1], x9 //CTR block 4k+9 + rev w9, w12 //CTR block 4k+10 + + add w12, w12, #1 //CTR block 4k+10 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + orr x9, x11, x9, lsl #32 //CTR block 4k+10 + + st1 { v5.16b}, [x2], #16 //AES block 4k+5 - store result + eor v11.16b, v11.16b, v9.16b //MODULO - fold into low + + aese v3.16b, v29.16b //AES block 4k+7 - round 11 + eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result + fmov d2, x10 //CTR block 4k+10 + + st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result + fmov v2.d[1], x9 //CTR block 4k+10 + rev w9, w12 //CTR block 4k+11 + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + orr x9, x11, x9, lsl #32 //CTR block 4k+11 + + eor v7.16b, v7.16b, v3.16b //AES block 4k+3 - result + st1 { v7.16b}, [x2], #16 //AES block 4k+3 - store result + b.lt .L192_enc_main_loop + +.L192_enc_prepretail: //PREPRETAIL + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free) + + fmov d3, x10 //CTR block 4k+3 + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + add w12, w12, #1 //CTR block 4k+3 + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b 
//AES block 4k+5 - round 0 + rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free) + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + + fmov v3.d[1], x9 //CTR block 4k+3 + eor v4.16b, v4.16b, v11.16b //PRE 1 + mov d10, v17.d[1] //GHASH block 4k - mid + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free) + + pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d8, v4.d[1] //GHASH block 4k - mid + + pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free) + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high + + pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + movi v8.8b, #0xc2 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low 
+ + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v9.16b //karatsuba tidy up + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + shl d8, d8, #56 //mod_constant + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + eor v10.16b, v10.16b, v11.16b + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + pmull v30.1q, v9.1d, v8.1d + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + ext v9.16b, v9.16b, v9.16b, #8 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v30.16b + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v9.16b + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + + pmull v30.1q, v10.1d, v8.1d + + ext v10.16b, v10.16b, v10.16b, #8 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + eor v11.16b, v11.16b, v30.16b + + aese v0.16b, v29.16b //AES block 4k+4 - round 11 + + aese v3.16b, v29.16b //AES block 4k+7 - round 11 + + aese v2.16b, v29.16b //AES block 4k+6 - round 11 + + aese v1.16b, v29.16b //AES block 4k+5 - round 11 + eor v11.16b, v11.16b, v10.16b +.L192_enc_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + eor x6, x6, x13 //AES block 4k+4 - round 12 low + eor x7, x7, x14 //AES block 4k+4 - round 12 high + + fmov d4, x6 //AES block 4k+4 - mov low + + fmov v4.d[1], x7 //AES block 4k+4 - mov high + cmp x5, #48 + + eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result + + ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag + b.gt .L192_enc_blocks_more_than_3 + + sub w12, w12, #1 + movi v10.8b, #0 + + mov v3.16b, v2.16b + movi v9.8b, #0 + cmp x5, #32 + + mov v2.16b, v1.16b + movi v11.8b, #0 + b.gt .L192_enc_blocks_more_than_2 + + sub w12, w12, #1 + + mov v3.16b, v1.16b + cmp x5, #16 + b.gt .L192_enc_blocks_more_than_1 + + sub w12, w12, #1 + b .L192_enc_blocks_less_than_1 +.L192_enc_blocks_more_than_3: //blocks left > 3 + st1 { v5.16b}, [x2], #16 //AES final-3 block - store result + + ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + rev64 v4.16b, v5.16b //GHASH final-3 block + + eor x6, x6, x13 //AES final-2 block - round 12 low + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor x7, x7, x14 //AES final-2 block - round 12 high + fmov d5, x6 //AES final-2 block - mov low + 
+ fmov v5.d[1], x7 //AES final-2 block - mov high + + mov d22, v4.d[1] //GHASH final-3 block - mid + + pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low + + mov d10, v17.d[1] //GHASH final-3 block - mid + + eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + + pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high + + pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid + eor v5.16b, v5.16b, v1.16b //AES final-2 block - result +.L192_enc_blocks_more_than_2: //blocks left > 2 + + st1 { v5.16b}, [x2], #16 //AES final-2 block - store result + + rev64 v4.16b, v5.16b //GHASH final-2 block + ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor x7, x7, x14 //AES final-1 block - round 12 high + + pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high + mov d22, v4.d[1] //GHASH final-2 block - mid + + pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low + eor x6, x6, x13 //AES final-1 block - round 12 low + + fmov d5, x6 //AES final-1 block - mov low + + fmov v5.d[1], x7 //AES final-1 block - mov high + eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high + eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low + + pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + + eor v5.16b, v5.16b, v2.16b //AES final-1 block - result + + eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid +.L192_enc_blocks_more_than_1: //blocks left > 1 + + st1 { v5.16b}, [x2], #16 //AES final-1 block - store result + + ldp x6, x7, [x0], #16 //AES final block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + rev64 v4.16b, v5.16b //GHASH final-1 block + + eor x6, x6, x13 //AES final block - round 12 low + eor v4.16b, v4.16b, v8.16b //feed in partial tag + movi v8.8b, #0 //suppress further partial tag feed in + + mov d22, v4.d[1] //GHASH final-1 block - mid + + eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid + eor x7, x7, x14 //AES final block - round 12 high + fmov d5, x6 //AES final block - mov low + + pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high + fmov v5.d[1], x7 //AES final block - mov high + + ins v22.d[1], v22.d[0] //GHASH final-1 block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high + + pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low + + pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid + + eor v5.16b, v5.16b, v3.16b //AES final block - result + + eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low + + eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid +.L192_enc_blocks_less_than_1: //blocks left <= 1 + + ld1 { v18.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored +#ifndef __AARCH64EB__ + rev w9, w12 +#else + mov w9, w12 +#endif + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + mvn x14, xzr //rk12_h = 0xffffffffffffffff + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + mvn x13, xzr //rk12_l = 0xffffffffffffffff + + and x1, x1, #127 //bit_length %= 128 + + lsr x14, x14, x1 //rk12_h is mask for top 64b of last block + cmp x1, #64 + + csel x6, x13, x14, lt + csel x7, x14, xzr, lt + + fmov d0, x6 //ctr0b is mask for last block + + fmov v0.d[1], x7 + + and v5.16b, v5.16b, v0.16b //possibly partial 
last block has zeroes in highest bits + + rev64 v4.16b, v5.16b //GHASH final block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + mov d8, v4.d[1] //GHASH final block - mid + + pmull v21.1q, v4.1d, v12.1d //GHASH final block - low + + pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high + + eor v8.8b, v8.8b, v4.8b //GHASH final block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final block - high + + pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid + + eor v10.16b, v10.16b, v8.16b //GHASH final block - mid + movi v8.8b, #0xc2 + + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + shl d8, d8, #56 //mod_constant + + bif v5.16b, v18.16b, v0.16b //insert existing bytes in top end of result before storing + + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + eor v11.16b, v11.16b, v9.16b //MODULO - fold into low + str w9, [x16, #12] //store the updated counter + + st1 { v5.16b}, [x2] //store all 16B + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d10, d11, [sp, #64] + ldp d12, d13, [sp, #80] + ldp d14, d15, [sp, #96] + ldp x19, x20, [sp], #112 + ret + +.L192_enc_ret: + mov w0, #0x0 + ret +.size aes_gcm_enc_192_kernel,.-aes_gcm_enc_192_kernel +.globl aes_gcm_dec_192_kernel +.type aes_gcm_dec_192_kernel,%function +.align 4 +aes_gcm_dec_192_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L192_dec_ret + stp x19, x20, [sp, #-112]! 
+ mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #64] + stp d12, d13, [sp, #80] + stp d14, d15, [sp, #96] + + add x4, x0, x1, lsr #3 //end_input_ptr + ldp x10, x11, [x16] //ctr96_b64, ctr96_t32 +#ifdef __AARCH64EB__ + rev x10, x10 + rev x11, x11 +#endif + ldp x13, x14, [x8, #192] //load rk12 +#ifdef __AARCH64EB__ + ror x13, x13, #32 + ror x14, x14, #32 +#endif + ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible + + ld1 {v18.4s}, [x8], #16 //load rk0 + + lsr x5, x1, #3 //byte_len + mov x15, x5 + ld1 {v19.4s}, [x8], #16 //load rk1 + + lsr x12, x11, #32 + orr w11, w11, w11 + fmov d3, x10 //CTR block 3 + + rev w12, w12 //rev_ctr32 + fmov d1, x10 //CTR block 1 + + add w12, w12, #1 //increment rev_ctr32 + ld1 {v20.4s}, [x8], #16 //load rk2 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + rev w9, w12 //CTR block 1 + + add w12, w12, #1 //CTR block 1 + orr x9, x11, x9, lsl #32 //CTR block 1 + ld1 {v21.4s}, [x8], #16 //load rk3 + + fmov v1.d[1], x9 //CTR block 1 + rev w9, w12 //CTR block 2 + add w12, w12, #1 //CTR block 2 + + fmov d2, x10 //CTR block 2 + orr x9, x11, x9, lsl #32 //CTR block 2 + + fmov v2.d[1], x9 //CTR block 2 + rev w9, w12 //CTR block 3 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + orr x9, x11, x9, lsl #32 //CTR block 3 + + fmov v3.d[1], x9 //CTR block 3 + + ld1 {v22.4s}, [x8], #16 //load rk4 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ld1 {v23.4s}, [x8], #16 //load rk5 + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + ldr q15, [x3, #112] //load h4l | h4h +#ifndef __AARCH64EB__ + ext v15.16b, v15.16b, v15.16b, #8 +#endif + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + ldr q13, [x3, #64] //load h2l | h2h +#ifndef __AARCH64EB__ + ext v13.16b, v13.16b, v13.16b, #8 +#endif + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + ldr q14, [x3, #80] //load h3l | h3h +#ifndef __AARCH64EB__ + ext v14.16b, v14.16b, v14.16b, #8 +#endif + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + ldr q12, [x3, #32] //load h1l | h1h +#ifndef __AARCH64EB__ + ext v12.16b, v12.16b, v12.16b, #8 +#endif + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + ld1 {v24.4s}, [x8], #16 //load rk6 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + ld1 {v25.4s}, [x8], #16 //load rk7 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + ld1 {v26.4s}, [x8], #16 //load rk8 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + ld1 {v27.4s}, [x8], #16 //load rk9 + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + add w12, w12, #1 //CTR block 3 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + trn1 v9.2d, v14.2d, v15.2d //h4h | h3h + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + ld1 {v28.4s}, [x8], #16 //load rk10 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + trn2 v17.2d, v14.2d, v15.2d //h4l | h3l + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b 
//AES block 2 - round 4 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + trn2 v16.2d, v12.2d, v13.2d //h2l | h1l + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + ld1 {v29.4s}, [x8], #16 //load rk11 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + sub x5, x5, #1 //byte_len - 1 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + add x5, x5, x0 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + cmp x0, x5 //check if we have <= 4 blocks + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 9 + trn1 v8.2d, v12.2d, v13.2d //h2h | h1h + + aese v3.16b, v29.16b //AES block 3 - round 11 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + eor v16.16b, v16.16b, v8.16b //h2k | h1k + + aese v2.16b, v29.16b //AES block 2 - round 11 + + aese v1.16b, v29.16b //AES block 1 - round 11 + eor v17.16b, v17.16b, v9.16b //h4k | h3k + + aese v0.16b, v29.16b //AES block 0 - round 11 + b.ge .L192_dec_tail //handle tail + + ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0,1 - load ciphertext + + eor v1.16b, v5.16b, v1.16b //AES block 1 - result + + eor v0.16b, v4.16b, v0.16b //AES block 0 - result + rev w9, w12 //CTR block 4 + ld1 {v6.16b, v7.16b}, [x0], #32 //AES block 2,3 - load ciphertext + + mov x19, v1.d[0] //AES block 1 - mov low + + mov x20, v1.d[1] //AES block 1 - mov high + + mov x6, v0.d[0] //AES block 0 - mov low + orr x9, x11, x9, lsl #32 //CTR block 4 + add w12, w12, #1 //CTR block 4 + + mov x7, v0.d[1] //AES block 0 - mov high + rev64 v4.16b, v4.16b //GHASH block 0 + + fmov d0, x10 //CTR block 4 + rev64 v5.16b, v5.16b //GHASH block 1 + cmp x0, x5 //check if we have <= 8 blocks + + eor x19, x19, x13 //AES block 1 - round 12 low +#ifdef __AARCH64EB__ + rev x19, x19 +#endif + fmov v0.d[1], x9 //CTR block 4 + rev w9, w12 //CTR block 5 + + orr x9, x11, x9, lsl #32 //CTR block 5 + fmov d1, x10 //CTR block 5 + eor x20, x20, x14 //AES block 1 - round 12 high +#ifdef __AARCH64EB__ + rev x20, x20 +#endif + add w12, w12, #1 //CTR block 5 + fmov v1.d[1], x9 //CTR 
block 5 + eor x6, x6, x13 //AES block 0 - round 12 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + rev w9, w12 //CTR block 6 + eor x7, x7, x14 //AES block 0 - round 12 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + stp x6, x7, [x2], #16 //AES block 0 - store result + orr x9, x11, x9, lsl #32 //CTR block 6 + + stp x19, x20, [x2], #16 //AES block 1 - store result + + add w12, w12, #1 //CTR block 6 + eor v2.16b, v6.16b, v2.16b //AES block 2 - result + b.ge .L192_dec_prepretail //do prepretail + +.L192_dec_main_loop: //main loop start + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + + pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + mov x21, v2.d[0] //AES block 4k+2 - mov low + + mov x22, v2.d[1] //AES block 4k+2 - mov high + eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result + rev64 v7.16b, v7.16b //GHASH block 4k+3 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + fmov d2, x10 //CTR block 4k+6 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + eor v4.16b, v4.16b, v11.16b //PRE 1 + + pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + fmov v2.d[1], x9 //CTR block 4k+6 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + mov x24, v3.d[1] //AES block 4k+3 - mov high + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + mov x23, v3.d[0] //AES block 4k+3 - mov low + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + fmov d3, x10 //CTR block 4k+7 + mov d8, v4.d[1] //GHASH block 4k - mid + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d10, v17.d[1] //GHASH block 4k - mid + rev w9, w12 //CTR block 4k+7 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + orr x9, x11, x9, lsl #32 //CTR block 4k+7 + + fmov v3.d[1], x9 //CTR block 4k+7 + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + eor x22, x22, x14 //AES block 4k+2 - round 12 high +#ifdef __AARCH64EB__ + rev x22, x22 +#endif + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + rev64 v6.16b, v6.16b //GHASH block 4k+2 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low + eor x21, x21, x13 //AES block 4k+2 - round 12 low +#ifdef __AARCH64EB__ + rev x21, x21 +#endif + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + + pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - 
high + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + movi v8.8b, #0xc2 + + pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + shl d8, d8, #56 //mod_constant + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + ld1 {v4.16b}, [x0], #16 //AES block 4k+4 - load ciphertext + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + ld1 {v5.16b}, [x0], #16 //AES block 4k+5 - load ciphertext + eor x23, x23, x13 //AES block 4k+3 - round 12 low +#ifdef __AARCH64EB__ + rev x23, x23 +#endif + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v0.16b, v29.16b //AES block 4k+4 - round 11 + add w12, w12, #1 //CTR block 4k+7 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + ld1 {v6.16b}, [x0], #16 //AES block 4k+6 - load ciphertext + + aese v1.16b, v29.16b //AES block 4k+5 - round 11 + ld1 {v7.16b}, [x0], #16 //AES block 4k+7 - load ciphertext + rev w9, w12 //CTR block 4k+8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + stp x21, x22, [x2], #16 //AES block 4k+2 - store result + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + cmp x0, x5 //.LOOP 
CONTROL + + eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result + eor x24, x24, x14 //AES block 4k+3 - round 12 high +#ifdef __AARCH64EB__ + rev x24, x24 +#endif + eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + orr x9, x11, x9, lsl #32 //CTR block 4k+8 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + mov x19, v1.d[0] //AES block 4k+5 - mov low + + mov x6, v0.d[0] //AES block 4k+4 - mov low + stp x23, x24, [x2], #16 //AES block 4k+3 - store result + rev64 v5.16b, v5.16b //GHASH block 4k+5 + + aese v2.16b, v29.16b //AES block 4k+6 - round 11 + mov x7, v0.d[1] //AES block 4k+4 - mov high + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + mov x20, v1.d[1] //AES block 4k+5 - mov high + + fmov d0, x10 //CTR block 4k+8 + add w12, w12, #1 //CTR block 4k+8 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result + fmov v0.d[1], x9 //CTR block 4k+8 + rev w9, w12 //CTR block 4k+9 + + eor x6, x6, x13 //AES block 4k+4 - round 12 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + orr x9, x11, x9, lsl #32 //CTR block 4k+9 + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + fmov d1, x10 //CTR block 4k+9 + add w12, w12, #1 //CTR block 4k+9 + eor x19, x19, x13 //AES block 4k+5 - round 12 low +#ifdef __AARCH64EB__ + rev x19, x19 +#endif + fmov v1.d[1], x9 //CTR block 4k+9 + rev w9, w12 //CTR block 4k+10 + eor x20, x20, x14 //AES block 4k+5 - round 12 high +#ifdef __AARCH64EB__ + rev x20, x20 +#endif + eor x7, x7, x14 //AES block 4k+4 - round 12 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + stp x6, x7, [x2], #16 //AES block 4k+4 - store result + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + + add w12, w12, #1 //CTR block 4k+10 + rev64 v4.16b, v4.16b //GHASH block 4k+4 + orr x9, x11, x9, lsl #32 //CTR block 4k+10 + + aese v3.16b, v29.16b //AES block 4k+7 - round 11 + stp x19, x20, [x2], #16 //AES block 4k+5 - store result + b.lt .L192_dec_main_loop + +.L192_dec_prepretail: //PREPRETAIL + mov x22, v2.d[1] //AES block 4k+2 - mov high + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + mov x21, v2.d[0] //AES block 4k+2 - mov low + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + mov d10, v17.d[1] //GHASH block 4k - mid + + eor v4.16b, v4.16b, v11.16b //PRE 1 + fmov d2, x10 //CTR block 4k+6 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + mov x23, v3.d[0] //AES block 4k+3 - mov low + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + mov x24, v3.d[1] //AES block 4k+3 - mov high + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d8, v4.d[1] //GHASH block 4k - mid + fmov d3, x10 //CTR block 4k+7 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + rev64 v6.16b, v6.16b //GHASH block 4k+2 + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + fmov v2.d[1], x9 //CTR block 4k+6 + rev w9, w12 //CTR block 4k+7 + + orr x9, x11, x9, lsl #32 //CTR block 4k+7 + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + pmull v31.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + eor x24, x24, x14 //AES block 4k+3 - round 12 high +#ifdef __AARCH64EB__ + rev x24, x24 +#endif + 
fmov v3.d[1], x9 //CTR block 4k+7 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + eor x21, x21, x13 //AES block 4k+2 - round 12 low +#ifdef __AARCH64EB__ + rev x21, x21 +#endif + pmull2 v30.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + eor x22, x22, x14 //AES block 4k+2 - round 12 high +#ifdef __AARCH64EB__ + rev x22, x22 +#endif + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + eor x23, x23, x13 //AES block 4k+3 - round 12 low +#ifdef __AARCH64EB__ + rev x23, x23 +#endif + stp x21, x22, [x2], #16 //AES block 4k+2 - store result + + rev64 v7.16b, v7.16b //GHASH block 4k+3 + stp x23, x24, [x2], #16 //AES block 4k+3 - store result + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+1 - high + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + add w12, w12, #1 //CTR block 4k+7 + + pmull2 v30.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + eor v11.16b, v11.16b, v31.16b //GHASH block 4k+1 - low + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + mov d31, v6.d[1] //GHASH block 4k+2 - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + eor v9.16b, v9.16b, v30.16b //GHASH block 4k+2 - high + + eor v31.8b, v31.8b, v6.8b //GHASH block 4k+2 - mid + + pmull v8.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + mov d30, v7.d[1] //GHASH block 4k+3 - mid + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + ins v31.d[1], v31.d[0] //GHASH block 4k+2 - mid + + pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + eor v30.8b, v30.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + + pmull2 v31.1q, v31.2d, v16.2d //GHASH block 4k+2 - mid + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+2 - low + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + movi v8.8b, #0xc2 + + pmull v30.1q, v30.1d, v16.1d //GHASH block 4k+3 - mid + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + shl d8, d8, #56 //mod_constant + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + eor v10.16b, v10.16b, v31.16b //GHASH block 4k+2 - mid + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v10.16b, v10.16b, v30.16b //GHASH block 4k+3 - mid + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v0.16b, v26.16b + aesmc v0.16b, 
v0.16b //AES block 4k+4 - round 8 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + + aese v0.16b, v29.16b + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + aese v2.16b, v29.16b + + aese v1.16b, v29.16b + + aese v3.16b, v29.16b + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low +.L192_dec_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext + + eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result + + mov x7, v0.d[1] //AES block 4k+4 - mov high + + mov x6, v0.d[0] //AES block 4k+4 - mov low + + ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag + + cmp x5, #48 + + eor x7, x7, x14 //AES block 4k+4 - round 12 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + eor x6, x6, x13 //AES block 4k+4 - round 12 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + b.gt .L192_dec_blocks_more_than_3 + + movi v11.8b, #0 + movi v9.8b, #0 + + mov v3.16b, v2.16b + mov v2.16b, v1.16b + sub w12, w12, #1 + + movi v10.8b, #0 + cmp x5, #32 + b.gt .L192_dec_blocks_more_than_2 + + mov v3.16b, v1.16b + cmp x5, #16 + sub w12, w12, #1 + + b.gt .L192_dec_blocks_more_than_1 + + sub w12, w12, #1 + b .L192_dec_blocks_less_than_1 +.L192_dec_blocks_more_than_3: //blocks left > 3 + rev64 v4.16b, v5.16b //GHASH final-3 block + ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext + + stp x6, x7, [x2], #16 //AES final-3 block - store result + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor v0.16b, v5.16b, v1.16b //AES final-2 block - result + + pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low + mov x6, v0.d[0] //AES final-2 block - mov low + mov d22, v4.d[1] //GHASH final-3 block - mid + + mov x7, v0.d[1] //AES final-2 block - mov high + + mov d10, v17.d[1] //GHASH final-3 block - mid + eor v22.8b, v22.8b, v4.8b //GHASH final-3 
block - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high + + eor x6, x6, x13 //AES final-2 block - round 12 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + movi v8.8b, #0 //suppress further partial tag feed in + + pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid + eor x7, x7, x14 //AES final-2 block - round 12 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif +.L192_dec_blocks_more_than_2: //blocks left > 2 + + rev64 v4.16b, v5.16b //GHASH final-2 block + ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + movi v8.8b, #0 //suppress further partial tag feed in + + eor v0.16b, v5.16b, v2.16b //AES final-1 block - result + + mov d22, v4.d[1] //GHASH final-2 block - mid + + pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low + + stp x6, x7, [x2], #16 //AES final-2 block - store result + + eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid + mov x7, v0.d[1] //AES final-1 block - mov high + + eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low + mov x6, v0.d[0] //AES final-1 block - mov low + + pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high + + pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high + eor x7, x7, x14 //AES final-1 block - round 12 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + eor x6, x6, x13 //AES final-1 block - round 12 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid +.L192_dec_blocks_more_than_1: //blocks left > 1 + + rev64 v4.16b, v5.16b //GHASH final-1 block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext + + mov d22, v4.d[1] //GHASH final-1 block - mid + + pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high + + eor v0.16b, v5.16b, v3.16b //AES final block - result + stp x6, x7, [x2], #16 //AES final-1 block - store result + + eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high + + pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low + mov x7, v0.d[1] //AES final block - mov high + + ins v22.d[1], v22.d[0] //GHASH final-1 block - mid + mov x6, v0.d[0] //AES final block - mov low + + pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low + eor x7, x7, x14 //AES final block - round 12 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + eor x6, x6, x13 //AES final block - round 12 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid +.L192_dec_blocks_less_than_1: //blocks left <= 1 + + mvn x13, xzr //rk12_l = 0xffffffffffffffff + ldp x4, x5, [x2] //load existing bytes we need to not overwrite + and x1, x1, #127 //bit_length %= 128 + + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + and x1, x1, #127 //bit_length %= 128 + mvn x14, xzr //rk12_h = 0xffffffffffffffff + + lsr x14, x14, x1 //rk12_h is mask for top 64b of last block + cmp x1, #64 + + csel x9, x13, x14, lt + csel x10, x14, xzr, lt + + fmov d0, x9 //ctr0b is mask for last block + and x6, x6, x9 + bic x4, x4, x9 //mask out low existing bytes + + orr x6, x6, x4 + mov v0.d[1], x10 +#ifndef __AARCH64EB__ + rev w9, w12 +#else + mov w9, w12 +#endif + + and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in 
highest bits + str w9, [x16, #12] //store the updated counter + + rev64 v4.16b, v5.16b //GHASH final block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + bic x5, x5, x10 //mask out high existing bytes + + and x7, x7, x10 + + pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high + mov d8, v4.d[1] //GHASH final block - mid + + pmull v21.1q, v4.1d, v12.1d //GHASH final block - low + + eor v8.8b, v8.8b, v4.8b //GHASH final block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final block - high + + pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final block - low + + eor v10.16b, v10.16b, v8.16b //GHASH final block - mid + movi v8.8b, #0xc2 + + eor v30.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + shl d8, d8, #56 //mod_constant + + eor v10.16b, v10.16b, v30.16b //MODULO - karatsuba tidy up + + pmull v31.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + orr x7, x7, x5 + stp x6, x7, [x2] + + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + eor v10.16b, v10.16b, v31.16b //MODULO - fold into mid + + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d10, d11, [sp, #64] + ldp d12, d13, [sp, #80] + ldp d14, d15, [sp, #96] + ldp x19, x20, [sp], #112 + ret + +.L192_dec_ret: + mov w0, #0x0 + ret +.size aes_gcm_dec_192_kernel,.-aes_gcm_dec_192_kernel +.globl aes_gcm_enc_256_kernel +.type aes_gcm_enc_256_kernel,%function +.align 4 +aes_gcm_enc_256_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L256_enc_ret + stp x19, x20, [sp, #-112]! 
+ mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #64] + stp d12, d13, [sp, #80] + stp d14, d15, [sp, #96] + + add x4, x0, x1, lsr #3 //end_input_ptr + lsr x5, x1, #3 //byte_len + mov x15, x5 + ldp x10, x11, [x16] //ctr96_b64, ctr96_t32 +#ifdef __AARCH64EB__ + rev x10, x10 + rev x11, x11 +#endif + ldp x13, x14, [x8, #224] //load rk14 +#ifdef __AARCH64EB__ + ror x13, x13, #32 + ror x14, x14, #32 +#endif + ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible + sub x5, x5, #1 //byte_len - 1 + + ld1 {v18.4s}, [x8], #16 //load rk0 + and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + ld1 {v19.4s}, [x8], #16 //load rk1 + add x5, x5, x0 + + lsr x12, x11, #32 + fmov d2, x10 //CTR block 2 + orr w11, w11, w11 + + rev w12, w12 //rev_ctr32 + cmp x0, x5 //check if we have <= 4 blocks + fmov d1, x10 //CTR block 1 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + add w12, w12, #1 //increment rev_ctr32 + + rev w9, w12 //CTR block 1 + fmov d3, x10 //CTR block 3 + + orr x9, x11, x9, lsl #32 //CTR block 1 + add w12, w12, #1 //CTR block 1 + ld1 {v20.4s}, [x8], #16 //load rk2 + + fmov v1.d[1], x9 //CTR block 1 + rev w9, w12 //CTR block 2 + add w12, w12, #1 //CTR block 2 + + orr x9, x11, x9, lsl #32 //CTR block 2 + ld1 {v21.4s}, [x8], #16 //load rk3 + + fmov v2.d[1], x9 //CTR block 2 + rev w9, w12 //CTR block 3 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + orr x9, x11, x9, lsl #32 //CTR block 3 + + fmov v3.d[1], x9 //CTR block 3 + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + ld1 {v22.4s}, [x8], #16 //load rk4 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + ld1 {v23.4s}, [x8], #16 //load rk5 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ld1 {v24.4s}, [x8], #16 //load rk6 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + ldr q14, [x3, #80] //load h3l | h3h +#ifndef __AARCH64EB__ + ext v14.16b, v14.16b, v14.16b, #8 +#endif + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + ld1 {v25.4s}, [x8], #16 //load rk7 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + ld1 {v26.4s}, [x8], #16 //load rk8 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + ldr q13, [x3, #64] //load h2l | h2h +#ifndef __AARCH64EB__ + ext v13.16b, v13.16b, v13.16b, #8 +#endif + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + ld1 {v27.4s}, [x8], #16 //load rk9 + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + ldr q15, [x3, #112] //load h4l | h4h +#ifndef __AARCH64EB__ + ext v15.16b, v15.16b, v15.16b, #8 +#endif + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + ld1 {v28.4s}, [x8], #16 //load rk10 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + ld1 {v29.4s}, [x8], #16 //load rk11 + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 2 - round 3 + add w12, w12, #1 //CTR block 3 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES 
block 0 - round 4 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + trn2 v17.2d, v14.2d, v15.2d //h4l | h3l + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + ld1 {v30.4s}, [x8], #16 //load rk12 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + ldr q12, [x3, #32] //load h1l | h1h +#ifndef __AARCH64EB__ + ext v12.16b, v12.16b, v12.16b, #8 +#endif + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + ld1 {v31.4s}, [x8], #16 //load rk13 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + trn1 v9.2d, v14.2d, v15.2d //h4h | h3h + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + trn2 v16.2d, v12.2d, v13.2d //h2l | h1l + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 9 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b //AES block 1 - round 11 + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b //AES block 2 - round 11 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b //AES block 1 - round 12 + + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b //AES block 2 - round 12 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b //AES block 0 - round 11 + eor v17.16b, v17.16b, v9.16b //h4k | h3k + + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b //AES block 3 - round 11 + + aese v2.16b, v31.16b //AES block 2 - round 13 + trn1 v8.2d, v12.2d, v13.2d //h2h | h1h + + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b //AES block 0 - round 12 + + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b //AES block 3 - round 12 + + aese v1.16b, v31.16b //AES block 1 - round 13 + + aese v0.16b, v31.16b //AES block 0 - round 13 + + aese v3.16b, v31.16b //AES block 3 - round 13 + eor v16.16b, v16.16b, v8.16b //h2k | h1k + b.ge .L256_enc_tail //handle tail + + ldp x19, x20, [x0, #16] //AES block 1 - load plaintext +#ifdef __AARCH64EB__ + rev x19, x19 + rev x20, x20 +#endif + rev w9, w12 //CTR block 4 + ldp x6, x7, [x0, #0] //AES block 0 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + ldp x23, x24, [x0, #48] //AES block 3 - load plaintext +#ifdef __AARCH64EB__ + rev x23, x23 + rev x24, x24 +#endif + ldp x21, x22, [x0, #32] //AES 
block 2 - load plaintext +#ifdef __AARCH64EB__ + rev x21, x21 + rev x22, x22 +#endif + add x0, x0, #64 //AES input_ptr update + + eor x19, x19, x13 //AES block 1 - round 14 low + eor x20, x20, x14 //AES block 1 - round 14 high + + fmov d5, x19 //AES block 1 - mov low + eor x6, x6, x13 //AES block 0 - round 14 low + + eor x7, x7, x14 //AES block 0 - round 14 high + eor x24, x24, x14 //AES block 3 - round 14 high + fmov d4, x6 //AES block 0 - mov low + + cmp x0, x5 //check if we have <= 8 blocks + fmov v4.d[1], x7 //AES block 0 - mov high + eor x23, x23, x13 //AES block 3 - round 14 low + + eor x21, x21, x13 //AES block 2 - round 14 low + fmov v5.d[1], x20 //AES block 1 - mov high + + fmov d6, x21 //AES block 2 - mov low + add w12, w12, #1 //CTR block 4 + + orr x9, x11, x9, lsl #32 //CTR block 4 + fmov d7, x23 //AES block 3 - mov low + eor x22, x22, x14 //AES block 2 - round 14 high + + fmov v6.d[1], x22 //AES block 2 - mov high + + eor v4.16b, v4.16b, v0.16b //AES block 0 - result + fmov d0, x10 //CTR block 4 + + fmov v0.d[1], x9 //CTR block 4 + rev w9, w12 //CTR block 5 + add w12, w12, #1 //CTR block 5 + + eor v5.16b, v5.16b, v1.16b //AES block 1 - result + fmov d1, x10 //CTR block 5 + orr x9, x11, x9, lsl #32 //CTR block 5 + + fmov v1.d[1], x9 //CTR block 5 + rev w9, w12 //CTR block 6 + st1 { v4.16b}, [x2], #16 //AES block 0 - store result + + fmov v7.d[1], x24 //AES block 3 - mov high + orr x9, x11, x9, lsl #32 //CTR block 6 + eor v6.16b, v6.16b, v2.16b //AES block 2 - result + + st1 { v5.16b}, [x2], #16 //AES block 1 - store result + + add w12, w12, #1 //CTR block 6 + fmov d2, x10 //CTR block 6 + + fmov v2.d[1], x9 //CTR block 6 + st1 { v6.16b}, [x2], #16 //AES block 2 - store result + rev w9, w12 //CTR block 7 + + orr x9, x11, x9, lsl #32 //CTR block 7 + + eor v7.16b, v7.16b, v3.16b //AES block 3 - result + st1 { v7.16b}, [x2], #16 //AES block 3 - store result + b.ge .L256_enc_prepretail //do prepretail + +.L256_enc_main_loop: //main loop start + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free) + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + fmov d3, x10 //CTR block 4k+3 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + fmov v3.d[1], x9 //CTR block 4k+3 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + ldp x23, x24, [x0, #48] //AES block 4k+7 - load plaintext +#ifdef __AARCH64EB__ + rev x23, x23 + rev x24, x24 +#endif + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + ldp x21, x22, [x0, #32] //AES block 4k+6 - load plaintext +#ifdef __AARCH64EB__ + rev x21, x21 + rev x22, x22 +#endif + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + eor v4.16b, v4.16b, v11.16b //PRE 1 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor x23, x23, x13 //AES block 4k+7 - round 14 low + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + mov d10, v17.d[1] //GHASH block 4k - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + eor x22, x22, x14 //AES block 4k+6 - round 14 high + mov d8, v4.d[1] //GHASH block 4k - mid + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free) 
+ + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free) + + pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free) + + pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + mov d8, v6.d[1] //GHASH block 4k+2 - mid + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + + pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + + pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + + pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + ldp x19, x20, [x0, #16] //AES block 4k+5 - load plaintext +#ifdef __AARCH64EB__ + rev x19, x19 + rev x20, x20 +#endif + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + mov d4, v7.d[1] //GHASH block 4k+3 - mid + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low + + pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + eor v4.8b, v4.8b, v7.8b //GHASH block 4k+3 - mid + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + eor x19, x19, x13 //AES block 4k+5 - round 14 low + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + eor x21, x21, x13 //AES block 4k+6 - round 14 low + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + movi v8.8b, #0xc2 + + pmull v4.1q, v4.1d, 
v16.1d //GHASH block 4k+3 - mid + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + fmov d5, x19 //AES block 4k+5 - mov low + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + ldp x6, x7, [x0, #0] //AES block 4k+4 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + shl d8, d8, #56 //mod_constant + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+3 - mid + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + add w12, w12, #1 //CTR block 4k+3 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 11 + eor v4.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 11 + add x0, x0, #64 //AES input_ptr update + + pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + rev w9, w12 //CTR block 4k+8 + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + eor x6, x6, x13 //AES block 4k+4 - round 14 low + + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 12 + eor v10.16b, v10.16b, v4.16b //MODULO - karatsuba tidy up + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + eor x7, x7, x14 //AES block 4k+4 - round 14 high + + fmov d4, x6 //AES block 4k+4 - mov low + orr x9, x11, x9, lsl #32 //CTR block 4k+8 + eor v7.16b, v9.16b, v7.16b //MODULO - fold into mid + + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 12 + eor x20, x20, x14 //AES block 4k+5 - round 14 high + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 11 + eor x24, x24, x14 //AES block 4k+7 - round 14 high + + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 11 + add w12, w12, #1 //CTR block 4k+8 + + aese v0.16b, v31.16b //AES block 4k+4 - round 13 + fmov v4.d[1], x7 //AES block 4k+4 - mov high + eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid + + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 12 + fmov d7, x23 //AES block 4k+7 - mov low + + aese v1.16b, v31.16b //AES block 4k+5 - round 13 + fmov v5.d[1], x20 //AES block 4k+5 - mov high + + fmov d6, x21 //AES block 4k+6 - mov low + cmp x0, x5 //.LOOP CONTROL + + fmov v6.d[1], x22 //AES block 4k+6 - mov high + + pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + eor v4.16b, v4.16b, v0.16b //AES block 4k+4 - result + fmov d0, x10 //CTR block 4k+8 + + fmov v0.d[1], x9 //CTR block 4k+8 + rev w9, w12 //CTR block 4k+9 + add w12, w12, #1 //CTR block 4k+9 + + eor v5.16b, v5.16b, v1.16b //AES block 4k+5 - result + fmov d1, x10 //CTR block 4k+9 + orr x9, x11, x9, lsl #32 //CTR block 4k+9 + + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 12 + fmov v1.d[1], x9 //CTR block 4k+9 + + aese v2.16b, v31.16b //AES block 4k+6 - round 13 + rev w9, w12 //CTR block 4k+10 + st1 { v4.16b}, [x2], #16 //AES block 4k+4 - store result + + orr x9, x11, x9, lsl #32 //CTR block 4k+10 + eor v11.16b, v11.16b, v9.16b //MODULO - fold into low + fmov v7.d[1], x24 //AES block 4k+7 - mov high + + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + st1 
{ v5.16b}, [x2], #16 //AES block 4k+5 - store result + add w12, w12, #1 //CTR block 4k+10 + + aese v3.16b, v31.16b //AES block 4k+7 - round 13 + eor v6.16b, v6.16b, v2.16b //AES block 4k+6 - result + fmov d2, x10 //CTR block 4k+10 + + st1 { v6.16b}, [x2], #16 //AES block 4k+6 - store result + fmov v2.d[1], x9 //CTR block 4k+10 + rev w9, w12 //CTR block 4k+11 + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + orr x9, x11, x9, lsl #32 //CTR block 4k+11 + + eor v7.16b, v7.16b, v3.16b //AES block 4k+7 - result + st1 { v7.16b}, [x2], #16 //AES block 4k+7 - store result + b.lt .L256_enc_main_loop + +.L256_enc_prepretail: //PREPRETAIL + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + rev64 v6.16b, v6.16b //GHASH block 4k+2 (t0, t1, and t2 free) + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + fmov d3, x10 //CTR block 4k+3 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + rev64 v4.16b, v4.16b //GHASH block 4k (only t0 is free) + + fmov v3.d[1], x9 //CTR block 4k+3 + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + + eor v4.16b, v4.16b, v11.16b //PRE 1 + rev64 v5.16b, v5.16b //GHASH block 4k+1 (t0 and t1 free) + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + mov d10, v17.d[1] //GHASH block 4k - mid + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d8, v4.d[1] //GHASH block 4k - mid + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + + pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + mov d8, v6.d[1] //GHASH block 4k+2 - mid + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + rev64 v7.16b, v7.16b //GHASH block 4k+3 (t0, t1, t2 and t3 free) + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid + add w12, w12, #1 //CTR block 4k+3 + + pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + + pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + + eor v11.16b, v11.16b, 
v5.16b //GHASH block 4k+2 - low + ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high + mov d4, v7.d[1] //GHASH block 4k+3 - mid + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid + + eor v4.8b, v4.8b, v7.8b //GHASH block 4k+3 - mid + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + pmull v4.1q, v4.1d, v16.1d //GHASH block 4k+3 - mid + eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + shl d8, d8, #56 //mod_constant + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+3 - mid + + pmull v6.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + eor v11.16b, v11.16b, v6.16b //GHASH block 4k+3 - low + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + + eor v10.16b, v10.16b, v9.16b //karatsuba tidy up + + pmull v4.1q, v9.1d, v8.1d + ext v9.16b, v9.16b, v9.16b, #8 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + eor v10.16b, v10.16b, v11.16b + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 11 + eor v10.16b, v10.16b, v4.16b + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 12 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 11 + eor v10.16b, v10.16b, v9.16b + + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 11 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 12 + + pmull v4.1q, v10.1d, v8.1d + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 11 + ext v10.16b, v10.16b, v10.16b, #8 + + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 12 + + aese v1.16b, v31.16b //AES block 4k+5 - round 13 + eor v11.16b, v11.16b, v4.16b + + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 12 + + aese v3.16b, v31.16b //AES block 4k+7 - 
round 13 + + aese v0.16b, v31.16b //AES block 4k+4 - round 13 + + aese v2.16b, v31.16b //AES block 4k+6 - round 13 + eor v11.16b, v11.16b, v10.16b +.L256_enc_tail: //TAIL + + ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ldp x6, x7, [x0], #16 //AES block 4k+4 - load plaintext +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + eor x6, x6, x13 //AES block 4k+4 - round 14 low + eor x7, x7, x14 //AES block 4k+4 - round 14 high + + cmp x5, #48 + fmov d4, x6 //AES block 4k+4 - mov low + + fmov v4.d[1], x7 //AES block 4k+4 - mov high + + eor v5.16b, v4.16b, v0.16b //AES block 4k+4 - result + b.gt .L256_enc_blocks_more_than_3 + + cmp x5, #32 + mov v3.16b, v2.16b + movi v11.8b, #0 + + movi v9.8b, #0 + sub w12, w12, #1 + + mov v2.16b, v1.16b + movi v10.8b, #0 + b.gt .L256_enc_blocks_more_than_2 + + mov v3.16b, v1.16b + sub w12, w12, #1 + cmp x5, #16 + + b.gt .L256_enc_blocks_more_than_1 + + sub w12, w12, #1 + b .L256_enc_blocks_less_than_1 +.L256_enc_blocks_more_than_3: //blocks left > 3 + st1 { v5.16b}, [x2], #16 //AES final-3 block - store result + + ldp x6, x7, [x0], #16 //AES final-2 block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + rev64 v4.16b, v5.16b //GHASH final-3 block + + eor x6, x6, x13 //AES final-2 block - round 14 low + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor x7, x7, x14 //AES final-2 block - round 14 high + + mov d22, v4.d[1] //GHASH final-3 block - mid + fmov d5, x6 //AES final-2 block - mov low + + fmov v5.d[1], x7 //AES final-2 block - mov high + + eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid + movi v8.8b, #0 //suppress further partial tag feed in + + mov d10, v17.d[1] //GHASH final-3 block - mid + + pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low + + pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high + + pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid + eor v5.16b, v5.16b, v1.16b //AES final-2 block - result +.L256_enc_blocks_more_than_2: //blocks left > 2 + + st1 { v5.16b}, [x2], #16 //AES final-2 block - store result + + ldp x6, x7, [x0], #16 //AES final-1 block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + rev64 v4.16b, v5.16b //GHASH final-2 block + + eor x6, x6, x13 //AES final-1 block - round 14 low + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + fmov d5, x6 //AES final-1 block - mov low + eor x7, x7, x14 //AES final-1 block - round 14 high + + fmov v5.d[1], x7 //AES final-1 block - mov high + + movi v8.8b, #0 //suppress further partial tag feed in + + pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high + mov d22, v4.d[1] //GHASH final-2 block - mid + + pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low + + eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid + + eor v5.16b, v5.16b, v2.16b //AES final-1 block - result + + eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high + + pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low + + eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid +.L256_enc_blocks_more_than_1: //blocks left > 1 + + st1 { v5.16b}, [x2], #16 //AES final-1 block - store result + + rev64 v4.16b, v5.16b //GHASH final-1 block + + ldp x6, x7, [x0], #16 //AES final block - load input low & high +#ifdef __AARCH64EB__ + rev x6, x6 + rev x7, x7 +#endif + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + movi v8.8b, #0 //suppress further partial tag 
feed in + + eor x6, x6, x13 //AES final block - round 14 low + mov d22, v4.d[1] //GHASH final-1 block - mid + + pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high + eor x7, x7, x14 //AES final block - round 14 high + + eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high + + ins v22.d[1], v22.d[0] //GHASH final-1 block - mid + fmov d5, x6 //AES final block - mov low + + fmov v5.d[1], x7 //AES final block - mov high + + pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid + + pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low + + eor v5.16b, v5.16b, v3.16b //AES final block - result + eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low +.L256_enc_blocks_less_than_1: //blocks left <= 1 + + and x1, x1, #127 //bit_length %= 128 + + mvn x13, xzr //rk14_l = 0xffffffffffffffff + sub x1, x1, #128 //bit_length -= 128 + + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + ld1 { v18.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored + + mvn x14, xzr //rk14_h = 0xffffffffffffffff + and x1, x1, #127 //bit_length %= 128 + + lsr x14, x14, x1 //rk14_h is mask for top 64b of last block + cmp x1, #64 + + csel x6, x13, x14, lt + csel x7, x14, xzr, lt + + fmov d0, x6 //ctr0b is mask for last block + + fmov v0.d[1], x7 + + and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v4.16b, v5.16b //GHASH final block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + bif v5.16b, v18.16b, v0.16b //insert existing bytes in top end of result before storing + + pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high + mov d8, v4.d[1] //GHASH final block - mid +#ifndef __AARCH64EB__ + rev w9, w12 +#else + mov w9, w12 +#endif + + pmull v21.1q, v4.1d, v12.1d //GHASH final block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final block - high + eor v8.8b, v8.8b, v4.8b //GHASH final block - mid + + pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid + + eor v11.16b, v11.16b, v21.16b //GHASH final block - low + + eor v10.16b, v10.16b, v8.16b //GHASH final block - mid + movi v8.8b, #0xc2 + + eor v4.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + shl d8, d8, #56 //mod_constant + + eor v10.16b, v10.16b, v4.16b //MODULO - karatsuba tidy up + + pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid + + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + pmull v9.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + str w9, [x16, #12] //store the updated counter + + st1 { v5.16b}, [x2] //store all 16B + eor v11.16b, v11.16b, v9.16b //MODULO - fold into low + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d10, d11, [sp, #64] + ldp d12, d13, [sp, #80] + ldp d14, d15, [sp, #96] + ldp x19, x20, [sp], #112 + ret + +.L256_enc_ret: + mov w0, #0x0 + ret +.size aes_gcm_enc_256_kernel,.-aes_gcm_enc_256_kernel +.globl aes_gcm_dec_256_kernel +.type aes_gcm_dec_256_kernel,%function +.align 4 +aes_gcm_dec_256_kernel: + AARCH64_VALID_CALL_TARGET + cbz x1, .L256_dec_ret + stp x19, x20, [sp, #-112]! 
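+ // Same register convention as the encrypt kernel: x16 = counter/IV block (x4), x8 = key schedule (x5).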
+ mov x16, x4 + mov x8, x5 + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp d8, d9, [sp, #48] + stp d10, d11, [sp, #64] + stp d12, d13, [sp, #80] + stp d14, d15, [sp, #96] + + lsr x5, x1, #3 //byte_len + mov x15, x5 + ldp x10, x11, [x16] //ctr96_b64, ctr96_t32 +#ifdef __AARCH64EB__ + rev x10, x10 + rev x11, x11 +#endif + ldp x13, x14, [x8, #224] //load rk14 +#ifdef __AARCH64EB__ + ror x14, x14, #32 + ror x13, x13, #32 +#endif + ld1 {v18.4s}, [x8], #16 //load rk0 + sub x5, x5, #1 //byte_len - 1 + + ld1 {v19.4s}, [x8], #16 //load rk1 + and x5, x5, #0xffffffffffffffc0 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) + + add x4, x0, x1, lsr #3 //end_input_ptr + ld1 {v20.4s}, [x8], #16 //load rk2 + + lsr x12, x11, #32 + ld1 {v21.4s}, [x8], #16 //load rk3 + orr w11, w11, w11 + + ld1 {v22.4s}, [x8], #16 //load rk4 + add x5, x5, x0 + rev w12, w12 //rev_ctr32 + + add w12, w12, #1 //increment rev_ctr32 + fmov d3, x10 //CTR block 3 + + rev w9, w12 //CTR block 1 + add w12, w12, #1 //CTR block 1 + fmov d1, x10 //CTR block 1 + + orr x9, x11, x9, lsl #32 //CTR block 1 + ld1 { v0.16b}, [x16] //special case vector load initial counter so we can start first AES block as quickly as possible + + fmov v1.d[1], x9 //CTR block 1 + rev w9, w12 //CTR block 2 + add w12, w12, #1 //CTR block 2 + + fmov d2, x10 //CTR block 2 + orr x9, x11, x9, lsl #32 //CTR block 2 + + fmov v2.d[1], x9 //CTR block 2 + rev w9, w12 //CTR block 3 + + orr x9, x11, x9, lsl #32 //CTR block 3 + ld1 {v23.4s}, [x8], #16 //load rk5 + + fmov v3.d[1], x9 //CTR block 3 + add w12, w12, #1 //CTR block 3 + + ld1 {v24.4s}, [x8], #16 //load rk6 + + ld1 {v25.4s}, [x8], #16 //load rk7 + + ld1 {v26.4s}, [x8], #16 //load rk8 + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 0 - round 0 + ldr q14, [x3, #80] //load h3l | h3h +#ifndef __AARCH64EB__ + ext v14.16b, v14.16b, v14.16b, #8 +#endif + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 3 - round 0 + ldr q15, [x3, #112] //load h4l | h4h +#ifndef __AARCH64EB__ + ext v15.16b, v15.16b, v15.16b, #8 +#endif + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 1 - round 0 + ldr q13, [x3, #64] //load h2l | h2h +#ifndef __AARCH64EB__ + ext v13.16b, v13.16b, v13.16b, #8 +#endif + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 2 - round 0 + ld1 {v27.4s}, [x8], #16 //load rk9 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 0 - round 1 + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 1 - round 1 + ld1 { v11.16b}, [x3] + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 2 - round 1 + ld1 {v28.4s}, [x8], #16 //load rk10 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 3 - round 1 + ld1 {v29.4s}, [x8], #16 //load rk11 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 0 - round 2 + ldr q12, [x3, #32] //load h1l | h1h +#ifndef __AARCH64EB__ + ext v12.16b, v12.16b, v12.16b, #8 +#endif + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 2 - round 2 + ld1 {v30.4s}, [x8], #16 //load rk12 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 3 - round 2 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 0 - round 3 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 1 - round 2 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 3 - round 3 + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 0 - round 4 + cmp x0, x5 //check if we have <= 4 blocks + + aese v2.16b, v21.16b + aesmc v2.16b, 
v2.16b //AES block 2 - round 3 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 1 - round 3 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 3 - round 4 + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 2 - round 4 + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 1 - round 4 + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 3 - round 5 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 0 - round 5 + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 1 - round 5 + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 2 - round 5 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 0 - round 6 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 3 - round 6 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 1 - round 6 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 2 - round 6 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 0 - round 7 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 1 - round 7 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 3 - round 7 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 0 - round 8 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 2 - round 7 + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 3 - round 8 + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 1 - round 8 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 0 - round 9 + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 2 - round 8 + ld1 {v31.4s}, [x8], #16 //load rk13 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 1 - round 9 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 0 - round 10 + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 3 - round 9 + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 1 - round 10 + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 2 - round 9 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 3 - round 10 + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b //AES block 0 - round 11 + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 2 - round 10 + + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b //AES block 3 - round 11 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b //AES block 1 - round 11 + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b //AES block 2 - round 11 + + trn1 v9.2d, v14.2d, v15.2d //h4h | h3h + + trn2 v17.2d, v14.2d, v15.2d //h4l | h3l + + trn1 v8.2d, v12.2d, v13.2d //h2h | h1h + trn2 v16.2d, v12.2d, v13.2d //h2l | h1l + + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b //AES block 1 - round 12 + + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b //AES block 0 - round 12 + + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b //AES block 2 - round 12 + + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b //AES block 3 - round 12 + eor v17.16b, v17.16b, v9.16b //h4k | h3k + + aese v1.16b, v31.16b //AES block 1 - round 13 + + aese v2.16b, v31.16b //AES block 2 - round 13 + eor v16.16b, v16.16b, v8.16b //h2k | h1k + + aese v3.16b, v31.16b //AES block 3 - round 13 + + aese v0.16b, v31.16b //AES block 0 - round 13 + b.ge .L256_dec_tail //handle tail + + ld1 {v4.16b, v5.16b}, [x0], #32 //AES block 0,1 - load ciphertext + + rev w9, w12 //CTR block 4 + + eor v0.16b, v4.16b, v0.16b //AES block 0 - result + + eor v1.16b, v5.16b, v1.16b //AES block 1 - result + rev64 v5.16b, v5.16b //GHASH block 1 + ld1 {v6.16b}, [x0], #16 //AES block 2 - load ciphertext + + mov x7, v0.d[1] //AES block 0 - mov high + + mov x6, v0.d[0] //AES block 0 - 
mov low + rev64 v4.16b, v4.16b //GHASH block 0 + add w12, w12, #1 //CTR block 4 + + fmov d0, x10 //CTR block 4 + orr x9, x11, x9, lsl #32 //CTR block 4 + + fmov v0.d[1], x9 //CTR block 4 + rev w9, w12 //CTR block 5 + add w12, w12, #1 //CTR block 5 + + mov x19, v1.d[0] //AES block 1 - mov low + + orr x9, x11, x9, lsl #32 //CTR block 5 + mov x20, v1.d[1] //AES block 1 - mov high + eor x7, x7, x14 //AES block 0 - round 14 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + eor x6, x6, x13 //AES block 0 - round 14 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + stp x6, x7, [x2], #16 //AES block 0 - store result + fmov d1, x10 //CTR block 5 + + ld1 {v7.16b}, [x0], #16 //AES block 3 - load ciphertext + + fmov v1.d[1], x9 //CTR block 5 + rev w9, w12 //CTR block 6 + add w12, w12, #1 //CTR block 6 + + eor x19, x19, x13 //AES block 1 - round 14 low +#ifdef __AARCH64EB__ + rev x19, x19 +#endif + orr x9, x11, x9, lsl #32 //CTR block 6 + + eor x20, x20, x14 //AES block 1 - round 14 high +#ifdef __AARCH64EB__ + rev x20, x20 +#endif + stp x19, x20, [x2], #16 //AES block 1 - store result + + eor v2.16b, v6.16b, v2.16b //AES block 2 - result + cmp x0, x5 //check if we have <= 8 blocks + b.ge .L256_dec_prepretail //do prepretail + +.L256_dec_main_loop: //main loop start + mov x21, v2.d[0] //AES block 4k+2 - mov low + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + mov x22, v2.d[1] //AES block 4k+2 - mov high + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + fmov d2, x10 //CTR block 4k+6 + + fmov v2.d[1], x9 //CTR block 4k+6 + eor v4.16b, v4.16b, v11.16b //PRE 1 + rev w9, w12 //CTR block 4k+7 + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + mov x24, v3.d[1] //AES block 4k+3 - mov high + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + mov x23, v3.d[0] //AES block 4k+3 - mov low + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + mov d8, v4.d[1] //GHASH block 4k - mid + fmov d3, x10 //CTR block 4k+7 + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + orr x9, x11, x9, lsl #32 //CTR block 4k+7 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + fmov v3.d[1], x9 //CTR block 4k+7 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + eor x22, x22, x14 //AES block 4k+2 - round 14 high +#ifdef __AARCH64EB__ + rev x22, x22 +#endif + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + mov d10, v17.d[1] //GHASH block 4k - mid + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + rev64 v6.16b, v6.16b //GHASH block 4k+2 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + eor x21, x21, x13 //AES block 4k+2 - round 14 low +#ifdef __AARCH64EB__ + rev x21, x21 +#endif + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + stp x21, x22, [x2], #16 //AES block 4k+2 - store result + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + + pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + rev64 v7.16b, v7.16b //GHASH block 4k+3 + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + eor x23, x23, x13 //AES block 4k+3 - round 14 low +#ifdef __AARCH64EB__ + rev 
x23, x23 +#endif + pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + eor x24, x24, x14 //AES block 4k+3 - round 14 high +#ifdef __AARCH64EB__ + rev x24, x24 +#endif + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low + + aese v2.16b, v23.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + add w12, w12, #1 //CTR block 4k+7 + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + mov d8, v6.d[1] //GHASH block 4k+2 - mid + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + + pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + rev w9, w12 //CTR block 4k+8 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + add w12, w12, #1 //CTR block 4k+8 + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + + pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + mov d6, v7.d[1] //GHASH block 4k+3 - mid + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + + pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + + pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + orr x9, x11, x9, lsl #32 //CTR block 4k+8 + eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + eor v6.8b, v6.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + + pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid + movi v8.8b, #0xc2 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + eor v11.16b, v11.16b, v4.16b //GHASH block 4k+3 - low + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 11 + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + shl d8, d8, #56 //mod_constant + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + eor v10.16b, v10.16b, v6.16b //GHASH block 4k+3 - mid + + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 12 + + 
pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + ld1 {v4.16b}, [x0], #16 //AES block 4k+4 - load ciphertext + + aese v0.16b, v31.16b //AES block 4k+4 - round 13 + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + ld1 {v5.16b}, [x0], #16 //AES block 4k+5 - load ciphertext + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + eor v0.16b, v4.16b, v0.16b //AES block 4k+4 - result + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 11 + stp x23, x24, [x2], #16 //AES block 4k+3 - store result + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + ld1 {v6.16b}, [x0], #16 //AES block 4k+6 - load ciphertext + + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 12 + ld1 {v7.16b}, [x0], #16 //AES block 4k+7 - load ciphertext + + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 11 + mov x7, v0.d[1] //AES block 4k+4 - mov high + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + aese v1.16b, v31.16b //AES block 4k+5 - round 13 + mov x6, v0.d[0] //AES block 4k+4 - mov low + + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 12 + fmov d0, x10 //CTR block 4k+8 + + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 11 + fmov v0.d[1], x9 //CTR block 4k+8 + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + eor v1.16b, v5.16b, v1.16b //AES block 4k+5 - result + rev w9, w12 //CTR block 4k+9 + + aese v2.16b, v31.16b //AES block 4k+6 - round 13 + orr x9, x11, x9, lsl #32 //CTR block 4k+9 + cmp x0, x5 //.LOOP CONTROL + + add w12, w12, #1 //CTR block 4k+9 + + eor x6, x6, x13 //AES block 4k+4 - round 14 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor x7, x7, x14 //AES block 4k+4 - round 14 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + mov x20, v1.d[1] //AES block 4k+5 - mov high + eor v2.16b, v6.16b, v2.16b //AES block 4k+6 - result + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 12 + mov x19, v1.d[0] //AES block 4k+5 - mov low + + fmov d1, x10 //CTR block 4k+9 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + fmov v1.d[1], x9 //CTR block 4k+9 + rev w9, w12 //CTR block 4k+10 + add w12, w12, #1 //CTR block 4k+10 + + aese v3.16b, v31.16b //AES block 4k+7 - round 13 + orr x9, x11, x9, lsl #32 //CTR block 4k+10 + + rev64 v5.16b, v5.16b //GHASH block 4k+5 + eor x20, x20, x14 //AES block 4k+5 - round 14 high +#ifdef __AARCH64EB__ + rev x20, x20 +#endif + stp x6, x7, [x2], #16 //AES block 4k+4 - store result + + eor x19, x19, x13 //AES block 4k+5 - round 14 low +#ifdef __AARCH64EB__ + rev x19, x19 +#endif + stp x19, x20, [x2], #16 //AES block 4k+5 - store result + + rev64 v4.16b, v4.16b //GHASH block 4k+4 + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + b.lt .L256_dec_main_loop + + +.L256_dec_prepretail: //PREPRETAIL + ext v11.16b, v11.16b, v11.16b, #8 //PRE 0 + mov x21, 
v2.d[0] //AES block 4k+2 - mov low + eor v3.16b, v7.16b, v3.16b //AES block 4k+3 - result + + aese v0.16b, v18.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 0 + mov x22, v2.d[1] //AES block 4k+2 - mov high + + aese v1.16b, v18.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 0 + fmov d2, x10 //CTR block 4k+6 + + fmov v2.d[1], x9 //CTR block 4k+6 + rev w9, w12 //CTR block 4k+7 + eor v4.16b, v4.16b, v11.16b //PRE 1 + + rev64 v6.16b, v6.16b //GHASH block 4k+2 + orr x9, x11, x9, lsl #32 //CTR block 4k+7 + mov x23, v3.d[0] //AES block 4k+3 - mov low + + aese v1.16b, v19.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 1 + mov x24, v3.d[1] //AES block 4k+3 - mov high + + pmull v11.1q, v4.1d, v15.1d //GHASH block 4k - low + mov d8, v4.d[1] //GHASH block 4k - mid + fmov d3, x10 //CTR block 4k+7 + + pmull2 v9.1q, v4.2d, v15.2d //GHASH block 4k - high + fmov v3.d[1], x9 //CTR block 4k+7 + + aese v2.16b, v18.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 0 + mov d10, v17.d[1] //GHASH block 4k - mid + + aese v0.16b, v19.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 1 + eor v8.8b, v8.8b, v4.8b //GHASH block 4k - mid + + pmull2 v4.1q, v5.2d, v14.2d //GHASH block 4k+1 - high + + aese v2.16b, v19.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 1 + rev64 v7.16b, v7.16b //GHASH block 4k+3 + + aese v3.16b, v18.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 0 + + pmull v10.1q, v8.1d, v10.1d //GHASH block 4k - mid + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+1 - high + + pmull v8.1q, v5.1d, v14.1d //GHASH block 4k+1 - low + + aese v3.16b, v19.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 1 + mov d4, v5.d[1] //GHASH block 4k+1 - mid + + aese v0.16b, v20.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 2 + + aese v1.16b, v20.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 2 + eor v11.16b, v11.16b, v8.16b //GHASH block 4k+1 - low + + aese v2.16b, v20.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 2 + + aese v0.16b, v21.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 3 + mov d8, v6.d[1] //GHASH block 4k+2 - mid + + aese v3.16b, v20.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 2 + eor v4.8b, v4.8b, v5.8b //GHASH block 4k+1 - mid + + pmull v5.1q, v6.1d, v13.1d //GHASH block 4k+2 - low + + aese v0.16b, v22.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 4 + + aese v3.16b, v21.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 3 + eor v8.8b, v8.8b, v6.8b //GHASH block 4k+2 - mid + + pmull v4.1q, v4.1d, v17.1d //GHASH block 4k+1 - mid + + aese v0.16b, v23.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 5 + eor v11.16b, v11.16b, v5.16b //GHASH block 4k+2 - low + + aese v3.16b, v22.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 4 + + pmull2 v5.1q, v7.2d, v12.2d //GHASH block 4k+3 - high + eor v10.16b, v10.16b, v4.16b //GHASH block 4k+1 - mid + + pmull2 v4.1q, v6.2d, v13.2d //GHASH block 4k+2 - high + + aese v3.16b, v23.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 5 + ins v8.d[1], v8.d[0] //GHASH block 4k+2 - mid + + aese v2.16b, v21.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 3 + + aese v1.16b, v21.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 3 + eor v9.16b, v9.16b, v4.16b //GHASH block 4k+2 - high + + pmull v4.1q, v7.1d, v12.1d //GHASH block 4k+3 - low + + aese v2.16b, v22.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 4 + mov d6, v7.d[1] //GHASH block 4k+3 - mid + + aese v1.16b, v22.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 4 + + pmull2 v8.1q, v8.2d, v16.2d //GHASH block 4k+2 - mid + + aese v2.16b, v23.16b + 
aesmc v2.16b, v2.16b //AES block 4k+6 - round 5 + eor v6.8b, v6.8b, v7.8b //GHASH block 4k+3 - mid + + aese v1.16b, v23.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 5 + + aese v3.16b, v24.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 6 + eor v10.16b, v10.16b, v8.16b //GHASH block 4k+2 - mid + + aese v2.16b, v24.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 6 + + aese v0.16b, v24.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 6 + movi v8.8b, #0xc2 + + aese v1.16b, v24.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 6 + eor v11.16b, v11.16b, v4.16b //GHASH block 4k+3 - low + + pmull v6.1q, v6.1d, v16.1d //GHASH block 4k+3 - mid + + aese v3.16b, v25.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 7 + eor v9.16b, v9.16b, v5.16b //GHASH block 4k+3 - high + + aese v1.16b, v25.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 7 + + aese v0.16b, v25.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 7 + eor v10.16b, v10.16b, v6.16b //GHASH block 4k+3 - mid + + aese v3.16b, v26.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 8 + + aese v2.16b, v25.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 7 + eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + aese v1.16b, v26.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 8 + + aese v0.16b, v26.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 8 + shl d8, d8, #56 //mod_constant + + aese v2.16b, v26.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 8 + + aese v1.16b, v27.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 9 + eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up + + pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + aese v2.16b, v27.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 9 + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + aese v3.16b, v27.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 9 + + aese v0.16b, v27.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 9 + eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid + + aese v2.16b, v28.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 10 + + aese v3.16b, v28.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 10 + + aese v0.16b, v28.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 10 + eor x22, x22, x14 //AES block 4k+2 - round 14 high +#ifdef __AARCH64EB__ + rev x22, x22 +#endif + aese v1.16b, v28.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 10 + eor x23, x23, x13 //AES block 4k+3 - round 14 low +#ifdef __AARCH64EB__ + rev x23, x23 +#endif + aese v2.16b, v29.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 11 + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + aese v0.16b, v29.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 11 + add w12, w12, #1 //CTR block 4k+7 + + aese v1.16b, v29.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 11 + eor x21, x21, x13 //AES block 4k+2 - round 14 low +#ifdef __AARCH64EB__ + rev x21, x21 +#endif + + aese v2.16b, v30.16b + aesmc v2.16b, v2.16b //AES block 4k+6 - round 12 + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + eor x24, x24, x14 //AES block 4k+3 - round 14 high +#ifdef __AARCH64EB__ + rev x24, x24 +#endif + + aese v3.16b, v29.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 11 + stp x21, x22, [x2], #16 //AES block 4k+2 - store result + + aese v1.16b, v30.16b + aesmc v1.16b, v1.16b //AES block 4k+5 - round 12 + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + aese v0.16b, v30.16b + aesmc v0.16b, v0.16b //AES block 4k+4 - round 12 + stp x23, x24, [x2], #16 //AES block 
4k+3 - store result + + aese v3.16b, v30.16b + aesmc v3.16b, v3.16b //AES block 4k+7 - round 12 + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + aese v1.16b, v31.16b //AES block 4k+5 - round 13 + + aese v0.16b, v31.16b //AES block 4k+4 - round 13 + + aese v3.16b, v31.16b //AES block 4k+7 - round 13 + + aese v2.16b, v31.16b //AES block 4k+6 - round 13 + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low +.L256_dec_tail: //TAIL + + sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process + ld1 { v5.16b}, [x0], #16 //AES block 4k+4 - load ciphertext + + eor v0.16b, v5.16b, v0.16b //AES block 4k+4 - result + + mov x6, v0.d[0] //AES block 4k+4 - mov low + + mov x7, v0.d[1] //AES block 4k+4 - mov high + ext v8.16b, v11.16b, v11.16b, #8 //prepare final partial tag + + cmp x5, #48 + + eor x6, x6, x13 //AES block 4k+4 - round 14 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + + eor x7, x7, x14 //AES block 4k+4 - round 14 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif + b.gt .L256_dec_blocks_more_than_3 + + sub w12, w12, #1 + mov v3.16b, v2.16b + movi v10.8b, #0 + + movi v11.8b, #0 + cmp x5, #32 + + movi v9.8b, #0 + mov v2.16b, v1.16b + b.gt .L256_dec_blocks_more_than_2 + + sub w12, w12, #1 + + mov v3.16b, v1.16b + cmp x5, #16 + b.gt .L256_dec_blocks_more_than_1 + + sub w12, w12, #1 + b .L256_dec_blocks_less_than_1 +.L256_dec_blocks_more_than_3: //blocks left > 3 + rev64 v4.16b, v5.16b //GHASH final-3 block + ld1 { v5.16b}, [x0], #16 //AES final-2 block - load ciphertext + + stp x6, x7, [x2], #16 //AES final-3 block - store result + + mov d10, v17.d[1] //GHASH final-3 block - mid + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + eor v0.16b, v5.16b, v1.16b //AES final-2 block - result + + mov d22, v4.d[1] //GHASH final-3 block - mid + + mov x6, v0.d[0] //AES final-2 block - mov low + + mov x7, v0.d[1] //AES final-2 block - mov high + + eor v22.8b, v22.8b, v4.8b //GHASH final-3 block - mid + + movi v8.8b, #0 //suppress further partial tag feed in + + pmull2 v9.1q, v4.2d, v15.2d //GHASH final-3 block - high + + pmull v10.1q, v22.1d, v10.1d //GHASH final-3 block - mid + eor x6, x6, x13 //AES final-2 block - round 14 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + + pmull v11.1q, v4.1d, v15.1d //GHASH final-3 block - low + eor x7, x7, x14 //AES final-2 block - round 14 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif +.L256_dec_blocks_more_than_2: //blocks left > 2 + + rev64 v4.16b, v5.16b //GHASH final-2 block + ld1 { v5.16b}, [x0], #16 //AES final-1 block - load ciphertext + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + stp x6, x7, [x2], #16 //AES final-2 block - store result + + eor v0.16b, v5.16b, v2.16b //AES final-1 block - result + + mov d22, v4.d[1] //GHASH final-2 block - mid + + pmull v21.1q, v4.1d, v14.1d //GHASH final-2 block - low + + pmull2 v20.1q, v4.2d, v14.2d //GHASH final-2 block - high + + eor v22.8b, v22.8b, v4.8b //GHASH final-2 block - mid + mov x6, v0.d[0] //AES final-1 block - mov low + + mov x7, v0.d[1] //AES final-1 block - mov high + eor v11.16b, v11.16b, v21.16b //GHASH final-2 block - low + movi v8.8b, #0 //suppress further partial tag feed in + + pmull v22.1q, v22.1d, v17.1d //GHASH final-2 block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final-2 block - high + eor x6, x6, x13 //AES final-1 block - round 14 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + + eor v10.16b, v10.16b, v22.16b //GHASH final-2 block - mid + eor x7, x7, x14 //AES final-1 block - round 14 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif 
+.L256_dec_blocks_more_than_1: //blocks left > 1 + + stp x6, x7, [x2], #16 //AES final-1 block - store result + rev64 v4.16b, v5.16b //GHASH final-1 block + + ld1 { v5.16b}, [x0], #16 //AES final block - load ciphertext + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + movi v8.8b, #0 //suppress further partial tag feed in + + mov d22, v4.d[1] //GHASH final-1 block - mid + + eor v0.16b, v5.16b, v3.16b //AES final block - result + + pmull2 v20.1q, v4.2d, v13.2d //GHASH final-1 block - high + + eor v22.8b, v22.8b, v4.8b //GHASH final-1 block - mid + + pmull v21.1q, v4.1d, v13.1d //GHASH final-1 block - low + mov x6, v0.d[0] //AES final block - mov low + + ins v22.d[1], v22.d[0] //GHASH final-1 block - mid + + mov x7, v0.d[1] //AES final block - mov high + + pmull2 v22.1q, v22.2d, v16.2d //GHASH final-1 block - mid + eor x6, x6, x13 //AES final block - round 14 low +#ifdef __AARCH64EB__ + rev x6, x6 +#endif + eor v11.16b, v11.16b, v21.16b //GHASH final-1 block - low + + eor v9.16b, v9.16b, v20.16b //GHASH final-1 block - high + + eor v10.16b, v10.16b, v22.16b //GHASH final-1 block - mid + eor x7, x7, x14 //AES final block - round 14 high +#ifdef __AARCH64EB__ + rev x7, x7 +#endif +.L256_dec_blocks_less_than_1: //blocks left <= 1 + + and x1, x1, #127 //bit_length %= 128 + mvn x14, xzr //rk14_h = 0xffffffffffffffff + + sub x1, x1, #128 //bit_length -= 128 + mvn x13, xzr //rk14_l = 0xffffffffffffffff + + ldp x4, x5, [x2] //load existing bytes we need to not overwrite + neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) + + and x1, x1, #127 //bit_length %= 128 + + lsr x14, x14, x1 //rk14_h is mask for top 64b of last block + cmp x1, #64 + + csel x9, x13, x14, lt + csel x10, x14, xzr, lt + + fmov d0, x9 //ctr0b is mask for last block + and x6, x6, x9 + + mov v0.d[1], x10 + bic x4, x4, x9 //mask out low existing bytes + +#ifndef __AARCH64EB__ + rev w9, w12 +#else + mov w9, w12 +#endif + + bic x5, x5, x10 //mask out high existing bytes + + orr x6, x6, x4 + + and x7, x7, x10 + + orr x7, x7, x5 + + and v5.16b, v5.16b, v0.16b //possibly partial last block has zeroes in highest bits + + rev64 v4.16b, v5.16b //GHASH final block + + eor v4.16b, v4.16b, v8.16b //feed in partial tag + + pmull v21.1q, v4.1d, v12.1d //GHASH final block - low + + mov d8, v4.d[1] //GHASH final block - mid + + eor v8.8b, v8.8b, v4.8b //GHASH final block - mid + + pmull2 v20.1q, v4.2d, v12.2d //GHASH final block - high + + pmull v8.1q, v8.1d, v16.1d //GHASH final block - mid + + eor v9.16b, v9.16b, v20.16b //GHASH final block - high + + eor v11.16b, v11.16b, v21.16b //GHASH final block - low + + eor v10.16b, v10.16b, v8.16b //GHASH final block - mid + movi v8.8b, #0xc2 + + eor v6.16b, v11.16b, v9.16b //MODULO - karatsuba tidy up + + shl d8, d8, #56 //mod_constant + + eor v10.16b, v10.16b, v6.16b //MODULO - karatsuba tidy up + + pmull v7.1q, v9.1d, v8.1d //MODULO - top 64b align with mid + + ext v9.16b, v9.16b, v9.16b, #8 //MODULO - other top alignment + + eor v10.16b, v10.16b, v7.16b //MODULO - fold into mid + + eor v10.16b, v10.16b, v9.16b //MODULO - fold into mid + + pmull v8.1q, v10.1d, v8.1d //MODULO - mid 64b align with low + + ext v10.16b, v10.16b, v10.16b, #8 //MODULO - other mid alignment + + eor v11.16b, v11.16b, v8.16b //MODULO - fold into low + + stp x6, x7, [x2] + + str w9, [x16, #12] //store the updated counter + + eor v11.16b, v11.16b, v10.16b //MODULO - fold into low + ext v11.16b, v11.16b, v11.16b, #8 + rev64 v11.16b, v11.16b + mov x0, x15 + st1 { v11.16b }, [x3] + + ldp x21, x22, [sp, #16] + 
ldp x23, x24, [sp, #32] + ldp d8, d9, [sp, #48] + ldp d10, d11, [sp, #64] + ldp d12, d13, [sp, #80] + ldp d14, d15, [sp, #96] + ldp x19, x20, [sp], #112 + ret + +.L256_dec_ret: + mov w0, #0x0 + ret +.size aes_gcm_dec_256_kernel,.-aes_gcm_dec_256_kernel +.section .rodata +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif diff --git a/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64-zvkb-zvbc.S b/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64-zvkb-zvbc.S new file mode 100644 index 000000000000..ca549d473a4e --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64-zvkb-zvbc.S @@ -0,0 +1,268 @@ +.text +.p2align 3 +.globl gcm_init_rv64i_zvkb_zvbc +.type gcm_init_rv64i_zvkb_zvbc,@function +gcm_init_rv64i_zvkb_zvbc: + # Load/store data in reverse order. + # This is needed as a part of endianness swap. + add a1, a1, 8 + li t0, -8 + li t1, 63 + la t2, Lpolymod + + .word 0xc1817057 # vsetivli x0, 2, e64, m1, tu, mu + + .word 173404295 # vlse64.v v1, (a1), t0 + .word 33812743 # vle64.v v2, (t2) + + # Shift one left and get the carry bits. + .word 2719171031 # vsrl.vx v3, v1, t1 + .word 2517676247 # vsll.vi v1, v1, 1 + + # Use the fact that the polynomial degree is no more than 128, + # i.e. only the LSB of the upper half could be set. + # Thanks to this we don't need to do the full reduction here. + # Instead simply subtract the reduction polynomial. + # This idea was taken from x86 ghash implementation in OpenSSL. + .word 976269911 # vslideup.vi v4, v3, 1 + .word 1043378647 # vslidedown.vi v3, v3, 1 + + .word 1577136215 # vmv.v.i v0, 2 + .word 672268503 # vor.vv v1, v1, v4, v0.t + + # Need to set the mask to 3, if the carry bit is set. + .word 1577156695 # vmv.v.v v0, v3 + .word 1577071063 # vmv.v.i v3, 0 + .word 1546760663 # vmerge.vim v3, v3, 3, v0 + .word 1577156695 # vmv.v.v v0, v3 + + .word 739311831 # vxor.vv v1, v1, v2, v0.t + + .word 33910951 # vse64.v v1, (a0) + ret +.size gcm_init_rv64i_zvkb_zvbc,.-gcm_init_rv64i_zvkb_zvbc +.text +.p2align 3 +.globl gcm_gmult_rv64i_zvkb_zvbc +.type gcm_gmult_rv64i_zvkb_zvbc,@function +gcm_gmult_rv64i_zvkb_zvbc: + ld t0, (a1) + ld t1, 8(a1) + li t2, 63 + la t3, Lpolymod + ld t3, 8(t3) + + # Load/store data in reverse order. + # This is needed as a part of endianness swap. + add a0, a0, 8 + li t4, -8 + + .word 0xc1817057 # vsetivli x0, 2, e64, m1, tu, mu + + .word 198537863 # vlse64.v v5, (a0), t4 + .word 1247060695 # vrev8.v v5, v5 + + # Multiplication + + # Do two 64x64 multiplications in one go to save some time + # and simplify things. + + # A = a1a0 (t1, t0) + # B = b1b0 (v5) + # C = c1c0 (256 bit) + # c1 = a1b1 + (a0b1)h + (a1b0)h + # c0 = a0b0 + (a0b1)l + (a1b0)h + + # v1 = (a0b1)l,(a0b0)l + .word 844292311 # vclmul.vx v1, v5, t0 + # v3 = (a0b1)h,(a0b0)h + .word 911401431 # vclmulh.vx v3, v5, t0 + + # v4 = (a1b1)l,(a1b0)l + .word 844325463 # vclmul.vx v4, v5, t1 + # v2 = (a1b1)h,(a1b0)h + .word 911434071 # vclmulh.vx v2, v5, t1 + + # Is there a better way to do this? + # Would need to swap the order of elements within a vector register. 
+ .word 976270039 # vslideup.vi v5, v3, 1 + .word 977318743 # vslideup.vi v6, v4, 1 + .word 1043378647 # vslidedown.vi v3, v3, 1 + .word 1044427351 # vslidedown.vi v4, v4, 1 + + .word 1577103447 # vmv.v.i v0, 1 + # v2 += (a0b1)h + .word 740393303 # vxor.vv v2, v2, v3, v0.t + # v2 += (a1b1)l + .word 740426071 # vxor.vv v2, v2, v4, v0.t + + .word 1577136215 # vmv.v.i v0, 2 + # v1 += (a0b0)h,0 + .word 739410135 # vxor.vv v1, v1, v5, v0.t + # v1 += (a1b0)l,0 + .word 739442903 # vxor.vv v1, v1, v6, v0.t + + # Now the 256bit product should be stored in (v2,v1) + # v1 = (a0b1)l + (a0b0)h + (a1b0)l, (a0b0)l + # v2 = (a1b1)h, (a1b0)h + (a0b1)h + (a1b1)l + + # Reduction + # Let C := A*B = c3,c2,c1,c0 = v2[1],v2[0],v1[1],v1[0] + # This is a slight variation of the Gueron's Montgomery reduction. + # The difference being the order of some operations has been changed, + # to make a better use of vclmul(h) instructions. + + # First step: + # c1 += (c0 * P)l + # vmv.v.i v0, 2 + .word 940618199 # vslideup.vi v3, v1, 1, v0.t + .word 809394647 # vclmul.vx v3, v3, t3, v0.t + .word 739344599 # vxor.vv v1, v1, v3, v0.t + + # Second step: + # D = d1,d0 is final result + # We want: + # m1 = c1 + (c1 * P)h + # m0 = (c1 * P)l + (c0 * P)h + c0 + # d1 = c3 + m1 + # d0 = c2 + m0 + + #v3 = (c1 * P)l, 0 + .word 807297495 # vclmul.vx v3, v1, t3, v0.t + #v4 = (c1 * P)h, (c0 * P)h + .word 907960919 # vclmulh.vx v4, v1, t3 + + .word 1577103447 # vmv.v.i v0, 1 + .word 1043378647 # vslidedown.vi v3, v3, 1 + + .word 772931799 # vxor.vv v1, v1, v4 + .word 739344599 # vxor.vv v1, v1, v3, v0.t + + # XOR in the upper upper part of the product + .word 773882199 # vxor.vv v2, v2, v1 + + .word 1243914583 # vrev8.v v2, v2 + .word 198537511 # vsse64.v v2, (a0), t4 + ret +.size gcm_gmult_rv64i_zvkb_zvbc,.-gcm_gmult_rv64i_zvkb_zvbc +.p2align 3 +.globl gcm_ghash_rv64i_zvkb_zvbc +.type gcm_ghash_rv64i_zvkb_zvbc,@function +gcm_ghash_rv64i_zvkb_zvbc: + ld t0, (a1) + ld t1, 8(a1) + li t2, 63 + la t3, Lpolymod + ld t3, 8(t3) + + # Load/store data in reverse order. + # This is needed as a part of endianness swap. + add a0, a0, 8 + add a2, a2, 8 + li t4, -8 + + .word 0xc1817057 # vsetivli x0, 2, e64, m1, tu, mu + + .word 198537863 # vlse64.v v5, (a0), t4 + +Lstep: + # Read input data + .word 198603655 # vle64.v v0, (a2) + add a2, a2, 16 + add a3, a3, -16 + # XOR them into Xi + .word 777224919 # vxor.vv v0, v0, v1 + + .word 1247060695 # vrev8.v v5, v5 + + # Multiplication + + # Do two 64x64 multiplications in one go to save some time + # and simplify things. + + # A = a1a0 (t1, t0) + # B = b1b0 (v5) + # C = c1c0 (256 bit) + # c1 = a1b1 + (a0b1)h + (a1b0)h + # c0 = a0b0 + (a0b1)l + (a1b0)h + + # v1 = (a0b1)l,(a0b0)l + .word 844292311 # vclmul.vx v1, v5, t0 + # v3 = (a0b1)h,(a0b0)h + .word 911401431 # vclmulh.vx v3, v5, t0 + + # v4 = (a1b1)l,(a1b0)l + .word 844325463 # vclmul.vx v4, v5, t1 + # v2 = (a1b1)h,(a1b0)h + .word 911434071 # vclmulh.vx v2, v5, t1 + + # Is there a better way to do this? + # Would need to swap the order of elements within a vector register. 
+ .word 976270039 # vslideup.vi v5, v3, 1 + .word 977318743 # vslideup.vi v6, v4, 1 + .word 1043378647 # vslidedown.vi v3, v3, 1 + .word 1044427351 # vslidedown.vi v4, v4, 1 + + .word 1577103447 # vmv.v.i v0, 1 + # v2 += (a0b1)h + .word 740393303 # vxor.vv v2, v2, v3, v0.t + # v2 += (a1b1)l + .word 740426071 # vxor.vv v2, v2, v4, v0.t + + .word 1577136215 # vmv.v.i v0, 2 + # v1 += (a0b0)h,0 + .word 739410135 # vxor.vv v1, v1, v5, v0.t + # v1 += (a1b0)l,0 + .word 739442903 # vxor.vv v1, v1, v6, v0.t + + # Now the 256bit product should be stored in (v2,v1) + # v1 = (a0b1)l + (a0b0)h + (a1b0)l, (a0b0)l + # v2 = (a1b1)h, (a1b0)h + (a0b1)h + (a1b1)l + + # Reduction + # Let C := A*B = c3,c2,c1,c0 = v2[1],v2[0],v1[1],v1[0] + # This is a slight variation of the Gueron's Montgomery reduction. + # The difference being the order of some operations has been changed, + # to make a better use of vclmul(h) instructions. + + # First step: + # c1 += (c0 * P)l + # vmv.v.i v0, 2 + .word 940618199 # vslideup.vi v3, v1, 1, v0.t + .word 809394647 # vclmul.vx v3, v3, t3, v0.t + .word 739344599 # vxor.vv v1, v1, v3, v0.t + + # Second step: + # D = d1,d0 is final result + # We want: + # m1 = c1 + (c1 * P)h + # m0 = (c1 * P)l + (c0 * P)h + c0 + # d1 = c3 + m1 + # d0 = c2 + m0 + + #v3 = (c1 * P)l, 0 + .word 807297495 # vclmul.vx v3, v1, t3, v0.t + #v4 = (c1 * P)h, (c0 * P)h + .word 907960919 # vclmulh.vx v4, v1, t3 + + .word 1577103447 # vmv.v.i v0, 1 + .word 1043378647 # vslidedown.vi v3, v3, 1 + + .word 772931799 # vxor.vv v1, v1, v4 + .word 739344599 # vxor.vv v1, v1, v3, v0.t + + # XOR in the upper upper part of the product + .word 773882199 # vxor.vv v2, v2, v1 + + .word 1243914967 # vrev8.v v2, v2 + + bnez a3, Lstep + + .word 198537895 # vsse64.v v2, (a0), t4 + ret +.size gcm_ghash_rv64i_zvkb_zvbc,.-gcm_ghash_rv64i_zvkb_zvbc +.p2align 4 +Lpolymod: + .dword 0x0000000000000001 + .dword 0xc200000000000000 +.size Lpolymod,.-Lpolymod diff --git a/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64-zvkg.S b/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64-zvkg.S new file mode 100644 index 000000000000..759c7c9c9e4d --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64-zvkg.S @@ -0,0 +1,81 @@ +.text +.p2align 3 +.globl gcm_init_rv64i_zvkg +.type gcm_init_rv64i_zvkg,@function +gcm_init_rv64i_zvkg: + ld a2, 0(a1) + ld a3, 8(a1) + sb a2, 7(a0) + srli t0, a2, 8 + sb t0, 6(a0) + srli t0, a2, 16 + sb t0, 5(a0) + srli t0, a2, 24 + sb t0, 4(a0) + srli t0, a2, 32 + sb t0, 3(a0) + srli t0, a2, 40 + sb t0, 2(a0) + srli t0, a2, 48 + sb t0, 1(a0) + srli t0, a2, 56 + sb t0, 0(a0) + + sb a3, 15(a0) + srli t0, a3, 8 + sb t0, 14(a0) + srli t0, a3, 16 + sb t0, 13(a0) + srli t0, a3, 24 + sb t0, 12(a0) + srli t0, a3, 32 + sb t0, 11(a0) + srli t0, a3, 40 + sb t0, 10(a0) + srli t0, a3, 48 + sb t0, 9(a0) + srli t0, a3, 56 + sb t0, 8(a0) + + ret +.size gcm_init_rv64i_zvkg,.-gcm_init_rv64i_zvkg +.p2align 3 +.globl gcm_init_rv64i_zvkg_zvkb +.type gcm_init_rv64i_zvkg_zvkb,@function +gcm_init_rv64i_zvkg_zvkb: + .word 0xc1817057 # vsetivli x0, 2, e64, m1, ta, ma + .word 33943559 # vle64.v v0, (a1) + .word 1241817175 # vrev8.v v0, v0 + .word 33910823 # vse64.v v0, (a0) + ret +.size gcm_init_rv64i_zvkg_zvkb,.-gcm_init_rv64i_zvkg_zvkb +.p2align 3 +.globl gcm_gmult_rv64i_zvkg +.type gcm_gmult_rv64i_zvkg,@function +gcm_gmult_rv64i_zvkg: + .word 0xc1027057 + .word 33939719 + .word 33906823 + .word 2720571639 + .word 33906855 + ret +.size gcm_gmult_rv64i_zvkg,.-gcm_gmult_rv64i_zvkg +.p2align 3 +.globl gcm_ghash_rv64i_zvkg +.type 
gcm_ghash_rv64i_zvkg,@function +gcm_ghash_rv64i_zvkg: + .word 0xc1027057 + .word 33939719 + .word 33906823 + +Lstep: + .word 33972615 + add a2, a2, 16 + add a3, a3, -16 + .word 2988548343 + bnez a3, Lstep + + .word 33906855 + ret + +.size gcm_ghash_rv64i_zvkg,.-gcm_ghash_rv64i_zvkg diff --git a/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64.S b/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64.S new file mode 100644 index 000000000000..337766973685 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/ghash-riscv64.S @@ -0,0 +1,613 @@ +.text +.p2align 3 +.globl gcm_init_rv64i_zbc +.type gcm_init_rv64i_zbc,@function +gcm_init_rv64i_zbc: + ld a2,0(a1) + ld a3,8(a1) + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a2, 1 + and t1, t1, t0 + and a2, a2, t0 + srli a2, a2, 1 + or a2, t1, a2 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a2, 2 + and t1, t1, t0 + and a2, a2, t0 + srli a2, a2, 2 + or a2, t1, a2 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a2, 4 + and t1, t1, t0 + and a2, a2, t0 + srli a2, a2, 4 + or a2, t1, a2 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a3, 1 + and t1, t1, t0 + and a3, a3, t0 + srli a3, a3, 1 + or a3, t1, a3 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a3, 2 + and t1, t1, t0 + and a3, a3, t0 + srli a3, a3, 2 + or a3, t1, a3 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a3, 4 + and t1, t1, t0 + and a3, a3, t0 + srli a3, a3, 4 + or a3, t1, a3 + + sb a2, 7(a0) + srli t0, a2, 8 + sb t0, 6(a0) + srli t0, a2, 16 + sb t0, 5(a0) + srli t0, a2, 24 + sb t0, 4(a0) + srli t0, a2, 32 + sb t0, 3(a0) + srli t0, a2, 40 + sb t0, 2(a0) + srli t0, a2, 48 + sb t0, 1(a0) + srli t0, a2, 56 + sb t0, 0(a0) + + sb a3, 15(a0) + srli t0, a3, 8 + sb t0, 14(a0) + srli t0, a3, 16 + sb t0, 13(a0) + srli t0, a3, 24 + sb t0, 12(a0) + srli t0, a3, 32 + sb t0, 11(a0) + srli t0, a3, 40 + sb t0, 10(a0) + srli t0, a3, 48 + sb t0, 9(a0) + srli t0, a3, 56 + sb t0, 8(a0) + + ret +.size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc +.p2align 3 +.globl gcm_init_rv64i_zbc__zbb +.type gcm_init_rv64i_zbc__zbb,@function +gcm_init_rv64i_zbc__zbb: + ld a2,0(a1) + ld a3,8(a1) + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a2, 1 + and t1, t1, t0 + and a2, a2, t0 + srli a2, a2, 1 + or a2, t1, a2 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a2, 2 + and t1, t1, t0 + and a2, a2, t0 + srli a2, a2, 2 + or a2, t1, a2 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a2, 4 + and t1, t1, t0 + and a2, a2, t0 + srli a2, a2, 4 + or a2, t1, a2 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a3, 1 + and t1, t1, t0 + and a3, a3, t0 + srli a3, a3, 1 + or a3, t1, a3 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a3, 2 + and t1, t1, t0 + and a3, a3, t0 + srli a3, a3, 2 + or a3, t1, a3 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a3, 4 + and t1, t1, t0 + and a3, a3, t0 + srli a3, a3, 4 + or a3, t1, a3 + + .word 1803965971 + .word 1803998867 + sd a2,0(a0) + sd a3,8(a0) + ret +.size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb +.p2align 3 +.globl gcm_init_rv64i_zbc__zbkb +.type gcm_init_rv64i_zbc__zbkb,@function +gcm_init_rv64i_zbc__zbkb: + ld t0,0(a1) + ld t1,8(a1) + .word 1752355475 + .word 1752388371 + .word 1803735699 + .word 1803768595 + sd t0,0(a0) + sd t1,8(a0) + ret +.size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb +.p2align 3 +.globl gcm_gmult_rv64i_zbc +.type gcm_gmult_rv64i_zbc,@function +gcm_gmult_rv64i_zbc: + # Load Xi and bit-reverse it + ld a4, 0(a0) + ld a5, 8(a0) + la t2, 
Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a4, 1 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 1 + or a4, t1, a4 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a4, 2 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 2 + or a4, t1, a4 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a4, 4 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 4 + or a4, t1, a4 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a5, 1 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 1 + or a5, t1, a5 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a5, 2 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 2 + or a5, t1, a5 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a5, 4 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 4 + or a5, t1, a5 + + + # Load the key (already bit-reversed) + ld a6, 0(a1) + ld a7, 8(a1) + + # Load the reduction constant + la t6, Lpolymod + lbu t6, 0(t6) + + # Multiplication (without Karatsuba) + .word 186105395 + .word 186094515 + .word 186072883 + .word 186061619 + xor t2, t2, t5 + .word 185057075 + .word 185048755 + xor t2, t2, t5 + xor t1, t1, t4 + .word 185024307 + .word 185012915 + xor t1, t1, t5 + + # Reduction with clmul + .word 201211699 + .word 201203379 + xor t2, t2, t5 + xor t1, t1, t4 + .word 200523571 + .word 200515251 + xor a5, t1, t5 + xor a4, t0, t4 + + # Bit-reverse Xi back and store it + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a4, 1 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 1 + or a4, t1, a4 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a4, 2 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 2 + or a4, t1, a4 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a4, 4 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 4 + or a4, t1, a4 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a5, 1 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 1 + or a5, t1, a5 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a5, 2 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 2 + or a5, t1, a5 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a5, 4 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 4 + or a5, t1, a5 + + sd a4, 0(a0) + sd a5, 8(a0) + ret +.size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc +.p2align 3 +.globl gcm_gmult_rv64i_zbc__zbkb +.type gcm_gmult_rv64i_zbc__zbkb,@function +gcm_gmult_rv64i_zbc__zbkb: + # Load Xi and bit-reverse it + ld a4, 0(a0) + ld a5, 8(a0) + .word 1752651539 + .word 1752684435 + + # Load the key (already bit-reversed) + ld a6, 0(a1) + ld a7, 8(a1) + + # Load the reduction constant + la t6, Lpolymod + lbu t6, 0(t6) + + # Multiplication (without Karatsuba) + .word 186105395 + .word 186094515 + .word 186072883 + .word 186061619 + xor t2, t2, t5 + .word 185057075 + .word 185048755 + xor t2, t2, t5 + xor t1, t1, t4 + .word 185024307 + .word 185012915 + xor t1, t1, t5 + + # Reduction with clmul + .word 201211699 + .word 201203379 + xor t2, t2, t5 + xor t1, t1, t4 + .word 200523571 + .word 200515251 + xor a5, t1, t5 + xor a4, t0, t4 + + # Bit-reverse Xi back and store it + .word 1752651539 + .word 1752684435 + sd a4, 0(a0) + sd a5, 8(a0) + ret +.size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb +.p2align 3 +.globl gcm_ghash_rv64i_zbc +.type gcm_ghash_rv64i_zbc,@function +gcm_ghash_rv64i_zbc: + # Load Xi and bit-reverse it + ld a4, 0(a0) + ld a5, 8(a0) + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a4, 1 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 1 + or a4, t1, a4 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a4, 2 + and 
t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 2 + or a4, t1, a4 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a4, 4 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 4 + or a4, t1, a4 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a5, 1 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 1 + or a5, t1, a5 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a5, 2 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 2 + or a5, t1, a5 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a5, 4 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 4 + or a5, t1, a5 + + + # Load the key (already bit-reversed) + ld a6, 0(a1) + ld a7, 8(a1) + + # Load the reduction constant + la t6, Lpolymod + lbu t6, 0(t6) + +Lstep: + # Load the input data, bit-reverse them, and XOR them with Xi + ld t4, 0(a2) + ld t5, 8(a2) + add a2, a2, 16 + add a3, a3, -16 + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, t4, 1 + and t1, t1, t0 + and t4, t4, t0 + srli t4, t4, 1 + or t4, t1, t4 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, t4, 2 + and t1, t1, t0 + and t4, t4, t0 + srli t4, t4, 2 + or t4, t1, t4 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, t4, 4 + and t1, t1, t0 + and t4, t4, t0 + srli t4, t4, 4 + or t4, t1, t4 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, t5, 1 + and t1, t1, t0 + and t5, t5, t0 + srli t5, t5, 1 + or t5, t1, t5 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, t5, 2 + and t1, t1, t0 + and t5, t5, t0 + srli t5, t5, 2 + or t5, t1, t5 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, t5, 4 + and t1, t1, t0 + and t5, t5, t0 + srli t5, t5, 4 + or t5, t1, t5 + + xor a4, a4, t4 + xor a5, a5, t5 + + # Multiplication (without Karatsuba) + .word 186105395 + .word 186094515 + .word 186072883 + .word 186061619 + xor t2, t2, t5 + .word 185057075 + .word 185048755 + xor t2, t2, t5 + xor t1, t1, t4 + .word 185024307 + .word 185012915 + xor t1, t1, t5 + + # Reduction with clmul + .word 201211699 + .word 201203379 + xor t2, t2, t5 + xor t1, t1, t4 + .word 200523571 + .word 200515251 + xor a5, t1, t5 + xor a4, t0, t4 + + # Iterate over all blocks + bnez a3, Lstep + + # Bit-reverse final Xi back and store it + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a4, 1 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 1 + or a4, t1, a4 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a4, 2 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 2 + or a4, t1, a4 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a4, 4 + and t1, t1, t0 + and a4, a4, t0 + srli a4, a4, 4 + or a4, t1, a4 + + la t2, Lbrev8_const + + ld t0, 0(t2) # 0xAAAAAAAAAAAAAAAA + slli t1, a5, 1 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 1 + or a5, t1, a5 + + ld t0, 8(t2) # 0xCCCCCCCCCCCCCCCC + slli t1, a5, 2 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 2 + or a5, t1, a5 + + ld t0, 16(t2) # 0xF0F0F0F0F0F0F0F0 + slli t1, a5, 4 + and t1, t1, t0 + and a5, a5, t0 + srli a5, a5, 4 + or a5, t1, a5 + + sd a4, 0(a0) + sd a5, 8(a0) + ret +.size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc +.p2align 3 +.globl gcm_ghash_rv64i_zbc__zbkb +.type gcm_ghash_rv64i_zbc__zbkb,@function +gcm_ghash_rv64i_zbc__zbkb: + # Load Xi and bit-reverse it + ld a4, 0(a0) + ld a5, 8(a0) + .word 1752651539 + .word 1752684435 + + # Load the key (already bit-reversed) + ld a6, 0(a1) + ld a7, 8(a1) + + # Load the reduction constant + la t6, Lpolymod + lbu t6, 0(t6) + +Lstep_zkbk: + # Load the input data, bit-reverse them, and XOR them with Xi + ld t4, 0(a2) + ld t5, 8(a2) + add a2, a2, 16 + add a3, a3, -16 + 
.word 1753144979 + .word 1753177875 + xor a4, a4, t4 + xor a5, a5, t5 + + # Multiplication (without Karatsuba) + .word 186105395 + .word 186094515 + .word 186072883 + .word 186061619 + xor t2, t2, t5 + .word 185057075 + .word 185048755 + xor t2, t2, t5 + xor t1, t1, t4 + .word 185024307 + .word 185012915 + xor t1, t1, t5 + + # Reduction with clmul + .word 201211699 + .word 201203379 + xor t2, t2, t5 + xor t1, t1, t4 + .word 200523571 + .word 200515251 + xor a5, t1, t5 + xor a4, t0, t4 + + # Iterate over all blocks + bnez a3, Lstep_zkbk + + # Bit-reverse final Xi back and store it + .word 1752651539 + .word 1752684435 + sd a4, 0(a0) + sd a5, 8(a0) + ret +.size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb +.p2align 3 +Lbrev8_const: + .dword 0xAAAAAAAAAAAAAAAA + .dword 0xCCCCCCCCCCCCCCCC + .dword 0xF0F0F0F0F0F0F0F0 +.size Lbrev8_const,.-Lbrev8_const + +Lpolymod: + .byte 0x87 +.size Lpolymod,.-Lpolymod diff --git a/contrib/openssl-cmake/asm/crypto/modes/ghash-x86_64.s b/contrib/openssl-cmake/asm/crypto/modes/ghash-x86_64.s new file mode 100644 index 000000000000..63aaa4789cdb --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/ghash-x86_64.s @@ -0,0 +1,1857 @@ +.text + + +.globl gcm_gmult_4bit +.type gcm_gmult_4bit,@function +.align 16 +gcm_gmult_4bit: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $280,%rsp +.cfi_adjust_cfa_offset 280 +.Lgmult_prologue: + + movzbq 15(%rdi),%r8 + leaq .Lrem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $0xf0,%bl + movq %r8,%rdx + jmp .Loop1 + +.align 16 +.Loop1: + shrq $4,%r8 + andq $0xf,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js .Lbreak1 + + shrq $4,%r8 + andq $0xf,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $0xf0,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp .Loop1 + +.align 16 +.Lbreak1: + shrq $4,%r8 + andq $0xf,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $0xf0,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $0xf,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lgmult_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_gmult_4bit,.-gcm_gmult_4bit +.globl gcm_ghash_4bit +.type gcm_ghash_4bit,@function +.align 16 +gcm_ghash_4bit: +.cfi_startproc +.byte 243,15,30,250 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + 
pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $280,%rsp +.cfi_adjust_cfa_offset 280 +.Lghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq 
%rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq .Lrem_8bit(%rip),%r11 + jmp .Louter_loop +.align 16 +.Louter_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq 
%r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb .Louter_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280+48(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq 
-16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq 0(%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lghash_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_ghash_4bit,.-gcm_ghash_4bit +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.cfi_startproc +.byte 243,15,30,250 +.L_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_init_clmul,.-gcm_init_clmul +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.cfi_startproc +.byte 243,15,30,250 +.L_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor 
%xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_gmult_clmul,.-gcm_gmult_clmul +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 32 +gcm_ghash_clmul: +.cfi_startproc +.byte 243,15,30,250 +.L_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $0x10,%rcx + jz .Lodd_tail + + movdqu 16(%rsi),%xmm6 + movl OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $0x30,%rcx + jb .Lskip4x + + andl $71303168,%eax + cmpl $4194304,%eax + je .Lskip4x + + subq $0x30,%rcx + movq $0xA040608020C0E000,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $0x40,%rcx + jc .Ltail4x + + jmp .Lmod4_loop +.align 32 +.Lmod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa .L7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 
102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $0x40,%rcx + jnc .Lmod4_loop + +.Ltail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $0x40,%rcx + jz .Ldone + movdqu 32(%rsi),%xmm7 + subq $0x10,%rcx + jz .Lodd_tail +.Lskip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $0x20,%rcx + jbe .Leven_tail + nop + jmp .Lmod_loop + +.align 32 +.Lmod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $0x20,%rcx + ja .Lmod_loop + +.Leven_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz .Ldone + +.Lodd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 
102,15,58,68,223,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.Ldone: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_ghash_clmul,.-gcm_ghash_clmul +.globl gcm_init_avx +.type gcm_init_avx,@function +.align 32 +gcm_init_avx: +.cfi_startproc +.byte 243,15,30,250 + vzeroupper + + vmovdqu (%rsi),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp .Linit_start_avx +.align 32 +.Linit_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +.Linit_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz .Linit_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_init_avx,.-gcm_init_avx +.globl gcm_gmult_avx +.type gcm_gmult_avx,@function +.align 32 +gcm_gmult_avx: +.cfi_startproc +.byte 243,15,30,250 + jmp .L_gmult_clmul +.cfi_endproc +.size gcm_gmult_avx,.-gcm_gmult_avx +.globl gcm_ghash_avx +.type gcm_ghash_avx,@function +.align 32 +gcm_ghash_avx: +.cfi_startproc +.byte 243,15,30,250 + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq .L0x1c2_polynomial(%rip),%r10 + 
leaq 64(%rsi),%rsi + vmovdqu .Lbswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $0x80,%rcx + jb .Lshort_avx + subq $0x80,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $0x80,%rcx + jb .Ltail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $0x80,%rcx + jmp .Loop8x_avx + +.align 32 +.Loop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + 
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $0x80,%rcx + jnc .Loop8x_avx + + addq $0x80,%rcx + jmp .Ltail_no_xor_avx + +.align 32 +.Lshort_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + 
vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $0x10,%rcx + jmp .Ltail_avx + +.align 32 +.Ltail_avx: + vpxor %xmm10,%xmm15,%xmm15 +.Ltail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne .Lshort_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size gcm_ghash_avx,.-gcm_ghash_avx +.section .rodata +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: +.long 7,0,7,0 +.L7_mask_poly: +.long 7,0,450,0 +.align 64 +.type .Lrem_4bit,@object +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.type .Lrem_8bit,@object +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 
0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/modes/ghashp8-ppc.s b/contrib/openssl-cmake/asm/crypto/modes/ghashp8-ppc.s new file mode 100644 index 000000000000..60aa7b045723 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/ghashp8-ppc.s @@ -0,0 +1,575 @@ +.machine "any" + +.abiversion 2 +.text + +.globl gcm_init_p8 +.type gcm_init_p8,@function +.align 5 +gcm_init_p8: +.localentry gcm_init_p8,0 + + li 0,-4096 + li 8,0x10 + li 12,-1 + li 9,0x20 + or 0,0,0 + li 10,0x30 + .long 0x7D202699 + + vspltisb 8,-16 + vspltisb 5,1 + vaddubm 8,8,8 + vxor 4,4,4 + vor 8,8,5 + vsldoi 8,8,4,15 + vsldoi 6,4,5,1 + vaddubm 8,8,8 + vspltisb 7,7 + vor 8,8,6 + vspltb 6,9,0 + vsl 9,9,5 + vsrab 6,6,7 + vand 6,6,8 + vxor 3,9,6 + + vsldoi 9,3,3,8 + vsldoi 8,4,8,8 + vsldoi 11,4,9,8 + vsldoi 10,9,4,8 + + .long 0x7D001F99 + .long 0x7D681F99 + li 8,0x40 + .long 0x7D291F99 + li 9,0x50 + .long 0x7D4A1F99 + li 10,0x60 + + .long 0x10035CC8 + .long 0x10234CC8 + .long 0x104354C8 + + .long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 + .long 0x100044C8 + vxor 6,6,2 + vxor 16,0,6 + + vsldoi 17,16,16,8 + vsldoi 19,4,17,8 + vsldoi 18,17,4,8 + + .long 0x7E681F99 + li 8,0x70 + .long 0x7E291F99 + li 9,0x80 + .long 0x7E4A1F99 + li 10,0x90 + .long 0x10039CC8 + .long 0x11B09CC8 + .long 0x10238CC8 + .long 0x11D08CC8 + .long 0x104394C8 + .long 0x11F094C8 + + .long 0x10E044C8 + .long 0x114D44C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vsldoi 11,14,4,8 + vsldoi 9,4,14,8 + vxor 0,0,5 + vxor 2,2,6 + vxor 13,13,11 + vxor 15,15,9 + + vsldoi 0,0,0,8 + vsldoi 13,13,13,8 + vxor 0,0,7 + vxor 13,13,10 + + vsldoi 6,0,0,8 + vsldoi 9,13,13,8 
+ .long 0x100044C8 + .long 0x11AD44C8 + vxor 6,6,2 + vxor 9,9,15 + vxor 0,0,6 + vxor 13,13,9 + + vsldoi 9,0,0,8 + vsldoi 17,13,13,8 + vsldoi 11,4,9,8 + vsldoi 10,9,4,8 + vsldoi 19,4,17,8 + vsldoi 18,17,4,8 + + .long 0x7D681F99 + li 8,0xa0 + .long 0x7D291F99 + li 9,0xb0 + .long 0x7D4A1F99 + li 10,0xc0 + .long 0x7E681F99 + .long 0x7E291F99 + .long 0x7E4A1F99 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 +.size gcm_init_p8,.-gcm_init_p8 +.globl gcm_gmult_p8 +.type gcm_gmult_p8,@function +.align 5 +gcm_gmult_p8: +.localentry gcm_gmult_p8,0 + + lis 0,0xfff8 + li 8,0x10 + li 12,-1 + li 9,0x20 + or 0,0,0 + li 10,0x30 + .long 0x7C601E99 + + .long 0x7D682699 + + .long 0x7D292699 + + .long 0x7D4A2699 + + .long 0x7D002699 + + vxor 4,4,4 + + .long 0x10035CC8 + .long 0x10234CC8 + .long 0x104354C8 + + .long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 + .long 0x100044C8 + vxor 6,6,2 + vxor 0,0,6 + + + .long 0x7C001F99 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 +.size gcm_gmult_p8,.-gcm_gmult_p8 + +.globl gcm_ghash_p8 +.type gcm_ghash_p8,@function +.align 5 +gcm_ghash_p8: +.localentry gcm_ghash_p8,0 + + li 0,-4096 + li 8,0x10 + li 12,-1 + li 9,0x20 + or 0,0,0 + li 10,0x30 + .long 0x7C001E99 + + .long 0x7D682699 + li 8,0x40 + + .long 0x7D292699 + li 9,0x50 + + .long 0x7D4A2699 + li 10,0x60 + + .long 0x7D002699 + + vxor 4,4,4 + + cmpldi 6,64 + bge .Lgcm_ghash_p8_4x + + .long 0x7C602E99 + addi 5,5,16 + subic. 6,6,16 + + vxor 3,3,0 + beq .Lshort + + .long 0x7E682699 + li 8,16 + .long 0x7E292699 + add 9,5,6 + .long 0x7E4A2699 + b .Loop_2x + +.align 5 +.Loop_2x: + .long 0x7E002E99 + + + subic 6,6,32 + .long 0x10039CC8 + .long 0x11B05CC8 + subfe 0,0,0 + .long 0x10238CC8 + .long 0x11D04CC8 + and 0,0,6 + .long 0x104394C8 + .long 0x11F054C8 + add 5,5,0 + + vxor 0,0,13 + vxor 1,1,14 + + .long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 2,2,15 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + .long 0x7C682E99 + addi 5,5,32 + + vsldoi 6,0,0,8 + .long 0x100044C8 + + vxor 6,6,2 + vxor 3,3,6 + vxor 3,3,0 + cmpld 9,5 + bgt .Loop_2x + + cmplwi 6,0 + bne .Leven + +.Lshort: + .long 0x10035CC8 + .long 0x10234CC8 + .long 0x104354C8 + + .long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 + .long 0x100044C8 + vxor 6,6,2 + +.Leven: + vxor 0,0,6 + + .long 0x7C001F99 + + or 12,12,12 + blr +.long 0 +.byte 0,12,0x14,0,0,0,4,0 +.long 0 +.align 5 +.gcm_ghash_p8_4x: +.Lgcm_ghash_p8_4x: + stdu 1,-256(1) + li 10,63 + li 11,79 + stvx 20,10,1 + addi 10,10,32 + stvx 21,11,1 + addi 11,11,32 + stvx 22,10,1 + addi 10,10,32 + stvx 23,11,1 + addi 11,11,32 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + li 10,0x60 + stvx 31,11,1 + li 0,-1 + stw 12,252(1) + or 0,0,0 + + lvsl 5,0,8 + + li 8,0x70 + .long 0x7E292699 + li 9,0x80 + vspltisb 6,8 + + li 10,0x90 + .long 0x7EE82699 + li 8,0xa0 + .long 0x7F092699 + li 9,0xb0 + .long 0x7F2A2699 + li 10,0xc0 + .long 0x7FA82699 + li 8,0x10 + .long 0x7FC92699 + li 9,0x20 + .long 0x7FEA2699 + li 10,0x30 + + vsldoi 7,4,6,8 + vaddubm 18,5,7 + vaddubm 19,6,18 + + srdi 6,6,4 + + .long 0x7C602E99 + .long 0x7E082E99 + subic. 
6,6,8 + .long 0x7EC92E99 + .long 0x7F8A2E99 + addi 5,5,0x40 + + + + + + vxor 2,3,0 + + .long 0x11B0BCC8 + .long 0x11D0C4C8 + .long 0x11F0CCC8 + + vperm 11,17,9,18 + vperm 5,22,28,19 + vperm 10,17,9,19 + vperm 6,22,28,18 + .long 0x12B68CC8 + .long 0x12855CC8 + .long 0x137C4CC8 + .long 0x134654C8 + + vxor 21,21,14 + vxor 20,20,13 + vxor 27,27,21 + vxor 26,26,15 + + blt .Ltail_4x + +.Loop_4x: + .long 0x7C602E99 + .long 0x7E082E99 + subic. 6,6,4 + .long 0x7EC92E99 + .long 0x7F8A2E99 + addi 5,5,0x40 + + + + + + .long 0x1002ECC8 + .long 0x1022F4C8 + .long 0x1042FCC8 + .long 0x11B0BCC8 + .long 0x11D0C4C8 + .long 0x11F0CCC8 + + vxor 0,0,20 + vxor 1,1,27 + vxor 2,2,26 + vperm 5,22,28,19 + vperm 6,22,28,18 + + .long 0x10E044C8 + .long 0x12855CC8 + .long 0x134654C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 + .long 0x12B68CC8 + .long 0x137C4CC8 + .long 0x100044C8 + + vxor 20,20,13 + vxor 26,26,15 + vxor 2,2,3 + vxor 21,21,14 + vxor 2,2,6 + vxor 27,27,21 + vxor 2,2,0 + bge .Loop_4x + +.Ltail_4x: + .long 0x1002ECC8 + .long 0x1022F4C8 + .long 0x1042FCC8 + + vxor 0,0,20 + vxor 1,1,27 + + .long 0x10E044C8 + + vsldoi 5,1,4,8 + vsldoi 6,4,1,8 + vxor 2,2,26 + vxor 0,0,5 + vxor 2,2,6 + + vsldoi 0,0,0,8 + vxor 0,0,7 + + vsldoi 6,0,0,8 + .long 0x100044C8 + vxor 6,6,2 + vxor 0,0,6 + + addic. 6,6,4 + beq .Ldone_4x + + .long 0x7C602E99 + cmpldi 6,2 + li 6,-4 + blt .Lone + .long 0x7E082E99 + beq .Ltwo + +.Lthree: + .long 0x7EC92E99 + + + + + vxor 2,3,0 + vor 29,23,23 + vor 30,24,24 + vor 31,25,25 + + vperm 5,16,22,19 + vperm 6,16,22,18 + .long 0x12B08CC8 + .long 0x13764CC8 + .long 0x12855CC8 + .long 0x134654C8 + + vxor 27,27,21 + b .Ltail_4x + +.align 4 +.Ltwo: + + + + vxor 2,3,0 + vperm 5,4,16,19 + vperm 6,4,16,18 + + vsldoi 29,4,17,8 + vor 30,17,17 + vsldoi 31,17,4,8 + + .long 0x12855CC8 + .long 0x13704CC8 + .long 0x134654C8 + + b .Ltail_4x + +.align 4 +.Lone: + + + vsldoi 29,4,9,8 + vor 30,9,9 + vsldoi 31,9,4,8 + + vxor 2,3,0 + vxor 20,20,20 + vxor 27,27,27 + vxor 26,26,26 + + b .Ltail_4x + +.Ldone_4x: + + .long 0x7C001F99 + + li 10,63 + li 11,79 + or 12,12,12 + lvx 20,10,1 + addi 10,10,32 + lvx 21,11,1 + addi 11,11,32 + lvx 22,10,1 + addi 10,10,32 + lvx 23,11,1 + addi 11,11,32 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + addi 1,1,256 + blr +.long 0 +.byte 0,12,0x04,0,0x80,0,4,0 +.long 0 +.size gcm_ghash_p8,.-gcm_ghash_p8 + +.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 diff --git a/contrib/openssl-cmake/asm/crypto/modes/ghashv8-armx.S b/contrib/openssl-cmake/asm/crypto/modes/ghashv8-armx.S new file mode 100644 index 000000000000..2224d2e7f7d8 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/modes/ghashv8-armx.S @@ -0,0 +1,644 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.arch armv8-a+crypto +.text +.globl gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 + ext v3.16b,v17.16b,v17.16b,#8 + ushr v18.2d,v19.2d,#63 + dup v17.4s,v17.s[1] + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and 
v18.16b,v18.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v18.16b,v18.16b,v18.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2] + //calculate H^3 and H^4 + pmull v0.1q,v20.1d, v22.1d + pmull v5.1q,v22.1d,v22.1d + pmull2 v2.1q,v20.2d, v22.2d + pmull2 v7.1q,v22.2d,v22.2d + pmull v1.1q,v16.1d,v17.1d + pmull v6.1q,v17.1d,v17.1d + + ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + ext v17.16b,v5.16b,v7.16b,#8 + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v16.16b + eor v4.16b,v5.16b,v7.16b + eor v6.16b,v6.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + eor v6.16b,v6.16b,v4.16b + pmull v4.1q,v5.1d,v19.1d + + ins v2.d[0],v1.d[1] + ins v7.d[0],v6.d[1] + ins v1.d[1],v0.d[0] + ins v6.d[1],v5.d[0] + eor v0.16b,v1.16b,v18.16b + eor v5.16b,v6.16b,v4.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v4.16b,v5.16b,v5.16b,#8 + pmull v0.1q,v0.1d,v19.1d + pmull v5.1q,v5.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v4.16b,v4.16b,v7.16b + eor v23.16b, v0.16b,v18.16b //H^3 + eor v25.16b,v5.16b,v4.16b //H^4 + + ext v16.16b,v23.16b, v23.16b,#8 //Karatsuba pre-processing + ext v17.16b,v25.16b,v25.16b,#8 + ext v18.16b,v22.16b,v22.16b,#8 + eor v16.16b,v16.16b,v23.16b + eor v17.16b,v17.16b,v25.16b + eor v18.16b,v18.16b,v22.16b + ext v24.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v23.2d,v24.2d,v25.2d},[x0],#48 //store Htable[3..5] + + //calculate H^5 and H^6 + pmull v0.1q,v22.1d, v23.1d + pmull v5.1q,v23.1d,v23.1d + pmull2 v2.1q,v22.2d, v23.2d + pmull2 v7.1q,v23.2d,v23.2d + pmull v1.1q,v16.1d,v18.1d + pmull v6.1q,v16.1d,v16.1d + + ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + ext v17.16b,v5.16b,v7.16b,#8 + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v16.16b + eor v4.16b,v5.16b,v7.16b + eor v6.16b,v6.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + eor v6.16b,v6.16b,v4.16b + pmull v4.1q,v5.1d,v19.1d + + ins v2.d[0],v1.d[1] + ins v7.d[0],v6.d[1] + ins v1.d[1],v0.d[0] + ins v6.d[1],v5.d[0] + eor v0.16b,v1.16b,v18.16b + eor v5.16b,v6.16b,v4.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v4.16b,v5.16b,v5.16b,#8 + pmull v0.1q,v0.1d,v19.1d + pmull v5.1q,v5.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v4.16b,v4.16b,v7.16b + eor v26.16b,v0.16b,v18.16b //H^5 + eor v28.16b,v5.16b,v4.16b //H^6 + + ext v16.16b,v26.16b, v26.16b,#8 //Karatsuba pre-processing + ext v17.16b,v28.16b,v28.16b,#8 + ext v18.16b,v22.16b,v22.16b,#8 + eor v16.16b,v16.16b,v26.16b + eor v17.16b,v17.16b,v28.16b + eor v18.16b,v18.16b,v22.16b + ext v27.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v26.2d,v27.2d,v28.2d},[x0],#48 //store Htable[6..8] + + 
//calculate H^7 and H^8 + pmull v0.1q,v22.1d,v26.1d + pmull v5.1q,v22.1d,v28.1d + pmull2 v2.1q,v22.2d,v26.2d + pmull2 v7.1q,v22.2d,v28.2d + pmull v1.1q,v16.1d,v18.1d + pmull v6.1q,v17.1d,v18.1d + + ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + ext v17.16b,v5.16b,v7.16b,#8 + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v16.16b + eor v4.16b,v5.16b,v7.16b + eor v6.16b,v6.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + eor v6.16b,v6.16b,v4.16b + pmull v4.1q,v5.1d,v19.1d + + ins v2.d[0],v1.d[1] + ins v7.d[0],v6.d[1] + ins v1.d[1],v0.d[0] + ins v6.d[1],v5.d[0] + eor v0.16b,v1.16b,v18.16b + eor v5.16b,v6.16b,v4.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v4.16b,v5.16b,v5.16b,#8 + pmull v0.1q,v0.1d,v19.1d + pmull v5.1q,v5.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v4.16b,v4.16b,v7.16b + eor v29.16b,v0.16b,v18.16b //H^7 + eor v31.16b,v5.16b,v4.16b //H^8 + + ext v16.16b,v29.16b,v29.16b,#8 //Karatsuba pre-processing + ext v17.16b,v31.16b,v31.16b,#8 + eor v16.16b,v16.16b,v29.16b + eor v17.16b,v17.16b,v31.16b + ext v30.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v29.2d,v30.2d,v31.2d},[x0] //store Htable[9..11] + ret +.size gcm_init_v8,.-gcm_init_v8 +.globl gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + AARCH64_VALID_CALL_TARGET + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... + shl v19.2d,v19.2d,#57 +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v3.16b,v17.16b,v17.16b,#8 + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 +.globl gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + AARCH64_VALID_CALL_TARGET + cmp x3,#64 + b.hs .Lgcm_ghash_v8_4x + ld1 {v0.2d},[x0] //load [rotated] Xi + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //algorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude overstepping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? 
+ ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b + rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo .Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b .Loop_mod2x_v8 + +.align 4 +.Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __AARCH64EB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __AARCH64EB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs .Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq .Ldone_v8 //is x3 zero? 
+.Lodd_tail_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi + + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +.Ldone_v8: +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +.type gcm_ghash_v8_4x,%function +.align 4 +gcm_ghash_v8_4x: +.Lgcm_ghash_v8_4x: + ld1 {v0.2d},[x0] //load [rotated] Xi + ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2 + movi v19.16b,#0xe1 + ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4 + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant + + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + ext v25.16b,v7.16b,v7.16b,#8 + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + pmull2 v31.1q,v20.2d,v25.2d + pmull v30.1q,v21.1d,v7.1d + + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#128 + b.lo .Ltail4x + + b .Loop4x + +.align 4 +.Loop4x: + eor v16.16b,v4.16b,v0.16b + ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 + ext v3.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v7.16b,v7.16b + rev64 v4.16b,v4.16b +#endif + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + ext v25.16b,v7.16b,v7.16b,#8 + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + ext v24.16b,v6.16b,v6.16b,#8 + eor v1.16b,v1.16b,v30.16b + ext v23.16b,v5.16b,v5.16b,#8 + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + pmull v29.1q,v20.1d,v25.1d //H·Ii+3 + eor v7.16b,v7.16b,v25.16b + eor v1.16b,v1.16b,v17.16b + pmull2 v31.1q,v20.2d,v25.2d + eor v1.16b,v1.16b,v18.16b + pmull v30.1q,v21.1d,v7.1d + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 + eor v6.16b,v6.16b,v24.16b + pmull2 v24.1q,v22.2d,v24.2d + eor v0.16b,v1.16b,v18.16b + pmull2 v6.1q,v21.2d,v6.2d + + eor v29.16b,v29.16b,v16.16b + eor v31.16b,v31.16b,v24.16b + eor v30.16b,v30.16b,v6.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 + eor v5.16b,v5.16b,v23.16b + eor v18.16b,v18.16b,v2.16b + pmull2 v23.1q,v26.2d,v23.2d + pmull v5.1q,v27.1d,v5.1d + + eor 
v0.16b,v0.16b,v18.16b + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + eor v30.16b,v30.16b,v5.16b + + subs x3,x3,#64 + b.hs .Loop4x + +.Ltail4x: + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v28.2d,v3.2d + pmull2 v1.1q,v27.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + + adds x3,x3,#64 + b.eq .Ldone4x + + cmp x3,#32 + b.lo .Lone + b.eq .Ltwo +.Lthree: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d,v6.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v6.16b,v6.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v24.16b,v6.16b,v6.16b,#8 + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + pmull v29.1q,v20.1d,v24.1d //H·Ii+2 + eor v6.16b,v6.16b,v24.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + pmull2 v31.1q,v20.2d,v24.2d + pmull v30.1q,v21.1d,v6.1d + eor v0.16b,v0.16b,v18.16b + pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1 + eor v5.16b,v5.16b,v23.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull2 v23.1q,v22.2d,v23.2d + eor v16.16b,v4.16b,v0.16b + pmull2 v5.1q,v21.2d,v5.2d + ext v3.16b,v16.16b,v16.16b,#8 + + eor v29.16b,v29.16b,v7.16b + eor v31.16b,v31.16b,v23.16b + eor v30.16b,v30.16b,v5.16b + + pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v26.2d,v3.2d + pmull v1.1q,v27.1d,v16.1d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b .Ldone4x + +.align 4 +.Ltwo: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d,v5.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v5.16b,v5.16b + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v23.16b,v5.16b,v5.16b,#8 + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + pmull v29.1q,v20.1d,v23.1d //H·Ii+1 + eor v5.16b,v5.16b,v23.16b + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull2 v31.1q,v20.2d,v23.2d + pmull v30.1q,v21.1d,v5.1d + + pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii) + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v22.2d,v3.2d + pmull2 v1.1q,v21.2d,v16.2d + + eor v0.16b,v0.16b,v29.16b + eor v2.16b,v2.16b,v31.16b + eor v1.16b,v1.16b,v30.16b + b .Ldone4x + +.align 4 +.Lone: + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v4.2d},[x2] + eor v1.16b,v1.16b,v18.16b +#ifndef __AARCH64EB__ + rev64 v4.16b,v4.16b +#endif + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + + eor v16.16b,v4.16b,v0.16b + ext v3.16b,v16.16b,v16.16b,#8 + + pmull v0.1q,v20.1d,v3.1d + eor v16.16b,v16.16b,v3.16b + pmull2 v2.1q,v20.2d,v3.2d + pmull v1.1q,v21.1d,v16.1d + +.Ldone4x: + ext 
v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + ext v0.16b,v0.16b,v0.16b,#8 + +#ifndef __AARCH64EB__ + rev64 v0.16b,v0.16b +#endif + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x +.section .rodata +.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 +#endif diff --git a/contrib/openssl-cmake/asm/crypto/poly1305/poly1305-armv8.S b/contrib/openssl-cmake/asm/crypto/poly1305/poly1305-armv8.S new file mode 100644 index 000000000000..edf9679a11d4 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/poly1305/poly1305-armv8.S @@ -0,0 +1,884 @@ +#include "arm_arch.h" + +.text + +// forward "declarations" are required for Apple + +.hidden OPENSSL_armcap_P +.globl poly1305_init +.hidden poly1305_init +.globl poly1305_blocks +.hidden poly1305_blocks +.globl poly1305_emit +.hidden poly1305_emit + +.type poly1305_init,%function +.align 5 +poly1305_init: + AARCH64_VALID_CALL_TARGET + cmp x1,xzr + stp xzr,xzr,[x0] // zero hash value + stp xzr,xzr,[x0,#16] // [along with is_base2_26] + + csel x0,xzr,x0,eq + b.eq .Lno_key + + adrp x17,OPENSSL_armcap_P + ldr w17,[x17,#:lo12:OPENSSL_armcap_P] + + ldp x7,x8,[x1] // load key + mov x9,#0xfffffffc0fffffff + movk x9,#0x0fff,lsl#48 +#ifdef __AARCH64EB__ + rev x7,x7 // flip bytes + rev x8,x8 +#endif + and x7,x7,x9 // &=0ffffffc0fffffff + and x9,x9,#-4 + and x8,x8,x9 // &=0ffffffc0ffffffc + stp x7,x8,[x0,#32] // save key value + + tst w17,#ARMV7_NEON + + adrp x12,poly1305_blocks + add x12,x12,#:lo12:.Lpoly1305_blocks + adrp x7,poly1305_blocks_neon + add x7,x7,#:lo12:.Lpoly1305_blocks_neon + adrp x13,poly1305_emit + add x13,x13,#:lo12:.Lpoly1305_emit + adrp x8,poly1305_emit_neon + add x8,x8,#:lo12:.Lpoly1305_emit_neon + + csel x12,x12,x7,eq + csel x13,x13,x8,eq + +#ifdef __ILP32__ + stp w12,w13,[x2] +#else + stp x12,x13,[x2] +#endif + + mov x0,#1 +.Lno_key: + ret +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,%function +.align 5 +poly1305_blocks: +.Lpoly1305_blocks: + // The symbol .Lpoly1305_blocks is not a .globl symbol + // but a pointer to it is returned by poly1305_init + AARCH64_VALID_CALL_TARGET + ands x2,x2,#-16 + b.eq .Lno_data + + ldp x4,x5,[x0] // load hash value + ldp x7,x8,[x0,#32] // load key value + ldr x6,[x0,#16] + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + b .Loop + +.align 5 +.Loop: + ldp x10,x11,[x1],#16 // load input + sub x2,x2,#16 +#ifdef __AARCH64EB__ + rev x10,x10 + rev x11,x11 +#endif + adds x4,x4,x10 // accumulate input + adcs x5,x5,x11 + + mul x12,x4,x7 // h0*r0 + adc x6,x6,x3 + umulh x13,x4,x7 + + mul x10,x5,x9 // h1*5*r1 + umulh x11,x5,x9 + + adds x12,x12,x10 + mul x10,x4,x8 // h0*r1 + adc x13,x13,x11 + umulh x14,x4,x8 + + adds x13,x13,x10 + mul x10,x5,x7 // h1*r0 + adc x14,x14,xzr + umulh x11,x5,x7 + + adds x13,x13,x10 + mul x10,x6,x9 // h2*5*r1 + adc x14,x14,x11 + mul x11,x6,x7 // h2*r0 + + adds x13,x13,x10 + adc x14,x14,x11 + + and x10,x14,#-4 // final reduction + and x6,x14,#3 + add x10,x10,x14,lsr#2 + adds x4,x12,x10 + adcs x5,x13,xzr + adc x6,x6,xzr + + cbnz x2,.Loop + + stp x4,x5,[x0] 
// store hash value + str x6,[x0,#16] + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,%function +.align 5 +poly1305_emit: +.Lpoly1305_emit: + // The symbol .poly1305_emit is not a .globl symbol + // but a pointer to it is returned by poly1305_init + AARCH64_VALID_CALL_TARGET + ldp x4,x5,[x0] // load hash base 2^64 + ldr x6,[x0,#16] + ldp x10,x11,[x2] // load nonce + + adds x12,x4,#5 // compare to modulus + adcs x13,x5,xzr + adc x14,x6,xzr + + tst x14,#-4 // see if it's carried/borrowed + + csel x4,x4,x12,eq + csel x5,x5,x13,eq + +#ifdef __AARCH64EB__ + ror x10,x10,#32 // flip nonce words + ror x11,x11,#32 +#endif + adds x4,x4,x10 // accumulate nonce + adc x5,x5,x11 +#ifdef __AARCH64EB__ + rev x4,x4 // flip output bytes + rev x5,x5 +#endif + stp x4,x5,[x1] // write result + + ret +.size poly1305_emit,.-poly1305_emit +.type poly1305_mult,%function +.align 5 +poly1305_mult: + mul x12,x4,x7 // h0*r0 + umulh x13,x4,x7 + + mul x10,x5,x9 // h1*5*r1 + umulh x11,x5,x9 + + adds x12,x12,x10 + mul x10,x4,x8 // h0*r1 + adc x13,x13,x11 + umulh x14,x4,x8 + + adds x13,x13,x10 + mul x10,x5,x7 // h1*r0 + adc x14,x14,xzr + umulh x11,x5,x7 + + adds x13,x13,x10 + mul x10,x6,x9 // h2*5*r1 + adc x14,x14,x11 + mul x11,x6,x7 // h2*r0 + + adds x13,x13,x10 + adc x14,x14,x11 + + and x10,x14,#-4 // final reduction + and x6,x14,#3 + add x10,x10,x14,lsr#2 + adds x4,x12,x10 + adcs x5,x13,xzr + adc x6,x6,xzr + + ret +.size poly1305_mult,.-poly1305_mult + +.type poly1305_splat,%function +.align 5 +poly1305_splat: + and x12,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x13,x4,#26,#26 + extr x14,x5,x4,#52 + and x14,x14,#0x03ffffff + ubfx x15,x5,#14,#26 + extr x16,x6,x5,#40 + + str w12,[x0,#16*0] // r0 + add w12,w13,w13,lsl#2 // r1*5 + str w13,[x0,#16*1] // r1 + add w13,w14,w14,lsl#2 // r2*5 + str w12,[x0,#16*2] // s1 + str w14,[x0,#16*3] // r2 + add w14,w15,w15,lsl#2 // r3*5 + str w13,[x0,#16*4] // s2 + str w15,[x0,#16*5] // r3 + add w15,w16,w16,lsl#2 // r4*5 + str w14,[x0,#16*6] // s3 + str w16,[x0,#16*7] // r4 + str w15,[x0,#16*8] // s4 + + ret +.size poly1305_splat,.-poly1305_splat + +.type poly1305_blocks_neon,%function +.align 5 +poly1305_blocks_neon: +.Lpoly1305_blocks_neon: + // The symbol .Lpoly1305_blocks_neon is not a .globl symbol + // but a pointer to it is returned by poly1305_init + AARCH64_VALID_CALL_TARGET + ldr x17,[x0,#24] + cmp x2,#128 + b.hs .Lblocks_neon + cbz x17,.Lpoly1305_blocks + +.Lblocks_neon: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + + ands x2,x2,#-16 + b.eq .Lno_data_neon + + cbz x17,.Lbase2_64_neon + + ldp w10,w11,[x0] // load hash value base 2^26 + ldp w12,w13,[x0,#8] + ldr w14,[x0,#16] + + tst x2,#31 + b.eq .Leven_neon + + ldp x7,x8,[x0,#32] // load key value + + add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr x5,x12,#12 + adds x4,x4,x12,lsl#52 + add x5,x5,x13,lsl#14 + adc x5,x5,xzr + lsr x6,x14,#24 + adds x5,x5,x14,lsl#40 + adc x14,x6,xzr // can be partially reduced... + + ldp x12,x13,[x1],#16 // load input + sub x2,x2,#16 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + + and x10,x14,#-4 // ... 
so reduce + and x6,x14,#3 + add x10,x10,x14,lsr#2 + adds x4,x4,x10 + adcs x5,x5,xzr + adc x6,x6,xzr + +#ifdef __AARCH64EB__ + rev x12,x12 + rev x13,x13 +#endif + adds x4,x4,x12 // accumulate input + adcs x5,x5,x13 + adc x6,x6,x3 + + bl poly1305_mult + ldr x30,[sp,#8] + + cbz x3,.Lstore_base2_64_neon + + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,x4,#26,#26 + extr x12,x5,x4,#52 + and x12,x12,#0x03ffffff + ubfx x13,x5,#14,#26 + extr x14,x6,x5,#40 + + cbnz x2,.Leven_neon + + stp w10,w11,[x0] // store hash value base 2^26 + stp w12,w13,[x0,#8] + str w14,[x0,#16] + b .Lno_data_neon + +.align 4 +.Lstore_base2_64_neon: + stp x4,x5,[x0] // store hash value base 2^64 + stp x6,xzr,[x0,#16] // note that is_base2_26 is zeroed + b .Lno_data_neon + +.align 4 +.Lbase2_64_neon: + ldp x7,x8,[x0,#32] // load key value + + ldp x4,x5,[x0] // load hash value base 2^64 + ldr x6,[x0,#16] + + tst x2,#31 + b.eq .Linit_neon + + ldp x12,x13,[x1],#16 // load input + sub x2,x2,#16 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) +#ifdef __AARCH64EB__ + rev x12,x12 + rev x13,x13 +#endif + adds x4,x4,x12 // accumulate input + adcs x5,x5,x13 + adc x6,x6,x3 + + bl poly1305_mult + +.Linit_neon: + and x10,x4,#0x03ffffff // base 2^64 -> base 2^26 + ubfx x11,x4,#26,#26 + extr x12,x5,x4,#52 + and x12,x12,#0x03ffffff + ubfx x13,x5,#14,#26 + extr x14,x6,x5,#40 + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + fmov d24,x10 + fmov d25,x11 + fmov d26,x12 + fmov d27,x13 + fmov d28,x14 + + ////////////////////////////////// initialize r^n table + mov x4,x7 // r^1 + add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2) + mov x5,x8 + mov x6,xzr + add x0,x0,#48+12 + bl poly1305_splat + + bl poly1305_mult // r^2 + sub x0,x0,#4 + bl poly1305_splat + + bl poly1305_mult // r^3 + sub x0,x0,#4 + bl poly1305_splat + + bl poly1305_mult // r^4 + sub x0,x0,#4 + bl poly1305_splat + ldr x30,[sp,#8] + + add x16,x1,#32 + adrp x17,.Lzeros + add x17,x17,#:lo12:.Lzeros + subs x2,x2,#64 + csel x16,x17,x16,lo + + mov x4,#1 + stur x4,[x0,#-24] // set is_base2_26 + sub x0,x0,#48 // restore original x0 + b .Ldo_neon + +.align 4 +.Leven_neon: + add x16,x1,#32 + adrp x17,.Lzeros + add x17,x17,#:lo12:.Lzeros + subs x2,x2,#64 + csel x16,x17,x16,lo + + stp d8,d9,[sp,#16] // meet ABI requirements + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + + fmov d24,x10 + fmov d25,x11 + fmov d26,x12 + fmov d27,x13 + fmov d28,x14 + +.Ldo_neon: + ldp x8,x12,[x16],#16 // inp[2:3] (or zero) + ldp x9,x13,[x16],#48 + + lsl x3,x3,#24 + add x15,x0,#48 + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov d14,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,x3,x12,lsr#40 + add x13,x3,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov d15,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + fmov d16,x8 + fmov d17,x10 + fmov d18,x12 + + ldp x8,x12,[x1],#16 // inp[0:1] + ldp x9,x13,[x1],#48 + + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 + ld1 {v8.4s},[x15] + +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + and x4,x8,#0x03ffffff // 
base 2^64 -> base 2^26 + and x5,x9,#0x03ffffff + ubfx x6,x8,#26,#26 + ubfx x7,x9,#26,#26 + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + extr x8,x12,x8,#52 + extr x9,x13,x9,#52 + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + fmov d9,x4 + and x8,x8,#0x03ffffff + and x9,x9,#0x03ffffff + ubfx x10,x12,#14,#26 + ubfx x11,x13,#14,#26 + add x12,x3,x12,lsr#40 + add x13,x3,x13,lsr#40 + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + fmov d10,x6 + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + movi v31.2d,#-1 + fmov d11,x8 + fmov d12,x10 + fmov d13,x12 + ushr v31.2d,v31.2d,#38 + + b.ls .Lskip_loop + +.align 4 +.Loop_neon: + //////////////////////////////////////////////////////////////// + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r + // ___________________/ + // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2 + // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r + // ___________________/ ____________________/ + // + // Note that we start with inp[2:3]*r^2. This is because it + // doesn't depend on reduction in previous iteration. + //////////////////////////////////////////////////////////////// + // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0 + // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4 + // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3 + // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2 + // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1 + + subs x2,x2,#64 + umull v23.2d,v14.2s,v7.s[2] + csel x16,x17,x16,lo + umull v22.2d,v14.2s,v5.s[2] + umull v21.2d,v14.2s,v3.s[2] + ldp x8,x12,[x16],#16 // inp[2:3] (or zero) + umull v20.2d,v14.2s,v1.s[2] + ldp x9,x13,[x16],#48 + umull v19.2d,v14.2s,v0.s[2] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + umlal v23.2d,v15.2s,v5.s[2] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal v22.2d,v15.2s,v3.s[2] + and x5,x9,#0x03ffffff + umlal v21.2d,v15.2s,v1.s[2] + ubfx x6,x8,#26,#26 + umlal v20.2d,v15.2s,v0.s[2] + ubfx x7,x9,#26,#26 + umlal v19.2d,v15.2s,v8.s[2] + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + + umlal v23.2d,v16.2s,v3.s[2] + extr x8,x12,x8,#52 + umlal v22.2d,v16.2s,v1.s[2] + extr x9,x13,x9,#52 + umlal v21.2d,v16.2s,v0.s[2] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal v20.2d,v16.2s,v8.s[2] + fmov d14,x4 + umlal v19.2d,v16.2s,v6.s[2] + and x8,x8,#0x03ffffff + + umlal v23.2d,v17.2s,v1.s[2] + and x9,x9,#0x03ffffff + umlal v22.2d,v17.2s,v0.s[2] + ubfx x10,x12,#14,#26 + umlal v21.2d,v17.2s,v8.s[2] + ubfx x11,x13,#14,#26 + umlal v20.2d,v17.2s,v6.s[2] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal v19.2d,v17.2s,v4.s[2] + fmov d15,x6 + + add v11.2s,v11.2s,v26.2s + add x12,x3,x12,lsr#40 + umlal v23.2d,v18.2s,v0.s[2] + add x13,x3,x13,lsr#40 + umlal v22.2d,v18.2s,v8.s[2] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal v21.2d,v18.2s,v6.s[2] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal v20.2d,v18.2s,v4.s[2] + fmov d16,x8 + umlal v19.2d,v18.2s,v2.s[2] + fmov d17,x10 + + //////////////////////////////////////////////////////////////// + // (hash+inp[0:1])*r^4 and accumulate + + add v9.2s,v9.2s,v24.2s + fmov d18,x12 + umlal v22.2d,v11.2s,v1.s[0] + ldp x8,x12,[x1],#16 // inp[0:1] + umlal v19.2d,v11.2s,v6.s[0] + ldp x9,x13,[x1],#48 + umlal v23.2d,v11.2s,v3.s[0] + umlal v20.2d,v11.2s,v8.s[0] + umlal v21.2d,v11.2s,v0.s[0] +#ifdef __AARCH64EB__ + rev x8,x8 + rev x12,x12 + rev x9,x9 + rev x13,x13 +#endif + + add v10.2s,v10.2s,v25.2s + umlal v22.2d,v9.2s,v5.s[0] + umlal 
v23.2d,v9.2s,v7.s[0] + and x4,x8,#0x03ffffff // base 2^64 -> base 2^26 + umlal v21.2d,v9.2s,v3.s[0] + and x5,x9,#0x03ffffff + umlal v19.2d,v9.2s,v0.s[0] + ubfx x6,x8,#26,#26 + umlal v20.2d,v9.2s,v1.s[0] + ubfx x7,x9,#26,#26 + + add v12.2s,v12.2s,v27.2s + add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32 + umlal v22.2d,v10.2s,v3.s[0] + extr x8,x12,x8,#52 + umlal v23.2d,v10.2s,v5.s[0] + extr x9,x13,x9,#52 + umlal v19.2d,v10.2s,v8.s[0] + add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32 + umlal v21.2d,v10.2s,v1.s[0] + fmov d9,x4 + umlal v20.2d,v10.2s,v0.s[0] + and x8,x8,#0x03ffffff + + add v13.2s,v13.2s,v28.2s + and x9,x9,#0x03ffffff + umlal v22.2d,v12.2s,v0.s[0] + ubfx x10,x12,#14,#26 + umlal v19.2d,v12.2s,v4.s[0] + ubfx x11,x13,#14,#26 + umlal v23.2d,v12.2s,v1.s[0] + add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32 + umlal v20.2d,v12.2s,v6.s[0] + fmov d10,x6 + umlal v21.2d,v12.2s,v8.s[0] + add x12,x3,x12,lsr#40 + + umlal v22.2d,v13.2s,v8.s[0] + add x13,x3,x13,lsr#40 + umlal v19.2d,v13.2s,v2.s[0] + add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32 + umlal v23.2d,v13.2s,v0.s[0] + add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32 + umlal v20.2d,v13.2s,v4.s[0] + fmov d11,x8 + umlal v21.2d,v13.2s,v6.s[0] + fmov d12,x10 + fmov d13,x12 + + ///////////////////////////////////////////////////////////////// + // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein + // and P. Schwabe + // + // [see discussion in poly1305-armv4 module] + + ushr v29.2d,v22.2d,#26 + xtn v27.2s,v22.2d + ushr v30.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + add v23.2d,v23.2d,v29.2d // h3 -> h4 + bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff + add v20.2d,v20.2d,v30.2d // h0 -> h1 + + ushr v29.2d,v23.2d,#26 + xtn v28.2s,v23.2d + ushr v30.2d,v20.2d,#26 + xtn v25.2s,v20.2d + bic v28.2s,#0xfc,lsl#24 + add v21.2d,v21.2d,v30.2d // h1 -> h2 + + add v19.2d,v19.2d,v29.2d + shl v29.2d,v29.2d,#2 + shrn v30.2s,v21.2d,#26 + xtn v26.2s,v21.2d + add v19.2d,v19.2d,v29.2d // h4 -> h0 + bic v25.2s,#0xfc,lsl#24 + add v27.2s,v27.2s,v30.2s // h2 -> h3 + bic v26.2s,#0xfc,lsl#24 + + shrn v29.2s,v19.2d,#26 + xtn v24.2s,v19.2d + ushr v30.2s,v27.2s,#26 + bic v27.2s,#0xfc,lsl#24 + bic v24.2s,#0xfc,lsl#24 + add v25.2s,v25.2s,v29.2s // h0 -> h1 + add v28.2s,v28.2s,v30.2s // h3 -> h4 + + b.hi .Loop_neon + +.Lskip_loop: + dup v16.2d,v16.d[0] + add v11.2s,v11.2s,v26.2s + + //////////////////////////////////////////////////////////////// + // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1 + + adds x2,x2,#32 + b.ne .Long_tail + + dup v16.2d,v11.d[0] + add v14.2s,v9.2s,v24.2s + add v17.2s,v12.2s,v27.2s + add v15.2s,v10.2s,v25.2s + add v18.2s,v13.2s,v28.2s + +.Long_tail: + dup v14.2d,v14.d[0] + umull2 v19.2d,v16.4s,v6.4s + umull2 v22.2d,v16.4s,v1.4s + umull2 v23.2d,v16.4s,v3.4s + umull2 v21.2d,v16.4s,v0.4s + umull2 v20.2d,v16.4s,v8.4s + + dup v15.2d,v15.d[0] + umlal2 v19.2d,v14.4s,v0.4s + umlal2 v21.2d,v14.4s,v3.4s + umlal2 v22.2d,v14.4s,v5.4s + umlal2 v23.2d,v14.4s,v7.4s + umlal2 v20.2d,v14.4s,v1.4s + + dup v17.2d,v17.d[0] + umlal2 v19.2d,v15.4s,v8.4s + umlal2 v22.2d,v15.4s,v3.4s + umlal2 v21.2d,v15.4s,v1.4s + umlal2 v23.2d,v15.4s,v5.4s + umlal2 v20.2d,v15.4s,v0.4s + + dup v18.2d,v18.d[0] + umlal2 v22.2d,v17.4s,v0.4s + umlal2 v23.2d,v17.4s,v1.4s + umlal2 v19.2d,v17.4s,v4.4s + umlal2 v20.2d,v17.4s,v6.4s + umlal2 v21.2d,v17.4s,v8.4s + + umlal2 v22.2d,v18.4s,v8.4s + umlal2 v19.2d,v18.4s,v2.4s + umlal2 v23.2d,v18.4s,v0.4s + umlal2 v20.2d,v18.4s,v4.4s + umlal2 v21.2d,v18.4s,v6.4s + + b.eq .Lshort_tail + + //////////////////////////////////////////////////////////////// + // 
(hash+inp[0:1])*r^4:r^3 and accumulate + + add v9.2s,v9.2s,v24.2s + umlal v22.2d,v11.2s,v1.2s + umlal v19.2d,v11.2s,v6.2s + umlal v23.2d,v11.2s,v3.2s + umlal v20.2d,v11.2s,v8.2s + umlal v21.2d,v11.2s,v0.2s + + add v10.2s,v10.2s,v25.2s + umlal v22.2d,v9.2s,v5.2s + umlal v19.2d,v9.2s,v0.2s + umlal v23.2d,v9.2s,v7.2s + umlal v20.2d,v9.2s,v1.2s + umlal v21.2d,v9.2s,v3.2s + + add v12.2s,v12.2s,v27.2s + umlal v22.2d,v10.2s,v3.2s + umlal v19.2d,v10.2s,v8.2s + umlal v23.2d,v10.2s,v5.2s + umlal v20.2d,v10.2s,v0.2s + umlal v21.2d,v10.2s,v1.2s + + add v13.2s,v13.2s,v28.2s + umlal v22.2d,v12.2s,v0.2s + umlal v19.2d,v12.2s,v4.2s + umlal v23.2d,v12.2s,v1.2s + umlal v20.2d,v12.2s,v6.2s + umlal v21.2d,v12.2s,v8.2s + + umlal v22.2d,v13.2s,v8.2s + umlal v19.2d,v13.2s,v2.2s + umlal v23.2d,v13.2s,v0.2s + umlal v20.2d,v13.2s,v4.2s + umlal v21.2d,v13.2s,v6.2s + +.Lshort_tail: + //////////////////////////////////////////////////////////////// + // horizontal add + + addp v22.2d,v22.2d,v22.2d + ldp d8,d9,[sp,#16] // meet ABI requirements + addp v19.2d,v19.2d,v19.2d + ldp d10,d11,[sp,#32] + addp v23.2d,v23.2d,v23.2d + ldp d12,d13,[sp,#48] + addp v20.2d,v20.2d,v20.2d + ldp d14,d15,[sp,#64] + addp v21.2d,v21.2d,v21.2d + + //////////////////////////////////////////////////////////////// + // lazy reduction, but without narrowing + + ushr v29.2d,v22.2d,#26 + and v22.16b,v22.16b,v31.16b + ushr v30.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + + add v23.2d,v23.2d,v29.2d // h3 -> h4 + add v20.2d,v20.2d,v30.2d // h0 -> h1 + + ushr v29.2d,v23.2d,#26 + and v23.16b,v23.16b,v31.16b + ushr v30.2d,v20.2d,#26 + and v20.16b,v20.16b,v31.16b + add v21.2d,v21.2d,v30.2d // h1 -> h2 + + add v19.2d,v19.2d,v29.2d + shl v29.2d,v29.2d,#2 + ushr v30.2d,v21.2d,#26 + and v21.16b,v21.16b,v31.16b + add v19.2d,v19.2d,v29.2d // h4 -> h0 + add v22.2d,v22.2d,v30.2d // h2 -> h3 + + ushr v29.2d,v19.2d,#26 + and v19.16b,v19.16b,v31.16b + ushr v30.2d,v22.2d,#26 + and v22.16b,v22.16b,v31.16b + add v20.2d,v20.2d,v29.2d // h0 -> h1 + add v23.2d,v23.2d,v30.2d // h3 -> h4 + + //////////////////////////////////////////////////////////////// + // write the result, can be partially reduced + + st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 + st1 {v23.s}[0],[x0] + +.Lno_data_neon: + ldr x29,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size poly1305_blocks_neon,.-poly1305_blocks_neon + +.type poly1305_emit_neon,%function +.align 5 +poly1305_emit_neon: +.Lpoly1305_emit_neon: + // The symbol .Lpoly1305_emit_neon is not a .globl symbol + // but a pointer to it is returned by poly1305_init + AARCH64_VALID_CALL_TARGET + ldr x17,[x0,#24] + cbz x17,poly1305_emit + + ldp w10,w11,[x0] // load hash value base 2^26 + ldp w12,w13,[x0,#8] + ldr w14,[x0,#16] + + add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64 + lsr x5,x12,#12 + adds x4,x4,x12,lsl#52 + add x5,x5,x13,lsl#14 + adc x5,x5,xzr + lsr x6,x14,#24 + adds x5,x5,x14,lsl#40 + adc x6,x6,xzr // can be partially reduced... + + ldp x10,x11,[x2] // load nonce + + and x12,x6,#-4 // ... 
so reduce + add x12,x12,x6,lsr#2 + and x6,x6,#3 + adds x4,x4,x12 + adcs x5,x5,xzr + adc x6,x6,xzr + + adds x12,x4,#5 // compare to modulus + adcs x13,x5,xzr + adc x14,x6,xzr + + tst x14,#-4 // see if it's carried/borrowed + + csel x4,x4,x12,eq + csel x5,x5,x13,eq + +#ifdef __AARCH64EB__ + ror x10,x10,#32 // flip nonce words + ror x11,x11,#32 +#endif + adds x4,x4,x10 // accumulate nonce + adc x5,x5,x11 +#ifdef __AARCH64EB__ + rev x4,x4 // flip output bytes + rev x5,x5 +#endif + stp x4,x5,[x1] // write result + + ret +.size poly1305_emit_neon,.-poly1305_emit_neon + +.section .rodata + +.align 5 +.Lzeros: +.long 0,0,0,0,0,0,0,0 +.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 diff --git a/contrib/openssl-cmake/asm/crypto/poly1305/poly1305-x86_64.s b/contrib/openssl-cmake/asm/crypto/poly1305/poly1305-x86_64.s new file mode 100644 index 000000000000..603a92cf2c9f --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/poly1305/poly1305-x86_64.s @@ -0,0 +1,3594 @@ +.text + + + +.globl poly1305_init +.hidden poly1305_init +.globl poly1305_blocks +.hidden poly1305_blocks +.globl poly1305_emit +.hidden poly1305_emit + +.type poly1305_init,@function +.align 32 +poly1305_init: +.cfi_startproc + xorq %rax,%rax + movq %rax,0(%rdi) + movq %rax,8(%rdi) + movq %rax,16(%rdi) + + cmpq $0,%rsi + je .Lno_key + + leaq poly1305_blocks(%rip),%r10 + leaq poly1305_emit(%rip),%r11 + movq OPENSSL_ia32cap_P+4(%rip),%r9 + leaq poly1305_blocks_avx(%rip),%rax + leaq poly1305_emit_avx(%rip),%rcx + btq $28,%r9 + cmovcq %rax,%r10 + cmovcq %rcx,%r11 + leaq poly1305_blocks_avx2(%rip),%rax + btq $37,%r9 + cmovcq %rax,%r10 + movq $2149646336,%rax + shrq $32,%r9 + andq %rax,%r9 + cmpq %rax,%r9 + je .Linit_base2_44 + movq $0x0ffffffc0fffffff,%rax + movq $0x0ffffffc0ffffffc,%rcx + andq 0(%rsi),%rax + andq 8(%rsi),%rcx + movq %rax,24(%rdi) + movq %rcx,32(%rdi) + movq %r10,0(%rdx) + movq %r11,8(%rdx) + movl $1,%eax +.Lno_key: + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_init,.-poly1305_init + +.type poly1305_blocks,@function +.align 32 +poly1305_blocks: +.cfi_startproc +.byte 243,15,30,250 +.Lblocks: + shrq $4,%rdx + jz .Lno_data + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lblocks_body: + + movq %rdx,%r15 + + movq 24(%rdi),%r11 + movq 32(%rdi),%r13 + + movq 0(%rdi),%r14 + movq 8(%rdi),%rbx + movq 16(%rdi),%rbp + + movq %r13,%r12 + shrq $2,%r13 + movq %r12,%rax + addq %r12,%r13 + jmp .Loop + +.align 32 +.Loop: + addq 0(%rsi),%r14 + adcq 8(%rsi),%rbx + leaq 16(%rsi),%rsi + adcq %rcx,%rbp + mulq %r14 + movq %rax,%r9 + movq %r11,%rax + movq %rdx,%r10 + + mulq %r14 + movq %rax,%r14 + movq %r11,%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r9 + movq %r13,%rax + adcq %rdx,%r10 + + mulq %rbx + movq %rbp,%rbx + addq %rax,%r14 + adcq %rdx,%r8 + + imulq %r13,%rbx + addq %rbx,%r9 + movq %r8,%rbx + adcq $0,%r10 + + imulq %r11,%rbp + addq %r9,%rbx + movq $-4,%rax + adcq %rbp,%r10 + + andq %r10,%rax + movq %r10,%rbp + shrq $2,%r10 + andq $3,%rbp + addq %r10,%rax + addq %rax,%r14 + adcq $0,%rbx + adcq $0,%rbp + movq %r12,%rax + decq %r15 + jnz .Loop + + movq 
%r14,0(%rdi) + movq %rbx,8(%rdi) + movq %rbp,16(%rdi) + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lno_data: +.Lblocks_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks,.-poly1305_blocks + +.type poly1305_emit,@function +.align 32 +poly1305_emit: +.cfi_startproc +.byte 243,15,30,250 +.Lemit: + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + + movq %r8,%rax + addq $5,%r8 + movq %r9,%rcx + adcq $0,%r9 + adcq $0,%r10 + shrq $2,%r10 + cmovnzq %r8,%rax + cmovnzq %r9,%rcx + + addq 0(%rdx),%rax + adcq 8(%rdx),%rcx + movq %rax,0(%rsi) + movq %rcx,8(%rsi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_emit,.-poly1305_emit +.type __poly1305_block,@function +.align 32 +__poly1305_block: +.cfi_startproc + mulq %r14 + movq %rax,%r9 + movq %r11,%rax + movq %rdx,%r10 + + mulq %r14 + movq %rax,%r14 + movq %r11,%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r9 + movq %r13,%rax + adcq %rdx,%r10 + + mulq %rbx + movq %rbp,%rbx + addq %rax,%r14 + adcq %rdx,%r8 + + imulq %r13,%rbx + addq %rbx,%r9 + movq %r8,%rbx + adcq $0,%r10 + + imulq %r11,%rbp + addq %r9,%rbx + movq $-4,%rax + adcq %rbp,%r10 + + andq %r10,%rax + movq %r10,%rbp + shrq $2,%r10 + andq $3,%rbp + addq %r10,%rax + addq %rax,%r14 + adcq $0,%rbx + adcq $0,%rbp + .byte 0xf3,0xc3 +.cfi_endproc +.size __poly1305_block,.-__poly1305_block + +.type __poly1305_init_avx,@function +.align 32 +__poly1305_init_avx: +.cfi_startproc + movq %r11,%r14 + movq %r12,%rbx + xorq %rbp,%rbp + + leaq 48+64(%rdi),%rdi + + movq %r12,%rax + call __poly1305_block + + movl $0x3ffffff,%eax + movl $0x3ffffff,%edx + movq %r14,%r8 + andl %r14d,%eax + movq %r11,%r9 + andl %r11d,%edx + movl %eax,-64(%rdi) + shrq $26,%r8 + movl %edx,-60(%rdi) + shrq $26,%r9 + + movl $0x3ffffff,%eax + movl $0x3ffffff,%edx + andl %r8d,%eax + andl %r9d,%edx + movl %eax,-48(%rdi) + leal (%rax,%rax,4),%eax + movl %edx,-44(%rdi) + leal (%rdx,%rdx,4),%edx + movl %eax,-32(%rdi) + shrq $26,%r8 + movl %edx,-28(%rdi) + shrq $26,%r9 + + movq %rbx,%rax + movq %r12,%rdx + shlq $12,%rax + shlq $12,%rdx + orq %r8,%rax + orq %r9,%rdx + andl $0x3ffffff,%eax + andl $0x3ffffff,%edx + movl %eax,-16(%rdi) + leal (%rax,%rax,4),%eax + movl %edx,-12(%rdi) + leal (%rdx,%rdx,4),%edx + movl %eax,0(%rdi) + movq %rbx,%r8 + movl %edx,4(%rdi) + movq %r12,%r9 + + movl $0x3ffffff,%eax + movl $0x3ffffff,%edx + shrq $14,%r8 + shrq $14,%r9 + andl %r8d,%eax + andl %r9d,%edx + movl %eax,16(%rdi) + leal (%rax,%rax,4),%eax + movl %edx,20(%rdi) + leal (%rdx,%rdx,4),%edx + movl %eax,32(%rdi) + shrq $26,%r8 + movl %edx,36(%rdi) + shrq $26,%r9 + + movq %rbp,%rax + shlq $24,%rax + orq %rax,%r8 + movl %r8d,48(%rdi) + leaq (%r8,%r8,4),%r8 + movl %r9d,52(%rdi) + leaq (%r9,%r9,4),%r9 + movl %r8d,64(%rdi) + movl %r9d,68(%rdi) + + movq %r12,%rax + call __poly1305_block + + movl $0x3ffffff,%eax + movq %r14,%r8 + andl %r14d,%eax + shrq $26,%r8 + movl %eax,-52(%rdi) + + movl $0x3ffffff,%edx + andl %r8d,%edx + movl %edx,-36(%rdi) + leal (%rdx,%rdx,4),%edx + shrq $26,%r8 + movl %edx,-20(%rdi) + + movq %rbx,%rax + shlq $12,%rax + orq %r8,%rax + andl $0x3ffffff,%eax + movl %eax,-4(%rdi) + leal (%rax,%rax,4),%eax + movq %rbx,%r8 + movl %eax,12(%rdi) + + movl $0x3ffffff,%edx + shrq $14,%r8 + andl %r8d,%edx + movl %edx,28(%rdi) + leal (%rdx,%rdx,4),%edx + shrq $26,%r8 + movl %edx,44(%rdi) 
+ + movq %rbp,%rax + shlq $24,%rax + orq %rax,%r8 + movl %r8d,60(%rdi) + leaq (%r8,%r8,4),%r8 + movl %r8d,76(%rdi) + + movq %r12,%rax + call __poly1305_block + + movl $0x3ffffff,%eax + movq %r14,%r8 + andl %r14d,%eax + shrq $26,%r8 + movl %eax,-56(%rdi) + + movl $0x3ffffff,%edx + andl %r8d,%edx + movl %edx,-40(%rdi) + leal (%rdx,%rdx,4),%edx + shrq $26,%r8 + movl %edx,-24(%rdi) + + movq %rbx,%rax + shlq $12,%rax + orq %r8,%rax + andl $0x3ffffff,%eax + movl %eax,-8(%rdi) + leal (%rax,%rax,4),%eax + movq %rbx,%r8 + movl %eax,8(%rdi) + + movl $0x3ffffff,%edx + shrq $14,%r8 + andl %r8d,%edx + movl %edx,24(%rdi) + leal (%rdx,%rdx,4),%edx + shrq $26,%r8 + movl %edx,40(%rdi) + + movq %rbp,%rax + shlq $24,%rax + orq %rax,%r8 + movl %r8d,56(%rdi) + leaq (%r8,%r8,4),%r8 + movl %r8d,72(%rdi) + + leaq -48-64(%rdi),%rdi + .byte 0xf3,0xc3 +.cfi_endproc +.size __poly1305_init_avx,.-__poly1305_init_avx + +.type poly1305_blocks_avx,@function +.align 32 +poly1305_blocks_avx: +.cfi_startproc +.byte 243,15,30,250 + movl 20(%rdi),%r8d + cmpq $128,%rdx + jae .Lblocks_avx + testl %r8d,%r8d + jz .Lblocks + +.Lblocks_avx: + andq $-16,%rdx + jz .Lno_data_avx + + vzeroupper + + testl %r8d,%r8d + jz .Lbase2_64_avx + + testq $31,%rdx + jz .Leven_avx + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lblocks_avx_body: + + movq %rdx,%r15 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movl 16(%rdi),%ebp + + movq 24(%rdi),%r11 + movq 32(%rdi),%r13 + + + movl %r8d,%r14d + andq $-2147483648,%r8 + movq %r9,%r12 + movl %r9d,%ebx + andq $-2147483648,%r9 + + shrq $6,%r8 + shlq $52,%r12 + addq %r8,%r14 + shrq $12,%rbx + shrq $18,%r9 + addq %r12,%r14 + adcq %r9,%rbx + + movq %rbp,%r8 + shlq $40,%r8 + shrq $24,%rbp + addq %r8,%rbx + adcq $0,%rbp + + movq $-4,%r9 + movq %rbp,%r8 + andq %rbp,%r9 + shrq $2,%r8 + andq $3,%rbp + addq %r9,%r8 + addq %r8,%r14 + adcq $0,%rbx + adcq $0,%rbp + + movq %r13,%r12 + movq %r13,%rax + shrq $2,%r13 + addq %r12,%r13 + + addq 0(%rsi),%r14 + adcq 8(%rsi),%rbx + leaq 16(%rsi),%rsi + adcq %rcx,%rbp + + call __poly1305_block + + testq %rcx,%rcx + jz .Lstore_base2_64_avx + + + movq %r14,%rax + movq %r14,%rdx + shrq $52,%r14 + movq %rbx,%r11 + movq %rbx,%r12 + shrq $26,%rdx + andq $0x3ffffff,%rax + shlq $12,%r11 + andq $0x3ffffff,%rdx + shrq $14,%rbx + orq %r11,%r14 + shlq $24,%rbp + andq $0x3ffffff,%r14 + shrq $40,%r12 + andq $0x3ffffff,%rbx + orq %r12,%rbp + + subq $16,%r15 + jz .Lstore_base2_26_avx + + vmovd %eax,%xmm0 + vmovd %edx,%xmm1 + vmovd %r14d,%xmm2 + vmovd %ebx,%xmm3 + vmovd %ebp,%xmm4 + jmp .Lproceed_avx + +.align 32 +.Lstore_base2_64_avx: + movq %r14,0(%rdi) + movq %rbx,8(%rdi) + movq %rbp,16(%rdi) + jmp .Ldone_avx + +.align 16 +.Lstore_base2_26_avx: + movl %eax,0(%rdi) + movl %edx,4(%rdi) + movl %r14d,8(%rdi) + movl %ebx,12(%rdi) + movl %ebp,16(%rdi) +.align 16 +.Ldone_avx: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lno_data_avx: +.Lblocks_avx_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc + +.align 32 +.Lbase2_64_avx: +.cfi_startproc + pushq 
%rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lbase2_64_avx_body: + + movq %rdx,%r15 + + movq 24(%rdi),%r11 + movq 32(%rdi),%r13 + + movq 0(%rdi),%r14 + movq 8(%rdi),%rbx + movl 16(%rdi),%ebp + + movq %r13,%r12 + movq %r13,%rax + shrq $2,%r13 + addq %r12,%r13 + + testq $31,%rdx + jz .Linit_avx + + addq 0(%rsi),%r14 + adcq 8(%rsi),%rbx + leaq 16(%rsi),%rsi + adcq %rcx,%rbp + subq $16,%r15 + + call __poly1305_block + +.Linit_avx: + + movq %r14,%rax + movq %r14,%rdx + shrq $52,%r14 + movq %rbx,%r8 + movq %rbx,%r9 + shrq $26,%rdx + andq $0x3ffffff,%rax + shlq $12,%r8 + andq $0x3ffffff,%rdx + shrq $14,%rbx + orq %r8,%r14 + shlq $24,%rbp + andq $0x3ffffff,%r14 + shrq $40,%r9 + andq $0x3ffffff,%rbx + orq %r9,%rbp + + vmovd %eax,%xmm0 + vmovd %edx,%xmm1 + vmovd %r14d,%xmm2 + vmovd %ebx,%xmm3 + vmovd %ebp,%xmm4 + movl $1,20(%rdi) + + call __poly1305_init_avx + +.Lproceed_avx: + movq %r15,%rdx + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rax + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lbase2_64_avx_epilogue: + jmp .Ldo_avx +.cfi_endproc + +.align 32 +.Leven_avx: +.cfi_startproc + vmovd 0(%rdi),%xmm0 + vmovd 4(%rdi),%xmm1 + vmovd 8(%rdi),%xmm2 + vmovd 12(%rdi),%xmm3 + vmovd 16(%rdi),%xmm4 + +.Ldo_avx: + leaq -88(%rsp),%r11 +.cfi_def_cfa %r11,0x60 + subq $0x178,%rsp + subq $64,%rdx + leaq -32(%rsi),%rax + cmovcq %rax,%rsi + + vmovdqu 48(%rdi),%xmm14 + leaq 112(%rdi),%rdi + leaq .Lconst(%rip),%rcx + + + + vmovdqu 32(%rsi),%xmm5 + vmovdqu 48(%rsi),%xmm6 + vmovdqa 64(%rcx),%xmm15 + + vpsrldq $6,%xmm5,%xmm7 + vpsrldq $6,%xmm6,%xmm8 + vpunpckhqdq %xmm6,%xmm5,%xmm9 + vpunpcklqdq %xmm6,%xmm5,%xmm5 + vpunpcklqdq %xmm8,%xmm7,%xmm8 + + vpsrlq $40,%xmm9,%xmm9 + vpsrlq $26,%xmm5,%xmm6 + vpand %xmm15,%xmm5,%xmm5 + vpsrlq $4,%xmm8,%xmm7 + vpand %xmm15,%xmm6,%xmm6 + vpsrlq $30,%xmm8,%xmm8 + vpand %xmm15,%xmm7,%xmm7 + vpand %xmm15,%xmm8,%xmm8 + vpor 32(%rcx),%xmm9,%xmm9 + + jbe .Lskip_loop_avx + + + vmovdqu -48(%rdi),%xmm11 + vmovdqu -32(%rdi),%xmm12 + vpshufd $0xEE,%xmm14,%xmm13 + vpshufd $0x44,%xmm14,%xmm10 + vmovdqa %xmm13,-144(%r11) + vmovdqa %xmm10,0(%rsp) + vpshufd $0xEE,%xmm11,%xmm14 + vmovdqu -16(%rdi),%xmm10 + vpshufd $0x44,%xmm11,%xmm11 + vmovdqa %xmm14,-128(%r11) + vmovdqa %xmm11,16(%rsp) + vpshufd $0xEE,%xmm12,%xmm13 + vmovdqu 0(%rdi),%xmm11 + vpshufd $0x44,%xmm12,%xmm12 + vmovdqa %xmm13,-112(%r11) + vmovdqa %xmm12,32(%rsp) + vpshufd $0xEE,%xmm10,%xmm14 + vmovdqu 16(%rdi),%xmm12 + vpshufd $0x44,%xmm10,%xmm10 + vmovdqa %xmm14,-96(%r11) + vmovdqa %xmm10,48(%rsp) + vpshufd $0xEE,%xmm11,%xmm13 + vmovdqu 32(%rdi),%xmm10 + vpshufd $0x44,%xmm11,%xmm11 + vmovdqa %xmm13,-80(%r11) + vmovdqa %xmm11,64(%rsp) + vpshufd $0xEE,%xmm12,%xmm14 + vmovdqu 48(%rdi),%xmm11 + vpshufd $0x44,%xmm12,%xmm12 + vmovdqa %xmm14,-64(%r11) + vmovdqa %xmm12,80(%rsp) + vpshufd $0xEE,%xmm10,%xmm13 + vmovdqu 64(%rdi),%xmm12 + vpshufd $0x44,%xmm10,%xmm10 + vmovdqa %xmm13,-48(%r11) + vmovdqa %xmm10,96(%rsp) + vpshufd $0xEE,%xmm11,%xmm14 + vpshufd $0x44,%xmm11,%xmm11 + vmovdqa %xmm14,-32(%r11) + vmovdqa %xmm11,112(%rsp) + 
vpshufd $0xEE,%xmm12,%xmm13 + vmovdqa 0(%rsp),%xmm14 + vpshufd $0x44,%xmm12,%xmm12 + vmovdqa %xmm13,-16(%r11) + vmovdqa %xmm12,128(%rsp) + + jmp .Loop_avx + +.align 32 +.Loop_avx: + + + + + + + + + + + + + + + + + + + + + vpmuludq %xmm5,%xmm14,%xmm10 + vpmuludq %xmm6,%xmm14,%xmm11 + vmovdqa %xmm2,32(%r11) + vpmuludq %xmm7,%xmm14,%xmm12 + vmovdqa 16(%rsp),%xmm2 + vpmuludq %xmm8,%xmm14,%xmm13 + vpmuludq %xmm9,%xmm14,%xmm14 + + vmovdqa %xmm0,0(%r11) + vpmuludq 32(%rsp),%xmm9,%xmm0 + vmovdqa %xmm1,16(%r11) + vpmuludq %xmm8,%xmm2,%xmm1 + vpaddq %xmm0,%xmm10,%xmm10 + vpaddq %xmm1,%xmm14,%xmm14 + vmovdqa %xmm3,48(%r11) + vpmuludq %xmm7,%xmm2,%xmm0 + vpmuludq %xmm6,%xmm2,%xmm1 + vpaddq %xmm0,%xmm13,%xmm13 + vmovdqa 48(%rsp),%xmm3 + vpaddq %xmm1,%xmm12,%xmm12 + vmovdqa %xmm4,64(%r11) + vpmuludq %xmm5,%xmm2,%xmm2 + vpmuludq %xmm7,%xmm3,%xmm0 + vpaddq %xmm2,%xmm11,%xmm11 + + vmovdqa 64(%rsp),%xmm4 + vpaddq %xmm0,%xmm14,%xmm14 + vpmuludq %xmm6,%xmm3,%xmm1 + vpmuludq %xmm5,%xmm3,%xmm3 + vpaddq %xmm1,%xmm13,%xmm13 + vmovdqa 80(%rsp),%xmm2 + vpaddq %xmm3,%xmm12,%xmm12 + vpmuludq %xmm9,%xmm4,%xmm0 + vpmuludq %xmm8,%xmm4,%xmm4 + vpaddq %xmm0,%xmm11,%xmm11 + vmovdqa 96(%rsp),%xmm3 + vpaddq %xmm4,%xmm10,%xmm10 + + vmovdqa 128(%rsp),%xmm4 + vpmuludq %xmm6,%xmm2,%xmm1 + vpmuludq %xmm5,%xmm2,%xmm2 + vpaddq %xmm1,%xmm14,%xmm14 + vpaddq %xmm2,%xmm13,%xmm13 + vpmuludq %xmm9,%xmm3,%xmm0 + vpmuludq %xmm8,%xmm3,%xmm1 + vpaddq %xmm0,%xmm12,%xmm12 + vmovdqu 0(%rsi),%xmm0 + vpaddq %xmm1,%xmm11,%xmm11 + vpmuludq %xmm7,%xmm3,%xmm3 + vpmuludq %xmm7,%xmm4,%xmm7 + vpaddq %xmm3,%xmm10,%xmm10 + + vmovdqu 16(%rsi),%xmm1 + vpaddq %xmm7,%xmm11,%xmm11 + vpmuludq %xmm8,%xmm4,%xmm8 + vpmuludq %xmm9,%xmm4,%xmm9 + vpsrldq $6,%xmm0,%xmm2 + vpaddq %xmm8,%xmm12,%xmm12 + vpaddq %xmm9,%xmm13,%xmm13 + vpsrldq $6,%xmm1,%xmm3 + vpmuludq 112(%rsp),%xmm5,%xmm9 + vpmuludq %xmm6,%xmm4,%xmm5 + vpunpckhqdq %xmm1,%xmm0,%xmm4 + vpaddq %xmm9,%xmm14,%xmm14 + vmovdqa -144(%r11),%xmm9 + vpaddq %xmm5,%xmm10,%xmm10 + + vpunpcklqdq %xmm1,%xmm0,%xmm0 + vpunpcklqdq %xmm3,%xmm2,%xmm3 + + + vpsrldq $5,%xmm4,%xmm4 + vpsrlq $26,%xmm0,%xmm1 + vpand %xmm15,%xmm0,%xmm0 + vpsrlq $4,%xmm3,%xmm2 + vpand %xmm15,%xmm1,%xmm1 + vpand 0(%rcx),%xmm4,%xmm4 + vpsrlq $30,%xmm3,%xmm3 + vpand %xmm15,%xmm2,%xmm2 + vpand %xmm15,%xmm3,%xmm3 + vpor 32(%rcx),%xmm4,%xmm4 + + vpaddq 0(%r11),%xmm0,%xmm0 + vpaddq 16(%r11),%xmm1,%xmm1 + vpaddq 32(%r11),%xmm2,%xmm2 + vpaddq 48(%r11),%xmm3,%xmm3 + vpaddq 64(%r11),%xmm4,%xmm4 + + leaq 32(%rsi),%rax + leaq 64(%rsi),%rsi + subq $64,%rdx + cmovcq %rax,%rsi + + + + + + + + + + + vpmuludq %xmm0,%xmm9,%xmm5 + vpmuludq %xmm1,%xmm9,%xmm6 + vpaddq %xmm5,%xmm10,%xmm10 + vpaddq %xmm6,%xmm11,%xmm11 + vmovdqa -128(%r11),%xmm7 + vpmuludq %xmm2,%xmm9,%xmm5 + vpmuludq %xmm3,%xmm9,%xmm6 + vpaddq %xmm5,%xmm12,%xmm12 + vpaddq %xmm6,%xmm13,%xmm13 + vpmuludq %xmm4,%xmm9,%xmm9 + vpmuludq -112(%r11),%xmm4,%xmm5 + vpaddq %xmm9,%xmm14,%xmm14 + + vpaddq %xmm5,%xmm10,%xmm10 + vpmuludq %xmm2,%xmm7,%xmm6 + vpmuludq %xmm3,%xmm7,%xmm5 + vpaddq %xmm6,%xmm13,%xmm13 + vmovdqa -96(%r11),%xmm8 + vpaddq %xmm5,%xmm14,%xmm14 + vpmuludq %xmm1,%xmm7,%xmm6 + vpmuludq %xmm0,%xmm7,%xmm7 + vpaddq %xmm6,%xmm12,%xmm12 + vpaddq %xmm7,%xmm11,%xmm11 + + vmovdqa -80(%r11),%xmm9 + vpmuludq %xmm2,%xmm8,%xmm5 + vpmuludq %xmm1,%xmm8,%xmm6 + vpaddq %xmm5,%xmm14,%xmm14 + vpaddq %xmm6,%xmm13,%xmm13 + vmovdqa -64(%r11),%xmm7 + vpmuludq %xmm0,%xmm8,%xmm8 + vpmuludq %xmm4,%xmm9,%xmm5 + vpaddq %xmm8,%xmm12,%xmm12 + vpaddq %xmm5,%xmm11,%xmm11 + vmovdqa -48(%r11),%xmm8 + vpmuludq %xmm3,%xmm9,%xmm9 + 
vpmuludq %xmm1,%xmm7,%xmm6 + vpaddq %xmm9,%xmm10,%xmm10 + + vmovdqa -16(%r11),%xmm9 + vpaddq %xmm6,%xmm14,%xmm14 + vpmuludq %xmm0,%xmm7,%xmm7 + vpmuludq %xmm4,%xmm8,%xmm5 + vpaddq %xmm7,%xmm13,%xmm13 + vpaddq %xmm5,%xmm12,%xmm12 + vmovdqu 32(%rsi),%xmm5 + vpmuludq %xmm3,%xmm8,%xmm7 + vpmuludq %xmm2,%xmm8,%xmm8 + vpaddq %xmm7,%xmm11,%xmm11 + vmovdqu 48(%rsi),%xmm6 + vpaddq %xmm8,%xmm10,%xmm10 + + vpmuludq %xmm2,%xmm9,%xmm2 + vpmuludq %xmm3,%xmm9,%xmm3 + vpsrldq $6,%xmm5,%xmm7 + vpaddq %xmm2,%xmm11,%xmm11 + vpmuludq %xmm4,%xmm9,%xmm4 + vpsrldq $6,%xmm6,%xmm8 + vpaddq %xmm3,%xmm12,%xmm2 + vpaddq %xmm4,%xmm13,%xmm3 + vpmuludq -32(%r11),%xmm0,%xmm4 + vpmuludq %xmm1,%xmm9,%xmm0 + vpunpckhqdq %xmm6,%xmm5,%xmm9 + vpaddq %xmm4,%xmm14,%xmm4 + vpaddq %xmm0,%xmm10,%xmm0 + + vpunpcklqdq %xmm6,%xmm5,%xmm5 + vpunpcklqdq %xmm8,%xmm7,%xmm8 + + + vpsrldq $5,%xmm9,%xmm9 + vpsrlq $26,%xmm5,%xmm6 + vmovdqa 0(%rsp),%xmm14 + vpand %xmm15,%xmm5,%xmm5 + vpsrlq $4,%xmm8,%xmm7 + vpand %xmm15,%xmm6,%xmm6 + vpand 0(%rcx),%xmm9,%xmm9 + vpsrlq $30,%xmm8,%xmm8 + vpand %xmm15,%xmm7,%xmm7 + vpand %xmm15,%xmm8,%xmm8 + vpor 32(%rcx),%xmm9,%xmm9 + + + + + + vpsrlq $26,%xmm3,%xmm13 + vpand %xmm15,%xmm3,%xmm3 + vpaddq %xmm13,%xmm4,%xmm4 + + vpsrlq $26,%xmm0,%xmm10 + vpand %xmm15,%xmm0,%xmm0 + vpaddq %xmm10,%xmm11,%xmm1 + + vpsrlq $26,%xmm4,%xmm10 + vpand %xmm15,%xmm4,%xmm4 + + vpsrlq $26,%xmm1,%xmm11 + vpand %xmm15,%xmm1,%xmm1 + vpaddq %xmm11,%xmm2,%xmm2 + + vpaddq %xmm10,%xmm0,%xmm0 + vpsllq $2,%xmm10,%xmm10 + vpaddq %xmm10,%xmm0,%xmm0 + + vpsrlq $26,%xmm2,%xmm12 + vpand %xmm15,%xmm2,%xmm2 + vpaddq %xmm12,%xmm3,%xmm3 + + vpsrlq $26,%xmm0,%xmm10 + vpand %xmm15,%xmm0,%xmm0 + vpaddq %xmm10,%xmm1,%xmm1 + + vpsrlq $26,%xmm3,%xmm13 + vpand %xmm15,%xmm3,%xmm3 + vpaddq %xmm13,%xmm4,%xmm4 + + ja .Loop_avx + +.Lskip_loop_avx: + + + + vpshufd $0x10,%xmm14,%xmm14 + addq $32,%rdx + jnz .Long_tail_avx + + vpaddq %xmm2,%xmm7,%xmm7 + vpaddq %xmm0,%xmm5,%xmm5 + vpaddq %xmm1,%xmm6,%xmm6 + vpaddq %xmm3,%xmm8,%xmm8 + vpaddq %xmm4,%xmm9,%xmm9 + +.Long_tail_avx: + vmovdqa %xmm2,32(%r11) + vmovdqa %xmm0,0(%r11) + vmovdqa %xmm1,16(%r11) + vmovdqa %xmm3,48(%r11) + vmovdqa %xmm4,64(%r11) + + + + + + + + vpmuludq %xmm7,%xmm14,%xmm12 + vpmuludq %xmm5,%xmm14,%xmm10 + vpshufd $0x10,-48(%rdi),%xmm2 + vpmuludq %xmm6,%xmm14,%xmm11 + vpmuludq %xmm8,%xmm14,%xmm13 + vpmuludq %xmm9,%xmm14,%xmm14 + + vpmuludq %xmm8,%xmm2,%xmm0 + vpaddq %xmm0,%xmm14,%xmm14 + vpshufd $0x10,-32(%rdi),%xmm3 + vpmuludq %xmm7,%xmm2,%xmm1 + vpaddq %xmm1,%xmm13,%xmm13 + vpshufd $0x10,-16(%rdi),%xmm4 + vpmuludq %xmm6,%xmm2,%xmm0 + vpaddq %xmm0,%xmm12,%xmm12 + vpmuludq %xmm5,%xmm2,%xmm2 + vpaddq %xmm2,%xmm11,%xmm11 + vpmuludq %xmm9,%xmm3,%xmm3 + vpaddq %xmm3,%xmm10,%xmm10 + + vpshufd $0x10,0(%rdi),%xmm2 + vpmuludq %xmm7,%xmm4,%xmm1 + vpaddq %xmm1,%xmm14,%xmm14 + vpmuludq %xmm6,%xmm4,%xmm0 + vpaddq %xmm0,%xmm13,%xmm13 + vpshufd $0x10,16(%rdi),%xmm3 + vpmuludq %xmm5,%xmm4,%xmm4 + vpaddq %xmm4,%xmm12,%xmm12 + vpmuludq %xmm9,%xmm2,%xmm1 + vpaddq %xmm1,%xmm11,%xmm11 + vpshufd $0x10,32(%rdi),%xmm4 + vpmuludq %xmm8,%xmm2,%xmm2 + vpaddq %xmm2,%xmm10,%xmm10 + + vpmuludq %xmm6,%xmm3,%xmm0 + vpaddq %xmm0,%xmm14,%xmm14 + vpmuludq %xmm5,%xmm3,%xmm3 + vpaddq %xmm3,%xmm13,%xmm13 + vpshufd $0x10,48(%rdi),%xmm2 + vpmuludq %xmm9,%xmm4,%xmm1 + vpaddq %xmm1,%xmm12,%xmm12 + vpshufd $0x10,64(%rdi),%xmm3 + vpmuludq %xmm8,%xmm4,%xmm0 + vpaddq %xmm0,%xmm11,%xmm11 + vpmuludq %xmm7,%xmm4,%xmm4 + vpaddq %xmm4,%xmm10,%xmm10 + + vpmuludq %xmm5,%xmm2,%xmm2 + vpaddq %xmm2,%xmm14,%xmm14 + vpmuludq %xmm9,%xmm3,%xmm1 + 
vpaddq %xmm1,%xmm13,%xmm13 + vpmuludq %xmm8,%xmm3,%xmm0 + vpaddq %xmm0,%xmm12,%xmm12 + vpmuludq %xmm7,%xmm3,%xmm1 + vpaddq %xmm1,%xmm11,%xmm11 + vpmuludq %xmm6,%xmm3,%xmm3 + vpaddq %xmm3,%xmm10,%xmm10 + + jz .Lshort_tail_avx + + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + + vpsrldq $6,%xmm0,%xmm2 + vpsrldq $6,%xmm1,%xmm3 + vpunpckhqdq %xmm1,%xmm0,%xmm4 + vpunpcklqdq %xmm1,%xmm0,%xmm0 + vpunpcklqdq %xmm3,%xmm2,%xmm3 + + vpsrlq $40,%xmm4,%xmm4 + vpsrlq $26,%xmm0,%xmm1 + vpand %xmm15,%xmm0,%xmm0 + vpsrlq $4,%xmm3,%xmm2 + vpand %xmm15,%xmm1,%xmm1 + vpsrlq $30,%xmm3,%xmm3 + vpand %xmm15,%xmm2,%xmm2 + vpand %xmm15,%xmm3,%xmm3 + vpor 32(%rcx),%xmm4,%xmm4 + + vpshufd $0x32,-64(%rdi),%xmm9 + vpaddq 0(%r11),%xmm0,%xmm0 + vpaddq 16(%r11),%xmm1,%xmm1 + vpaddq 32(%r11),%xmm2,%xmm2 + vpaddq 48(%r11),%xmm3,%xmm3 + vpaddq 64(%r11),%xmm4,%xmm4 + + + + + vpmuludq %xmm0,%xmm9,%xmm5 + vpaddq %xmm5,%xmm10,%xmm10 + vpmuludq %xmm1,%xmm9,%xmm6 + vpaddq %xmm6,%xmm11,%xmm11 + vpmuludq %xmm2,%xmm9,%xmm5 + vpaddq %xmm5,%xmm12,%xmm12 + vpshufd $0x32,-48(%rdi),%xmm7 + vpmuludq %xmm3,%xmm9,%xmm6 + vpaddq %xmm6,%xmm13,%xmm13 + vpmuludq %xmm4,%xmm9,%xmm9 + vpaddq %xmm9,%xmm14,%xmm14 + + vpmuludq %xmm3,%xmm7,%xmm5 + vpaddq %xmm5,%xmm14,%xmm14 + vpshufd $0x32,-32(%rdi),%xmm8 + vpmuludq %xmm2,%xmm7,%xmm6 + vpaddq %xmm6,%xmm13,%xmm13 + vpshufd $0x32,-16(%rdi),%xmm9 + vpmuludq %xmm1,%xmm7,%xmm5 + vpaddq %xmm5,%xmm12,%xmm12 + vpmuludq %xmm0,%xmm7,%xmm7 + vpaddq %xmm7,%xmm11,%xmm11 + vpmuludq %xmm4,%xmm8,%xmm8 + vpaddq %xmm8,%xmm10,%xmm10 + + vpshufd $0x32,0(%rdi),%xmm7 + vpmuludq %xmm2,%xmm9,%xmm6 + vpaddq %xmm6,%xmm14,%xmm14 + vpmuludq %xmm1,%xmm9,%xmm5 + vpaddq %xmm5,%xmm13,%xmm13 + vpshufd $0x32,16(%rdi),%xmm8 + vpmuludq %xmm0,%xmm9,%xmm9 + vpaddq %xmm9,%xmm12,%xmm12 + vpmuludq %xmm4,%xmm7,%xmm6 + vpaddq %xmm6,%xmm11,%xmm11 + vpshufd $0x32,32(%rdi),%xmm9 + vpmuludq %xmm3,%xmm7,%xmm7 + vpaddq %xmm7,%xmm10,%xmm10 + + vpmuludq %xmm1,%xmm8,%xmm5 + vpaddq %xmm5,%xmm14,%xmm14 + vpmuludq %xmm0,%xmm8,%xmm8 + vpaddq %xmm8,%xmm13,%xmm13 + vpshufd $0x32,48(%rdi),%xmm7 + vpmuludq %xmm4,%xmm9,%xmm6 + vpaddq %xmm6,%xmm12,%xmm12 + vpshufd $0x32,64(%rdi),%xmm8 + vpmuludq %xmm3,%xmm9,%xmm5 + vpaddq %xmm5,%xmm11,%xmm11 + vpmuludq %xmm2,%xmm9,%xmm9 + vpaddq %xmm9,%xmm10,%xmm10 + + vpmuludq %xmm0,%xmm7,%xmm7 + vpaddq %xmm7,%xmm14,%xmm14 + vpmuludq %xmm4,%xmm8,%xmm6 + vpaddq %xmm6,%xmm13,%xmm13 + vpmuludq %xmm3,%xmm8,%xmm5 + vpaddq %xmm5,%xmm12,%xmm12 + vpmuludq %xmm2,%xmm8,%xmm6 + vpaddq %xmm6,%xmm11,%xmm11 + vpmuludq %xmm1,%xmm8,%xmm8 + vpaddq %xmm8,%xmm10,%xmm10 + +.Lshort_tail_avx: + + + + vpsrldq $8,%xmm14,%xmm9 + vpsrldq $8,%xmm13,%xmm8 + vpsrldq $8,%xmm11,%xmm6 + vpsrldq $8,%xmm10,%xmm5 + vpsrldq $8,%xmm12,%xmm7 + vpaddq %xmm8,%xmm13,%xmm13 + vpaddq %xmm9,%xmm14,%xmm14 + vpaddq %xmm5,%xmm10,%xmm10 + vpaddq %xmm6,%xmm11,%xmm11 + vpaddq %xmm7,%xmm12,%xmm12 + + + + + vpsrlq $26,%xmm13,%xmm3 + vpand %xmm15,%xmm13,%xmm13 + vpaddq %xmm3,%xmm14,%xmm14 + + vpsrlq $26,%xmm10,%xmm0 + vpand %xmm15,%xmm10,%xmm10 + vpaddq %xmm0,%xmm11,%xmm11 + + vpsrlq $26,%xmm14,%xmm4 + vpand %xmm15,%xmm14,%xmm14 + + vpsrlq $26,%xmm11,%xmm1 + vpand %xmm15,%xmm11,%xmm11 + vpaddq %xmm1,%xmm12,%xmm12 + + vpaddq %xmm4,%xmm10,%xmm10 + vpsllq $2,%xmm4,%xmm4 + vpaddq %xmm4,%xmm10,%xmm10 + + vpsrlq $26,%xmm12,%xmm2 + vpand %xmm15,%xmm12,%xmm12 + vpaddq %xmm2,%xmm13,%xmm13 + + vpsrlq $26,%xmm10,%xmm0 + vpand %xmm15,%xmm10,%xmm10 + vpaddq %xmm0,%xmm11,%xmm11 + + vpsrlq $26,%xmm13,%xmm3 + vpand %xmm15,%xmm13,%xmm13 + vpaddq %xmm3,%xmm14,%xmm14 + + vmovd %xmm10,-112(%rdi) + 
vmovd %xmm11,-108(%rdi) + vmovd %xmm12,-104(%rdi) + vmovd %xmm13,-100(%rdi) + vmovd %xmm14,-96(%rdi) + leaq 88(%r11),%rsp +.cfi_def_cfa %rsp,8 + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks_avx,.-poly1305_blocks_avx + +.type poly1305_emit_avx,@function +.align 32 +poly1305_emit_avx: +.cfi_startproc +.byte 243,15,30,250 + cmpl $0,20(%rdi) + je .Lemit + + movl 0(%rdi),%eax + movl 4(%rdi),%ecx + movl 8(%rdi),%r8d + movl 12(%rdi),%r11d + movl 16(%rdi),%r10d + + shlq $26,%rcx + movq %r8,%r9 + shlq $52,%r8 + addq %rcx,%rax + shrq $12,%r9 + addq %rax,%r8 + adcq $0,%r9 + + shlq $14,%r11 + movq %r10,%rax + shrq $24,%r10 + addq %r11,%r9 + shlq $40,%rax + addq %rax,%r9 + adcq $0,%r10 + + movq %r10,%rax + movq %r10,%rcx + andq $3,%r10 + shrq $2,%rax + andq $-4,%rcx + addq %rcx,%rax + addq %rax,%r8 + adcq $0,%r9 + adcq $0,%r10 + + movq %r8,%rax + addq $5,%r8 + movq %r9,%rcx + adcq $0,%r9 + adcq $0,%r10 + shrq $2,%r10 + cmovnzq %r8,%rax + cmovnzq %r9,%rcx + + addq 0(%rdx),%rax + adcq 8(%rdx),%rcx + movq %rax,0(%rsi) + movq %rcx,8(%rsi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_emit_avx,.-poly1305_emit_avx +.type poly1305_blocks_avx2,@function +.align 32 +poly1305_blocks_avx2: +.cfi_startproc +.byte 243,15,30,250 + movl 20(%rdi),%r8d + cmpq $128,%rdx + jae .Lblocks_avx2 + testl %r8d,%r8d + jz .Lblocks + +.Lblocks_avx2: + andq $-16,%rdx + jz .Lno_data_avx2 + + vzeroupper + + testl %r8d,%r8d + jz .Lbase2_64_avx2 + + testq $63,%rdx + jz .Leven_avx2 + + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lblocks_avx2_body: + + movq %rdx,%r15 + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movl 16(%rdi),%ebp + + movq 24(%rdi),%r11 + movq 32(%rdi),%r13 + + + movl %r8d,%r14d + andq $-2147483648,%r8 + movq %r9,%r12 + movl %r9d,%ebx + andq $-2147483648,%r9 + + shrq $6,%r8 + shlq $52,%r12 + addq %r8,%r14 + shrq $12,%rbx + shrq $18,%r9 + addq %r12,%r14 + adcq %r9,%rbx + + movq %rbp,%r8 + shlq $40,%r8 + shrq $24,%rbp + addq %r8,%rbx + adcq $0,%rbp + + movq $-4,%r9 + movq %rbp,%r8 + andq %rbp,%r9 + shrq $2,%r8 + andq $3,%rbp + addq %r9,%r8 + addq %r8,%r14 + adcq $0,%rbx + adcq $0,%rbp + + movq %r13,%r12 + movq %r13,%rax + shrq $2,%r13 + addq %r12,%r13 + +.Lbase2_26_pre_avx2: + addq 0(%rsi),%r14 + adcq 8(%rsi),%rbx + leaq 16(%rsi),%rsi + adcq %rcx,%rbp + subq $16,%r15 + + call __poly1305_block + movq %r12,%rax + + testq $63,%r15 + jnz .Lbase2_26_pre_avx2 + + testq %rcx,%rcx + jz .Lstore_base2_64_avx2 + + + movq %r14,%rax + movq %r14,%rdx + shrq $52,%r14 + movq %rbx,%r11 + movq %rbx,%r12 + shrq $26,%rdx + andq $0x3ffffff,%rax + shlq $12,%r11 + andq $0x3ffffff,%rdx + shrq $14,%rbx + orq %r11,%r14 + shlq $24,%rbp + andq $0x3ffffff,%r14 + shrq $40,%r12 + andq $0x3ffffff,%rbx + orq %r12,%rbp + + testq %r15,%r15 + jz .Lstore_base2_26_avx2 + + vmovd %eax,%xmm0 + vmovd %edx,%xmm1 + vmovd %r14d,%xmm2 + vmovd %ebx,%xmm3 + vmovd %ebp,%xmm4 + jmp .Lproceed_avx2 + +.align 32 +.Lstore_base2_64_avx2: + movq %r14,0(%rdi) + movq %rbx,8(%rdi) + movq %rbp,16(%rdi) + jmp .Ldone_avx2 + +.align 16 +.Lstore_base2_26_avx2: + movl %eax,0(%rdi) + movl %edx,4(%rdi) + movl %r14d,8(%rdi) + movl %ebx,12(%rdi) + movl %ebp,16(%rdi) +.align 16 +.Ldone_avx2: + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 
+.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lno_data_avx2: +.Lblocks_avx2_epilogue: + .byte 0xf3,0xc3 +.cfi_endproc + +.align 32 +.Lbase2_64_avx2: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 +.Lbase2_64_avx2_body: + + movq %rdx,%r15 + + movq 24(%rdi),%r11 + movq 32(%rdi),%r13 + + movq 0(%rdi),%r14 + movq 8(%rdi),%rbx + movl 16(%rdi),%ebp + + movq %r13,%r12 + movq %r13,%rax + shrq $2,%r13 + addq %r12,%r13 + + testq $63,%rdx + jz .Linit_avx2 + +.Lbase2_64_pre_avx2: + addq 0(%rsi),%r14 + adcq 8(%rsi),%rbx + leaq 16(%rsi),%rsi + adcq %rcx,%rbp + subq $16,%r15 + + call __poly1305_block + movq %r12,%rax + + testq $63,%r15 + jnz .Lbase2_64_pre_avx2 + +.Linit_avx2: + + movq %r14,%rax + movq %r14,%rdx + shrq $52,%r14 + movq %rbx,%r8 + movq %rbx,%r9 + shrq $26,%rdx + andq $0x3ffffff,%rax + shlq $12,%r8 + andq $0x3ffffff,%rdx + shrq $14,%rbx + orq %r8,%r14 + shlq $24,%rbp + andq $0x3ffffff,%r14 + shrq $40,%r9 + andq $0x3ffffff,%rbx + orq %r9,%rbp + + vmovd %eax,%xmm0 + vmovd %edx,%xmm1 + vmovd %r14d,%xmm2 + vmovd %ebx,%xmm3 + vmovd %ebp,%xmm4 + movl $1,20(%rdi) + + call __poly1305_init_avx + +.Lproceed_avx2: + movq %r15,%rdx + movl OPENSSL_ia32cap_P+8(%rip),%r10d + movl $3221291008,%r11d + + movq 0(%rsp),%r15 +.cfi_restore %r15 + movq 8(%rsp),%r14 +.cfi_restore %r14 + movq 16(%rsp),%r13 +.cfi_restore %r13 + movq 24(%rsp),%r12 +.cfi_restore %r12 + movq 32(%rsp),%rbp +.cfi_restore %rbp + movq 40(%rsp),%rbx +.cfi_restore %rbx + leaq 48(%rsp),%rax + leaq 48(%rsp),%rsp +.cfi_adjust_cfa_offset -48 +.Lbase2_64_avx2_epilogue: + jmp .Ldo_avx2 +.cfi_endproc + +.align 32 +.Leven_avx2: +.cfi_startproc + movl OPENSSL_ia32cap_P+8(%rip),%r10d + vmovd 0(%rdi),%xmm0 + vmovd 4(%rdi),%xmm1 + vmovd 8(%rdi),%xmm2 + vmovd 12(%rdi),%xmm3 + vmovd 16(%rdi),%xmm4 + +.Ldo_avx2: + cmpq $512,%rdx + jb .Lskip_avx512 + andl %r11d,%r10d + testl $65536,%r10d + jnz .Lblocks_avx512 +.Lskip_avx512: + leaq -8(%rsp),%r11 +.cfi_def_cfa %r11,16 + subq $0x128,%rsp + leaq .Lconst(%rip),%rcx + leaq 48+64(%rdi),%rdi + vmovdqa 96(%rcx),%ymm7 + + + vmovdqu -64(%rdi),%xmm9 + andq $-512,%rsp + vmovdqu -48(%rdi),%xmm10 + vmovdqu -32(%rdi),%xmm6 + vmovdqu -16(%rdi),%xmm11 + vmovdqu 0(%rdi),%xmm12 + vmovdqu 16(%rdi),%xmm13 + leaq 144(%rsp),%rax + vmovdqu 32(%rdi),%xmm14 + vpermd %ymm9,%ymm7,%ymm9 + vmovdqu 48(%rdi),%xmm15 + vpermd %ymm10,%ymm7,%ymm10 + vmovdqu 64(%rdi),%xmm5 + vpermd %ymm6,%ymm7,%ymm6 + vmovdqa %ymm9,0(%rsp) + vpermd %ymm11,%ymm7,%ymm11 + vmovdqa %ymm10,32-144(%rax) + vpermd %ymm12,%ymm7,%ymm12 + vmovdqa %ymm6,64-144(%rax) + vpermd %ymm13,%ymm7,%ymm13 + vmovdqa %ymm11,96-144(%rax) + vpermd %ymm14,%ymm7,%ymm14 + vmovdqa %ymm12,128-144(%rax) + vpermd %ymm15,%ymm7,%ymm15 + vmovdqa %ymm13,160-144(%rax) + vpermd %ymm5,%ymm7,%ymm5 + vmovdqa %ymm14,192-144(%rax) + vmovdqa %ymm15,224-144(%rax) + vmovdqa %ymm5,256-144(%rax) + vmovdqa 64(%rcx),%ymm5 + + + + vmovdqu 0(%rsi),%xmm7 + vmovdqu 16(%rsi),%xmm8 + vinserti128 $1,32(%rsi),%ymm7,%ymm7 + vinserti128 $1,48(%rsi),%ymm8,%ymm8 + leaq 64(%rsi),%rsi + + vpsrldq $6,%ymm7,%ymm9 + 
vpsrldq $6,%ymm8,%ymm10 + vpunpckhqdq %ymm8,%ymm7,%ymm6 + vpunpcklqdq %ymm10,%ymm9,%ymm9 + vpunpcklqdq %ymm8,%ymm7,%ymm7 + + vpsrlq $30,%ymm9,%ymm10 + vpsrlq $4,%ymm9,%ymm9 + vpsrlq $26,%ymm7,%ymm8 + vpsrlq $40,%ymm6,%ymm6 + vpand %ymm5,%ymm9,%ymm9 + vpand %ymm5,%ymm7,%ymm7 + vpand %ymm5,%ymm8,%ymm8 + vpand %ymm5,%ymm10,%ymm10 + vpor 32(%rcx),%ymm6,%ymm6 + + vpaddq %ymm2,%ymm9,%ymm2 + subq $64,%rdx + jz .Ltail_avx2 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + + + + + + + + + vpaddq %ymm0,%ymm7,%ymm0 + vmovdqa 0(%rsp),%ymm7 + vpaddq %ymm1,%ymm8,%ymm1 + vmovdqa 32(%rsp),%ymm8 + vpaddq %ymm3,%ymm10,%ymm3 + vmovdqa 96(%rsp),%ymm9 + vpaddq %ymm4,%ymm6,%ymm4 + vmovdqa 48(%rax),%ymm10 + vmovdqa 112(%rax),%ymm5 + + + + + + + + + + + + + + + + + vpmuludq %ymm2,%ymm7,%ymm13 + vpmuludq %ymm2,%ymm8,%ymm14 + vpmuludq %ymm2,%ymm9,%ymm15 + vpmuludq %ymm2,%ymm10,%ymm11 + vpmuludq %ymm2,%ymm5,%ymm12 + + vpmuludq %ymm0,%ymm8,%ymm6 + vpmuludq %ymm1,%ymm8,%ymm2 + vpaddq %ymm6,%ymm12,%ymm12 + vpaddq %ymm2,%ymm13,%ymm13 + vpmuludq %ymm3,%ymm8,%ymm6 + vpmuludq 64(%rsp),%ymm4,%ymm2 + vpaddq %ymm6,%ymm15,%ymm15 + vpaddq %ymm2,%ymm11,%ymm11 + vmovdqa -16(%rax),%ymm8 + + vpmuludq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm1,%ymm7,%ymm2 + vpaddq %ymm6,%ymm11,%ymm11 + vpaddq %ymm2,%ymm12,%ymm12 + vpmuludq %ymm3,%ymm7,%ymm6 + vpmuludq %ymm4,%ymm7,%ymm2 + vmovdqu 0(%rsi),%xmm7 + vpaddq %ymm6,%ymm14,%ymm14 + vpaddq %ymm2,%ymm15,%ymm15 + vinserti128 $1,32(%rsi),%ymm7,%ymm7 + + vpmuludq %ymm3,%ymm8,%ymm6 + vpmuludq %ymm4,%ymm8,%ymm2 + vmovdqu 16(%rsi),%xmm8 + vpaddq %ymm6,%ymm11,%ymm11 + vpaddq %ymm2,%ymm12,%ymm12 + vmovdqa 16(%rax),%ymm2 + vpmuludq %ymm1,%ymm9,%ymm6 + vpmuludq %ymm0,%ymm9,%ymm9 + vpaddq %ymm6,%ymm14,%ymm14 + vpaddq %ymm9,%ymm13,%ymm13 + vinserti128 $1,48(%rsi),%ymm8,%ymm8 + leaq 64(%rsi),%rsi + + vpmuludq %ymm1,%ymm2,%ymm6 + vpmuludq %ymm0,%ymm2,%ymm2 + vpsrldq $6,%ymm7,%ymm9 + vpaddq %ymm6,%ymm15,%ymm15 + vpaddq %ymm2,%ymm14,%ymm14 + vpmuludq %ymm3,%ymm10,%ymm6 + vpmuludq %ymm4,%ymm10,%ymm2 + vpsrldq $6,%ymm8,%ymm10 + vpaddq %ymm6,%ymm12,%ymm12 + vpaddq %ymm2,%ymm13,%ymm13 + vpunpckhqdq %ymm8,%ymm7,%ymm6 + + vpmuludq %ymm3,%ymm5,%ymm3 + vpmuludq %ymm4,%ymm5,%ymm4 + vpunpcklqdq %ymm8,%ymm7,%ymm7 + vpaddq %ymm3,%ymm13,%ymm2 + vpaddq %ymm4,%ymm14,%ymm3 + vpunpcklqdq %ymm10,%ymm9,%ymm10 + vpmuludq 80(%rax),%ymm0,%ymm4 + vpmuludq %ymm1,%ymm5,%ymm0 + vmovdqa 64(%rcx),%ymm5 + vpaddq %ymm4,%ymm15,%ymm4 + vpaddq %ymm0,%ymm11,%ymm0 + + + + + vpsrlq $26,%ymm3,%ymm14 + vpand %ymm5,%ymm3,%ymm3 + vpaddq %ymm14,%ymm4,%ymm4 + + vpsrlq $26,%ymm0,%ymm11 + vpand %ymm5,%ymm0,%ymm0 + vpaddq %ymm11,%ymm12,%ymm1 + + vpsrlq $26,%ymm4,%ymm15 + vpand %ymm5,%ymm4,%ymm4 + + vpsrlq $4,%ymm10,%ymm9 + + vpsrlq $26,%ymm1,%ymm12 + vpand %ymm5,%ymm1,%ymm1 + vpaddq %ymm12,%ymm2,%ymm2 + + vpaddq %ymm15,%ymm0,%ymm0 + vpsllq $2,%ymm15,%ymm15 + vpaddq %ymm15,%ymm0,%ymm0 + + vpand %ymm5,%ymm9,%ymm9 + vpsrlq $26,%ymm7,%ymm8 + + vpsrlq $26,%ymm2,%ymm13 + vpand %ymm5,%ymm2,%ymm2 + vpaddq %ymm13,%ymm3,%ymm3 + + vpaddq %ymm9,%ymm2,%ymm2 + vpsrlq $30,%ymm10,%ymm10 + + vpsrlq $26,%ymm0,%ymm11 + vpand %ymm5,%ymm0,%ymm0 + vpaddq %ymm11,%ymm1,%ymm1 + + vpsrlq $40,%ymm6,%ymm6 + + vpsrlq $26,%ymm3,%ymm14 + vpand %ymm5,%ymm3,%ymm3 + vpaddq %ymm14,%ymm4,%ymm4 + + vpand %ymm5,%ymm7,%ymm7 + vpand %ymm5,%ymm8,%ymm8 + vpand %ymm5,%ymm10,%ymm10 + vpor 32(%rcx),%ymm6,%ymm6 + + subq $64,%rdx + jnz .Loop_avx2 + +.byte 0x66,0x90 +.Ltail_avx2: + + + + + + + + vpaddq %ymm0,%ymm7,%ymm0 + vmovdqu 4(%rsp),%ymm7 + vpaddq %ymm1,%ymm8,%ymm1 + vmovdqu 36(%rsp),%ymm8 + vpaddq 
%ymm3,%ymm10,%ymm3 + vmovdqu 100(%rsp),%ymm9 + vpaddq %ymm4,%ymm6,%ymm4 + vmovdqu 52(%rax),%ymm10 + vmovdqu 116(%rax),%ymm5 + + vpmuludq %ymm2,%ymm7,%ymm13 + vpmuludq %ymm2,%ymm8,%ymm14 + vpmuludq %ymm2,%ymm9,%ymm15 + vpmuludq %ymm2,%ymm10,%ymm11 + vpmuludq %ymm2,%ymm5,%ymm12 + + vpmuludq %ymm0,%ymm8,%ymm6 + vpmuludq %ymm1,%ymm8,%ymm2 + vpaddq %ymm6,%ymm12,%ymm12 + vpaddq %ymm2,%ymm13,%ymm13 + vpmuludq %ymm3,%ymm8,%ymm6 + vpmuludq 68(%rsp),%ymm4,%ymm2 + vpaddq %ymm6,%ymm15,%ymm15 + vpaddq %ymm2,%ymm11,%ymm11 + + vpmuludq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm1,%ymm7,%ymm2 + vpaddq %ymm6,%ymm11,%ymm11 + vmovdqu -12(%rax),%ymm8 + vpaddq %ymm2,%ymm12,%ymm12 + vpmuludq %ymm3,%ymm7,%ymm6 + vpmuludq %ymm4,%ymm7,%ymm2 + vpaddq %ymm6,%ymm14,%ymm14 + vpaddq %ymm2,%ymm15,%ymm15 + + vpmuludq %ymm3,%ymm8,%ymm6 + vpmuludq %ymm4,%ymm8,%ymm2 + vpaddq %ymm6,%ymm11,%ymm11 + vpaddq %ymm2,%ymm12,%ymm12 + vmovdqu 20(%rax),%ymm2 + vpmuludq %ymm1,%ymm9,%ymm6 + vpmuludq %ymm0,%ymm9,%ymm9 + vpaddq %ymm6,%ymm14,%ymm14 + vpaddq %ymm9,%ymm13,%ymm13 + + vpmuludq %ymm1,%ymm2,%ymm6 + vpmuludq %ymm0,%ymm2,%ymm2 + vpaddq %ymm6,%ymm15,%ymm15 + vpaddq %ymm2,%ymm14,%ymm14 + vpmuludq %ymm3,%ymm10,%ymm6 + vpmuludq %ymm4,%ymm10,%ymm2 + vpaddq %ymm6,%ymm12,%ymm12 + vpaddq %ymm2,%ymm13,%ymm13 + + vpmuludq %ymm3,%ymm5,%ymm3 + vpmuludq %ymm4,%ymm5,%ymm4 + vpaddq %ymm3,%ymm13,%ymm2 + vpaddq %ymm4,%ymm14,%ymm3 + vpmuludq 84(%rax),%ymm0,%ymm4 + vpmuludq %ymm1,%ymm5,%ymm0 + vmovdqa 64(%rcx),%ymm5 + vpaddq %ymm4,%ymm15,%ymm4 + vpaddq %ymm0,%ymm11,%ymm0 + + + + + vpsrldq $8,%ymm12,%ymm8 + vpsrldq $8,%ymm2,%ymm9 + vpsrldq $8,%ymm3,%ymm10 + vpsrldq $8,%ymm4,%ymm6 + vpsrldq $8,%ymm0,%ymm7 + vpaddq %ymm8,%ymm12,%ymm12 + vpaddq %ymm9,%ymm2,%ymm2 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm6,%ymm4,%ymm4 + vpaddq %ymm7,%ymm0,%ymm0 + + vpermq $0x2,%ymm3,%ymm10 + vpermq $0x2,%ymm4,%ymm6 + vpermq $0x2,%ymm0,%ymm7 + vpermq $0x2,%ymm12,%ymm8 + vpermq $0x2,%ymm2,%ymm9 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm6,%ymm4,%ymm4 + vpaddq %ymm7,%ymm0,%ymm0 + vpaddq %ymm8,%ymm12,%ymm12 + vpaddq %ymm9,%ymm2,%ymm2 + + + + + vpsrlq $26,%ymm3,%ymm14 + vpand %ymm5,%ymm3,%ymm3 + vpaddq %ymm14,%ymm4,%ymm4 + + vpsrlq $26,%ymm0,%ymm11 + vpand %ymm5,%ymm0,%ymm0 + vpaddq %ymm11,%ymm12,%ymm1 + + vpsrlq $26,%ymm4,%ymm15 + vpand %ymm5,%ymm4,%ymm4 + + vpsrlq $26,%ymm1,%ymm12 + vpand %ymm5,%ymm1,%ymm1 + vpaddq %ymm12,%ymm2,%ymm2 + + vpaddq %ymm15,%ymm0,%ymm0 + vpsllq $2,%ymm15,%ymm15 + vpaddq %ymm15,%ymm0,%ymm0 + + vpsrlq $26,%ymm2,%ymm13 + vpand %ymm5,%ymm2,%ymm2 + vpaddq %ymm13,%ymm3,%ymm3 + + vpsrlq $26,%ymm0,%ymm11 + vpand %ymm5,%ymm0,%ymm0 + vpaddq %ymm11,%ymm1,%ymm1 + + vpsrlq $26,%ymm3,%ymm14 + vpand %ymm5,%ymm3,%ymm3 + vpaddq %ymm14,%ymm4,%ymm4 + + vmovd %xmm0,-112(%rdi) + vmovd %xmm1,-108(%rdi) + vmovd %xmm2,-104(%rdi) + vmovd %xmm3,-100(%rdi) + vmovd %xmm4,-96(%rdi) + leaq 8(%r11),%rsp +.cfi_def_cfa %rsp,8 + vzeroupper + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks_avx2,.-poly1305_blocks_avx2 +.type poly1305_blocks_avx512,@function +.align 32 +poly1305_blocks_avx512: +.cfi_startproc +.byte 243,15,30,250 +.Lblocks_avx512: + movl $15,%eax + kmovw %eax,%k2 + leaq -8(%rsp),%r11 +.cfi_def_cfa %r11,16 + subq $0x128,%rsp + leaq .Lconst(%rip),%rcx + leaq 48+64(%rdi),%rdi + vmovdqa 96(%rcx),%ymm9 + + + vmovdqu -64(%rdi),%xmm11 + andq $-512,%rsp + vmovdqu -48(%rdi),%xmm12 + movq $0x20,%rax + vmovdqu -32(%rdi),%xmm7 + vmovdqu -16(%rdi),%xmm13 + vmovdqu 0(%rdi),%xmm8 + vmovdqu 16(%rdi),%xmm14 + vmovdqu 32(%rdi),%xmm10 + vmovdqu 48(%rdi),%xmm15 + vmovdqu 64(%rdi),%xmm6 + 
vpermd %zmm11,%zmm9,%zmm16 + vpbroadcastq 64(%rcx),%zmm5 + vpermd %zmm12,%zmm9,%zmm17 + vpermd %zmm7,%zmm9,%zmm21 + vpermd %zmm13,%zmm9,%zmm18 + vmovdqa64 %zmm16,0(%rsp){%k2} + vpsrlq $32,%zmm16,%zmm7 + vpermd %zmm8,%zmm9,%zmm22 + vmovdqu64 %zmm17,0(%rsp,%rax,1){%k2} + vpsrlq $32,%zmm17,%zmm8 + vpermd %zmm14,%zmm9,%zmm19 + vmovdqa64 %zmm21,64(%rsp){%k2} + vpermd %zmm10,%zmm9,%zmm23 + vpermd %zmm15,%zmm9,%zmm20 + vmovdqu64 %zmm18,64(%rsp,%rax,1){%k2} + vpermd %zmm6,%zmm9,%zmm24 + vmovdqa64 %zmm22,128(%rsp){%k2} + vmovdqu64 %zmm19,128(%rsp,%rax,1){%k2} + vmovdqa64 %zmm23,192(%rsp){%k2} + vmovdqu64 %zmm20,192(%rsp,%rax,1){%k2} + vmovdqa64 %zmm24,256(%rsp){%k2} + + + + + + + + + + + vpmuludq %zmm7,%zmm16,%zmm11 + vpmuludq %zmm7,%zmm17,%zmm12 + vpmuludq %zmm7,%zmm18,%zmm13 + vpmuludq %zmm7,%zmm19,%zmm14 + vpmuludq %zmm7,%zmm20,%zmm15 + vpsrlq $32,%zmm18,%zmm9 + + vpmuludq %zmm8,%zmm24,%zmm25 + vpmuludq %zmm8,%zmm16,%zmm26 + vpmuludq %zmm8,%zmm17,%zmm27 + vpmuludq %zmm8,%zmm18,%zmm28 + vpmuludq %zmm8,%zmm19,%zmm29 + vpsrlq $32,%zmm19,%zmm10 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + + vpmuludq %zmm9,%zmm23,%zmm25 + vpmuludq %zmm9,%zmm24,%zmm26 + vpmuludq %zmm9,%zmm17,%zmm28 + vpmuludq %zmm9,%zmm18,%zmm29 + vpmuludq %zmm9,%zmm16,%zmm27 + vpsrlq $32,%zmm20,%zmm6 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm27,%zmm13,%zmm13 + + vpmuludq %zmm10,%zmm22,%zmm25 + vpmuludq %zmm10,%zmm16,%zmm28 + vpmuludq %zmm10,%zmm17,%zmm29 + vpmuludq %zmm10,%zmm23,%zmm26 + vpmuludq %zmm10,%zmm24,%zmm27 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + + vpmuludq %zmm6,%zmm24,%zmm28 + vpmuludq %zmm6,%zmm16,%zmm29 + vpmuludq %zmm6,%zmm21,%zmm25 + vpmuludq %zmm6,%zmm22,%zmm26 + vpmuludq %zmm6,%zmm23,%zmm27 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + + + + vmovdqu64 0(%rsi),%zmm10 + vmovdqu64 64(%rsi),%zmm6 + leaq 128(%rsi),%rsi + + + + + vpsrlq $26,%zmm14,%zmm28 + vpandq %zmm5,%zmm14,%zmm14 + vpaddq %zmm28,%zmm15,%zmm15 + + vpsrlq $26,%zmm11,%zmm25 + vpandq %zmm5,%zmm11,%zmm11 + vpaddq %zmm25,%zmm12,%zmm12 + + vpsrlq $26,%zmm15,%zmm29 + vpandq %zmm5,%zmm15,%zmm15 + + vpsrlq $26,%zmm12,%zmm26 + vpandq %zmm5,%zmm12,%zmm12 + vpaddq %zmm26,%zmm13,%zmm13 + + vpaddq %zmm29,%zmm11,%zmm11 + vpsllq $2,%zmm29,%zmm29 + vpaddq %zmm29,%zmm11,%zmm11 + + vpsrlq $26,%zmm13,%zmm27 + vpandq %zmm5,%zmm13,%zmm13 + vpaddq %zmm27,%zmm14,%zmm14 + + vpsrlq $26,%zmm11,%zmm25 + vpandq %zmm5,%zmm11,%zmm11 + vpaddq %zmm25,%zmm12,%zmm12 + + vpsrlq $26,%zmm14,%zmm28 + vpandq %zmm5,%zmm14,%zmm14 + vpaddq %zmm28,%zmm15,%zmm15 + + + + + + vpunpcklqdq %zmm6,%zmm10,%zmm7 + vpunpckhqdq %zmm6,%zmm10,%zmm6 + + + + + + + vmovdqa32 128(%rcx),%zmm25 + movl $0x7777,%eax + kmovw %eax,%k1 + + vpermd %zmm16,%zmm25,%zmm16 + vpermd %zmm17,%zmm25,%zmm17 + vpermd %zmm18,%zmm25,%zmm18 + vpermd %zmm19,%zmm25,%zmm19 + vpermd %zmm20,%zmm25,%zmm20 + + vpermd %zmm11,%zmm25,%zmm16{%k1} + vpermd %zmm12,%zmm25,%zmm17{%k1} + vpermd %zmm13,%zmm25,%zmm18{%k1} + vpermd %zmm14,%zmm25,%zmm19{%k1} + vpermd %zmm15,%zmm25,%zmm20{%k1} + + vpslld $2,%zmm17,%zmm21 + vpslld $2,%zmm18,%zmm22 + vpslld $2,%zmm19,%zmm23 + vpslld $2,%zmm20,%zmm24 + vpaddd %zmm17,%zmm21,%zmm21 + vpaddd 
%zmm18,%zmm22,%zmm22 + vpaddd %zmm19,%zmm23,%zmm23 + vpaddd %zmm20,%zmm24,%zmm24 + + vpbroadcastq 32(%rcx),%zmm30 + + vpsrlq $52,%zmm7,%zmm9 + vpsllq $12,%zmm6,%zmm10 + vporq %zmm10,%zmm9,%zmm9 + vpsrlq $26,%zmm7,%zmm8 + vpsrlq $14,%zmm6,%zmm10 + vpsrlq $40,%zmm6,%zmm6 + vpandq %zmm5,%zmm9,%zmm9 + vpandq %zmm5,%zmm7,%zmm7 + + + + + vpaddq %zmm2,%zmm9,%zmm2 + subq $192,%rdx + jbe .Ltail_avx512 + jmp .Loop_avx512 + +.align 32 +.Loop_avx512: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + vpmuludq %zmm2,%zmm17,%zmm14 + vpaddq %zmm0,%zmm7,%zmm0 + vpmuludq %zmm2,%zmm18,%zmm15 + vpandq %zmm5,%zmm8,%zmm8 + vpmuludq %zmm2,%zmm23,%zmm11 + vpandq %zmm5,%zmm10,%zmm10 + vpmuludq %zmm2,%zmm24,%zmm12 + vporq %zmm30,%zmm6,%zmm6 + vpmuludq %zmm2,%zmm16,%zmm13 + vpaddq %zmm1,%zmm8,%zmm1 + vpaddq %zmm3,%zmm10,%zmm3 + vpaddq %zmm4,%zmm6,%zmm4 + + vmovdqu64 0(%rsi),%zmm10 + vmovdqu64 64(%rsi),%zmm6 + leaq 128(%rsi),%rsi + vpmuludq %zmm0,%zmm19,%zmm28 + vpmuludq %zmm0,%zmm20,%zmm29 + vpmuludq %zmm0,%zmm16,%zmm25 + vpmuludq %zmm0,%zmm17,%zmm26 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + + vpmuludq %zmm1,%zmm18,%zmm28 + vpmuludq %zmm1,%zmm19,%zmm29 + vpmuludq %zmm1,%zmm24,%zmm25 + vpmuludq %zmm0,%zmm18,%zmm27 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm27,%zmm13,%zmm13 + + vpunpcklqdq %zmm6,%zmm10,%zmm7 + vpunpckhqdq %zmm6,%zmm10,%zmm6 + + vpmuludq %zmm3,%zmm16,%zmm28 + vpmuludq %zmm3,%zmm17,%zmm29 + vpmuludq %zmm1,%zmm16,%zmm26 + vpmuludq %zmm1,%zmm17,%zmm27 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + + vpmuludq %zmm4,%zmm24,%zmm28 + vpmuludq %zmm4,%zmm16,%zmm29 + vpmuludq %zmm3,%zmm22,%zmm25 + vpmuludq %zmm3,%zmm23,%zmm26 + vpaddq %zmm28,%zmm14,%zmm14 + vpmuludq %zmm3,%zmm24,%zmm27 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + + vpmuludq %zmm4,%zmm21,%zmm25 + vpmuludq %zmm4,%zmm22,%zmm26 + vpmuludq %zmm4,%zmm23,%zmm27 + vpaddq %zmm25,%zmm11,%zmm0 + vpaddq %zmm26,%zmm12,%zmm1 + vpaddq %zmm27,%zmm13,%zmm2 + + + + + vpsrlq $52,%zmm7,%zmm9 + vpsllq $12,%zmm6,%zmm10 + + vpsrlq $26,%zmm14,%zmm3 + vpandq %zmm5,%zmm14,%zmm14 + vpaddq %zmm3,%zmm15,%zmm4 + + vporq %zmm10,%zmm9,%zmm9 + + vpsrlq $26,%zmm0,%zmm11 + vpandq %zmm5,%zmm0,%zmm0 + vpaddq %zmm11,%zmm1,%zmm1 + + vpandq %zmm5,%zmm9,%zmm9 + + vpsrlq $26,%zmm4,%zmm15 + vpandq %zmm5,%zmm4,%zmm4 + + vpsrlq $26,%zmm1,%zmm12 + vpandq %zmm5,%zmm1,%zmm1 + vpaddq %zmm12,%zmm2,%zmm2 + + vpaddq %zmm15,%zmm0,%zmm0 + vpsllq $2,%zmm15,%zmm15 + vpaddq %zmm15,%zmm0,%zmm0 + + vpaddq %zmm9,%zmm2,%zmm2 + vpsrlq $26,%zmm7,%zmm8 + + vpsrlq $26,%zmm2,%zmm13 + vpandq %zmm5,%zmm2,%zmm2 + vpaddq %zmm13,%zmm14,%zmm3 + + vpsrlq $14,%zmm6,%zmm10 + + vpsrlq $26,%zmm0,%zmm11 + vpandq %zmm5,%zmm0,%zmm0 + vpaddq %zmm11,%zmm1,%zmm1 + + vpsrlq $40,%zmm6,%zmm6 + + vpsrlq $26,%zmm3,%zmm14 + vpandq %zmm5,%zmm3,%zmm3 + vpaddq %zmm14,%zmm4,%zmm4 + + vpandq %zmm5,%zmm7,%zmm7 + + + + + subq $128,%rdx + ja .Loop_avx512 + +.Ltail_avx512: + + + + + + vpsrlq $32,%zmm16,%zmm16 + vpsrlq $32,%zmm17,%zmm17 + vpsrlq $32,%zmm18,%zmm18 + vpsrlq $32,%zmm23,%zmm23 + vpsrlq $32,%zmm24,%zmm24 + vpsrlq $32,%zmm19,%zmm19 + vpsrlq $32,%zmm20,%zmm20 + vpsrlq $32,%zmm21,%zmm21 + vpsrlq $32,%zmm22,%zmm22 + + + + leaq (%rsi,%rdx,1),%rsi + + + vpaddq %zmm0,%zmm7,%zmm0 + + vpmuludq %zmm2,%zmm17,%zmm14 + vpmuludq %zmm2,%zmm18,%zmm15 + 
vpmuludq %zmm2,%zmm23,%zmm11 + vpandq %zmm5,%zmm8,%zmm8 + vpmuludq %zmm2,%zmm24,%zmm12 + vpandq %zmm5,%zmm10,%zmm10 + vpmuludq %zmm2,%zmm16,%zmm13 + vporq %zmm30,%zmm6,%zmm6 + vpaddq %zmm1,%zmm8,%zmm1 + vpaddq %zmm3,%zmm10,%zmm3 + vpaddq %zmm4,%zmm6,%zmm4 + + vmovdqu 0(%rsi),%xmm7 + vpmuludq %zmm0,%zmm19,%zmm28 + vpmuludq %zmm0,%zmm20,%zmm29 + vpmuludq %zmm0,%zmm16,%zmm25 + vpmuludq %zmm0,%zmm17,%zmm26 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + + vmovdqu 16(%rsi),%xmm8 + vpmuludq %zmm1,%zmm18,%zmm28 + vpmuludq %zmm1,%zmm19,%zmm29 + vpmuludq %zmm1,%zmm24,%zmm25 + vpmuludq %zmm0,%zmm18,%zmm27 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm27,%zmm13,%zmm13 + + vinserti128 $1,32(%rsi),%ymm7,%ymm7 + vpmuludq %zmm3,%zmm16,%zmm28 + vpmuludq %zmm3,%zmm17,%zmm29 + vpmuludq %zmm1,%zmm16,%zmm26 + vpmuludq %zmm1,%zmm17,%zmm27 + vpaddq %zmm28,%zmm14,%zmm14 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + + vinserti128 $1,48(%rsi),%ymm8,%ymm8 + vpmuludq %zmm4,%zmm24,%zmm28 + vpmuludq %zmm4,%zmm16,%zmm29 + vpmuludq %zmm3,%zmm22,%zmm25 + vpmuludq %zmm3,%zmm23,%zmm26 + vpmuludq %zmm3,%zmm24,%zmm27 + vpaddq %zmm28,%zmm14,%zmm3 + vpaddq %zmm29,%zmm15,%zmm15 + vpaddq %zmm25,%zmm11,%zmm11 + vpaddq %zmm26,%zmm12,%zmm12 + vpaddq %zmm27,%zmm13,%zmm13 + + vpmuludq %zmm4,%zmm21,%zmm25 + vpmuludq %zmm4,%zmm22,%zmm26 + vpmuludq %zmm4,%zmm23,%zmm27 + vpaddq %zmm25,%zmm11,%zmm0 + vpaddq %zmm26,%zmm12,%zmm1 + vpaddq %zmm27,%zmm13,%zmm2 + + + + + movl $1,%eax + vpermq $0xb1,%zmm3,%zmm14 + vpermq $0xb1,%zmm15,%zmm4 + vpermq $0xb1,%zmm0,%zmm11 + vpermq $0xb1,%zmm1,%zmm12 + vpermq $0xb1,%zmm2,%zmm13 + vpaddq %zmm14,%zmm3,%zmm3 + vpaddq %zmm15,%zmm4,%zmm4 + vpaddq %zmm11,%zmm0,%zmm0 + vpaddq %zmm12,%zmm1,%zmm1 + vpaddq %zmm13,%zmm2,%zmm2 + + kmovw %eax,%k3 + vpermq $0x2,%zmm3,%zmm14 + vpermq $0x2,%zmm4,%zmm15 + vpermq $0x2,%zmm0,%zmm11 + vpermq $0x2,%zmm1,%zmm12 + vpermq $0x2,%zmm2,%zmm13 + vpaddq %zmm14,%zmm3,%zmm3 + vpaddq %zmm15,%zmm4,%zmm4 + vpaddq %zmm11,%zmm0,%zmm0 + vpaddq %zmm12,%zmm1,%zmm1 + vpaddq %zmm13,%zmm2,%zmm2 + + vextracti64x4 $0x1,%zmm3,%ymm14 + vextracti64x4 $0x1,%zmm4,%ymm15 + vextracti64x4 $0x1,%zmm0,%ymm11 + vextracti64x4 $0x1,%zmm1,%ymm12 + vextracti64x4 $0x1,%zmm2,%ymm13 + vpaddq %zmm14,%zmm3,%zmm3{%k3}{z} + vpaddq %zmm15,%zmm4,%zmm4{%k3}{z} + vpaddq %zmm11,%zmm0,%zmm0{%k3}{z} + vpaddq %zmm12,%zmm1,%zmm1{%k3}{z} + vpaddq %zmm13,%zmm2,%zmm2{%k3}{z} + + + + vpsrlq $26,%ymm3,%ymm14 + vpand %ymm5,%ymm3,%ymm3 + vpsrldq $6,%ymm7,%ymm9 + vpsrldq $6,%ymm8,%ymm10 + vpunpckhqdq %ymm8,%ymm7,%ymm6 + vpaddq %ymm14,%ymm4,%ymm4 + + vpsrlq $26,%ymm0,%ymm11 + vpand %ymm5,%ymm0,%ymm0 + vpunpcklqdq %ymm10,%ymm9,%ymm9 + vpunpcklqdq %ymm8,%ymm7,%ymm7 + vpaddq %ymm11,%ymm1,%ymm1 + + vpsrlq $26,%ymm4,%ymm15 + vpand %ymm5,%ymm4,%ymm4 + + vpsrlq $26,%ymm1,%ymm12 + vpand %ymm5,%ymm1,%ymm1 + vpsrlq $30,%ymm9,%ymm10 + vpsrlq $4,%ymm9,%ymm9 + vpaddq %ymm12,%ymm2,%ymm2 + + vpaddq %ymm15,%ymm0,%ymm0 + vpsllq $2,%ymm15,%ymm15 + vpsrlq $26,%ymm7,%ymm8 + vpsrlq $40,%ymm6,%ymm6 + vpaddq %ymm15,%ymm0,%ymm0 + + vpsrlq $26,%ymm2,%ymm13 + vpand %ymm5,%ymm2,%ymm2 + vpand %ymm5,%ymm9,%ymm9 + vpand %ymm5,%ymm7,%ymm7 + vpaddq %ymm13,%ymm3,%ymm3 + + vpsrlq $26,%ymm0,%ymm11 + vpand %ymm5,%ymm0,%ymm0 + vpaddq %ymm2,%ymm9,%ymm2 + vpand %ymm5,%ymm8,%ymm8 + vpaddq %ymm11,%ymm1,%ymm1 + + vpsrlq $26,%ymm3,%ymm14 + vpand %ymm5,%ymm3,%ymm3 + vpand %ymm5,%ymm10,%ymm10 + vpor 
32(%rcx),%ymm6,%ymm6 + vpaddq %ymm14,%ymm4,%ymm4 + + leaq 144(%rsp),%rax + addq $64,%rdx + jnz .Ltail_avx2 + + vpsubq %ymm9,%ymm2,%ymm2 + vmovd %xmm0,-112(%rdi) + vmovd %xmm1,-108(%rdi) + vmovd %xmm2,-104(%rdi) + vmovd %xmm3,-100(%rdi) + vmovd %xmm4,-96(%rdi) + vzeroall + leaq 8(%r11),%rsp +.cfi_def_cfa %rsp,8 + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks_avx512,.-poly1305_blocks_avx512 +.type poly1305_init_base2_44,@function +.align 32 +poly1305_init_base2_44: +.cfi_startproc + xorq %rax,%rax + movq %rax,0(%rdi) + movq %rax,8(%rdi) + movq %rax,16(%rdi) + +.Linit_base2_44: + leaq poly1305_blocks_vpmadd52(%rip),%r10 + leaq poly1305_emit_base2_44(%rip),%r11 + + movq $0x0ffffffc0fffffff,%rax + movq $0x0ffffffc0ffffffc,%rcx + andq 0(%rsi),%rax + movq $0x00000fffffffffff,%r8 + andq 8(%rsi),%rcx + movq $0x00000fffffffffff,%r9 + andq %rax,%r8 + shrdq $44,%rcx,%rax + movq %r8,40(%rdi) + andq %r9,%rax + shrq $24,%rcx + movq %rax,48(%rdi) + leaq (%rax,%rax,4),%rax + movq %rcx,56(%rdi) + shlq $2,%rax + leaq (%rcx,%rcx,4),%rcx + shlq $2,%rcx + movq %rax,24(%rdi) + movq %rcx,32(%rdi) + movq $-1,64(%rdi) + movq %r10,0(%rdx) + movq %r11,8(%rdx) + movl $1,%eax + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_init_base2_44,.-poly1305_init_base2_44 +.type poly1305_blocks_vpmadd52,@function +.align 32 +poly1305_blocks_vpmadd52: +.cfi_startproc +.byte 243,15,30,250 + shrq $4,%rdx + jz .Lno_data_vpmadd52 + + shlq $40,%rcx + movq 64(%rdi),%r8 + + + + + + + movq $3,%rax + movq $1,%r10 + cmpq $4,%rdx + cmovaeq %r10,%rax + testq %r8,%r8 + cmovnsq %r10,%rax + + andq %rdx,%rax + jz .Lblocks_vpmadd52_4x + + subq %rax,%rdx + movl $7,%r10d + movl $1,%r11d + kmovw %r10d,%k7 + leaq .L2_44_inp_permd(%rip),%r10 + kmovw %r11d,%k1 + + vmovq %rcx,%xmm21 + vmovdqa64 0(%r10),%ymm19 + vmovdqa64 32(%r10),%ymm20 + vpermq $0xcf,%ymm21,%ymm21 + vmovdqa64 64(%r10),%ymm22 + + vmovdqu64 0(%rdi),%ymm16{%k7}{z} + vmovdqu64 40(%rdi),%ymm3{%k7}{z} + vmovdqu64 32(%rdi),%ymm4{%k7}{z} + vmovdqu64 24(%rdi),%ymm5{%k7}{z} + + vmovdqa64 96(%r10),%ymm23 + vmovdqa64 128(%r10),%ymm24 + + jmp .Loop_vpmadd52 + +.align 32 +.Loop_vpmadd52: + vmovdqu32 0(%rsi),%xmm18 + leaq 16(%rsi),%rsi + + vpermd %ymm18,%ymm19,%ymm18 + vpsrlvq %ymm20,%ymm18,%ymm18 + vpandq %ymm22,%ymm18,%ymm18 + vporq %ymm21,%ymm18,%ymm18 + + vpaddq %ymm18,%ymm16,%ymm16 + + vpermq $0,%ymm16,%ymm0{%k7}{z} + vpermq $85,%ymm16,%ymm1{%k7}{z} + vpermq $170,%ymm16,%ymm2{%k7}{z} + + vpxord %ymm16,%ymm16,%ymm16 + vpxord %ymm17,%ymm17,%ymm17 + + vpmadd52luq %ymm3,%ymm0,%ymm16 + vpmadd52huq %ymm3,%ymm0,%ymm17 + + vpmadd52luq %ymm4,%ymm1,%ymm16 + vpmadd52huq %ymm4,%ymm1,%ymm17 + + vpmadd52luq %ymm5,%ymm2,%ymm16 + vpmadd52huq %ymm5,%ymm2,%ymm17 + + vpsrlvq %ymm23,%ymm16,%ymm18 + vpsllvq %ymm24,%ymm17,%ymm17 + vpandq %ymm22,%ymm16,%ymm16 + + vpaddq %ymm18,%ymm17,%ymm17 + + vpermq $147,%ymm17,%ymm17 + + vpaddq %ymm17,%ymm16,%ymm16 + + vpsrlvq %ymm23,%ymm16,%ymm18 + vpandq %ymm22,%ymm16,%ymm16 + + vpermq $147,%ymm18,%ymm18 + + vpaddq %ymm18,%ymm16,%ymm16 + + vpermq $147,%ymm16,%ymm18{%k1}{z} + + vpaddq %ymm18,%ymm16,%ymm16 + vpsllq $2,%ymm18,%ymm18 + + vpaddq %ymm18,%ymm16,%ymm16 + + decq %rax + jnz .Loop_vpmadd52 + + vmovdqu64 %ymm16,0(%rdi){%k7} + + testq %rdx,%rdx + jnz .Lblocks_vpmadd52_4x + +.Lno_data_vpmadd52: + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 +.type poly1305_blocks_vpmadd52_4x,@function +.align 32 +poly1305_blocks_vpmadd52_4x: +.cfi_startproc + shrq $4,%rdx + jz .Lno_data_vpmadd52_4x + + shlq $40,%rcx + movq 64(%rdi),%r8 + 
+.Lblocks_vpmadd52_4x: + vpbroadcastq %rcx,%ymm31 + + vmovdqa64 .Lx_mask44(%rip),%ymm28 + movl $5,%eax + vmovdqa64 .Lx_mask42(%rip),%ymm29 + kmovw %eax,%k1 + + testq %r8,%r8 + js .Linit_vpmadd52 + + vmovq 0(%rdi),%xmm0 + vmovq 8(%rdi),%xmm1 + vmovq 16(%rdi),%xmm2 + + testq $3,%rdx + jnz .Lblocks_vpmadd52_2x_do + +.Lblocks_vpmadd52_4x_do: + vpbroadcastq 64(%rdi),%ymm3 + vpbroadcastq 96(%rdi),%ymm4 + vpbroadcastq 128(%rdi),%ymm5 + vpbroadcastq 160(%rdi),%ymm16 + +.Lblocks_vpmadd52_4x_key_loaded: + vpsllq $2,%ymm5,%ymm17 + vpaddq %ymm5,%ymm17,%ymm17 + vpsllq $2,%ymm17,%ymm17 + + testq $7,%rdx + jz .Lblocks_vpmadd52_8x + + vmovdqu64 0(%rsi),%ymm26 + vmovdqu64 32(%rsi),%ymm27 + leaq 64(%rsi),%rsi + + vpunpcklqdq %ymm27,%ymm26,%ymm25 + vpunpckhqdq %ymm27,%ymm26,%ymm27 + + + + vpsrlq $24,%ymm27,%ymm26 + vporq %ymm31,%ymm26,%ymm26 + vpaddq %ymm26,%ymm2,%ymm2 + vpandq %ymm28,%ymm25,%ymm24 + vpsrlq $44,%ymm25,%ymm25 + vpsllq $20,%ymm27,%ymm27 + vporq %ymm27,%ymm25,%ymm25 + vpandq %ymm28,%ymm25,%ymm25 + + subq $4,%rdx + jz .Ltail_vpmadd52_4x + jmp .Loop_vpmadd52_4x + ud2 + +.align 32 +.Linit_vpmadd52: + vmovq 24(%rdi),%xmm16 + vmovq 56(%rdi),%xmm2 + vmovq 32(%rdi),%xmm17 + vmovq 40(%rdi),%xmm3 + vmovq 48(%rdi),%xmm4 + + vmovdqa %ymm3,%ymm0 + vmovdqa %ymm4,%ymm1 + vmovdqa %ymm2,%ymm5 + + movl $2,%eax + +.Lmul_init_vpmadd52: + vpxorq %ymm18,%ymm18,%ymm18 + vpmadd52luq %ymm2,%ymm16,%ymm18 + vpxorq %ymm19,%ymm19,%ymm19 + vpmadd52huq %ymm2,%ymm16,%ymm19 + vpxorq %ymm20,%ymm20,%ymm20 + vpmadd52luq %ymm2,%ymm17,%ymm20 + vpxorq %ymm21,%ymm21,%ymm21 + vpmadd52huq %ymm2,%ymm17,%ymm21 + vpxorq %ymm22,%ymm22,%ymm22 + vpmadd52luq %ymm2,%ymm3,%ymm22 + vpxorq %ymm23,%ymm23,%ymm23 + vpmadd52huq %ymm2,%ymm3,%ymm23 + + vpmadd52luq %ymm0,%ymm3,%ymm18 + vpmadd52huq %ymm0,%ymm3,%ymm19 + vpmadd52luq %ymm0,%ymm4,%ymm20 + vpmadd52huq %ymm0,%ymm4,%ymm21 + vpmadd52luq %ymm0,%ymm5,%ymm22 + vpmadd52huq %ymm0,%ymm5,%ymm23 + + vpmadd52luq %ymm1,%ymm17,%ymm18 + vpmadd52huq %ymm1,%ymm17,%ymm19 + vpmadd52luq %ymm1,%ymm3,%ymm20 + vpmadd52huq %ymm1,%ymm3,%ymm21 + vpmadd52luq %ymm1,%ymm4,%ymm22 + vpmadd52huq %ymm1,%ymm4,%ymm23 + + + + vpsrlq $44,%ymm18,%ymm30 + vpsllq $8,%ymm19,%ymm19 + vpandq %ymm28,%ymm18,%ymm0 + vpaddq %ymm30,%ymm19,%ymm19 + + vpaddq %ymm19,%ymm20,%ymm20 + + vpsrlq $44,%ymm20,%ymm30 + vpsllq $8,%ymm21,%ymm21 + vpandq %ymm28,%ymm20,%ymm1 + vpaddq %ymm30,%ymm21,%ymm21 + + vpaddq %ymm21,%ymm22,%ymm22 + + vpsrlq $42,%ymm22,%ymm30 + vpsllq $10,%ymm23,%ymm23 + vpandq %ymm29,%ymm22,%ymm2 + vpaddq %ymm30,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + vpsllq $2,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + + vpsrlq $44,%ymm0,%ymm30 + vpandq %ymm28,%ymm0,%ymm0 + + vpaddq %ymm30,%ymm1,%ymm1 + + decl %eax + jz .Ldone_init_vpmadd52 + + vpunpcklqdq %ymm4,%ymm1,%ymm4 + vpbroadcastq %xmm1,%xmm1 + vpunpcklqdq %ymm5,%ymm2,%ymm5 + vpbroadcastq %xmm2,%xmm2 + vpunpcklqdq %ymm3,%ymm0,%ymm3 + vpbroadcastq %xmm0,%xmm0 + + vpsllq $2,%ymm4,%ymm16 + vpsllq $2,%ymm5,%ymm17 + vpaddq %ymm4,%ymm16,%ymm16 + vpaddq %ymm5,%ymm17,%ymm17 + vpsllq $2,%ymm16,%ymm16 + vpsllq $2,%ymm17,%ymm17 + + jmp .Lmul_init_vpmadd52 + ud2 + +.align 32 +.Ldone_init_vpmadd52: + vinserti128 $1,%xmm4,%ymm1,%ymm4 + vinserti128 $1,%xmm5,%ymm2,%ymm5 + vinserti128 $1,%xmm3,%ymm0,%ymm3 + + vpermq $216,%ymm4,%ymm4 + vpermq $216,%ymm5,%ymm5 + vpermq $216,%ymm3,%ymm3 + + vpsllq $2,%ymm4,%ymm16 + vpaddq %ymm4,%ymm16,%ymm16 + vpsllq $2,%ymm16,%ymm16 + + vmovq 0(%rdi),%xmm0 + vmovq 8(%rdi),%xmm1 + vmovq 16(%rdi),%xmm2 + + testq $3,%rdx + jnz .Ldone_init_vpmadd52_2x + + vmovdqu64 
%ymm3,64(%rdi) + vpbroadcastq %xmm3,%ymm3 + vmovdqu64 %ymm4,96(%rdi) + vpbroadcastq %xmm4,%ymm4 + vmovdqu64 %ymm5,128(%rdi) + vpbroadcastq %xmm5,%ymm5 + vmovdqu64 %ymm16,160(%rdi) + vpbroadcastq %xmm16,%ymm16 + + jmp .Lblocks_vpmadd52_4x_key_loaded + ud2 + +.align 32 +.Ldone_init_vpmadd52_2x: + vmovdqu64 %ymm3,64(%rdi) + vpsrldq $8,%ymm3,%ymm3 + vmovdqu64 %ymm4,96(%rdi) + vpsrldq $8,%ymm4,%ymm4 + vmovdqu64 %ymm5,128(%rdi) + vpsrldq $8,%ymm5,%ymm5 + vmovdqu64 %ymm16,160(%rdi) + vpsrldq $8,%ymm16,%ymm16 + jmp .Lblocks_vpmadd52_2x_key_loaded + ud2 + +.align 32 +.Lblocks_vpmadd52_2x_do: + vmovdqu64 128+8(%rdi),%ymm5{%k1}{z} + vmovdqu64 160+8(%rdi),%ymm16{%k1}{z} + vmovdqu64 64+8(%rdi),%ymm3{%k1}{z} + vmovdqu64 96+8(%rdi),%ymm4{%k1}{z} + +.Lblocks_vpmadd52_2x_key_loaded: + vmovdqu64 0(%rsi),%ymm26 + vpxorq %ymm27,%ymm27,%ymm27 + leaq 32(%rsi),%rsi + + vpunpcklqdq %ymm27,%ymm26,%ymm25 + vpunpckhqdq %ymm27,%ymm26,%ymm27 + + + + vpsrlq $24,%ymm27,%ymm26 + vporq %ymm31,%ymm26,%ymm26 + vpaddq %ymm26,%ymm2,%ymm2 + vpandq %ymm28,%ymm25,%ymm24 + vpsrlq $44,%ymm25,%ymm25 + vpsllq $20,%ymm27,%ymm27 + vporq %ymm27,%ymm25,%ymm25 + vpandq %ymm28,%ymm25,%ymm25 + + jmp .Ltail_vpmadd52_2x + ud2 + +.align 32 +.Loop_vpmadd52_4x: + + vpaddq %ymm24,%ymm0,%ymm0 + vpaddq %ymm25,%ymm1,%ymm1 + + vpxorq %ymm18,%ymm18,%ymm18 + vpmadd52luq %ymm2,%ymm16,%ymm18 + vpxorq %ymm19,%ymm19,%ymm19 + vpmadd52huq %ymm2,%ymm16,%ymm19 + vpxorq %ymm20,%ymm20,%ymm20 + vpmadd52luq %ymm2,%ymm17,%ymm20 + vpxorq %ymm21,%ymm21,%ymm21 + vpmadd52huq %ymm2,%ymm17,%ymm21 + vpxorq %ymm22,%ymm22,%ymm22 + vpmadd52luq %ymm2,%ymm3,%ymm22 + vpxorq %ymm23,%ymm23,%ymm23 + vpmadd52huq %ymm2,%ymm3,%ymm23 + + vmovdqu64 0(%rsi),%ymm26 + vmovdqu64 32(%rsi),%ymm27 + leaq 64(%rsi),%rsi + vpmadd52luq %ymm0,%ymm3,%ymm18 + vpmadd52huq %ymm0,%ymm3,%ymm19 + vpmadd52luq %ymm0,%ymm4,%ymm20 + vpmadd52huq %ymm0,%ymm4,%ymm21 + vpmadd52luq %ymm0,%ymm5,%ymm22 + vpmadd52huq %ymm0,%ymm5,%ymm23 + + vpunpcklqdq %ymm27,%ymm26,%ymm25 + vpunpckhqdq %ymm27,%ymm26,%ymm27 + vpmadd52luq %ymm1,%ymm17,%ymm18 + vpmadd52huq %ymm1,%ymm17,%ymm19 + vpmadd52luq %ymm1,%ymm3,%ymm20 + vpmadd52huq %ymm1,%ymm3,%ymm21 + vpmadd52luq %ymm1,%ymm4,%ymm22 + vpmadd52huq %ymm1,%ymm4,%ymm23 + + + + vpsrlq $44,%ymm18,%ymm30 + vpsllq $8,%ymm19,%ymm19 + vpandq %ymm28,%ymm18,%ymm0 + vpaddq %ymm30,%ymm19,%ymm19 + + vpsrlq $24,%ymm27,%ymm26 + vporq %ymm31,%ymm26,%ymm26 + vpaddq %ymm19,%ymm20,%ymm20 + + vpsrlq $44,%ymm20,%ymm30 + vpsllq $8,%ymm21,%ymm21 + vpandq %ymm28,%ymm20,%ymm1 + vpaddq %ymm30,%ymm21,%ymm21 + + vpandq %ymm28,%ymm25,%ymm24 + vpsrlq $44,%ymm25,%ymm25 + vpsllq $20,%ymm27,%ymm27 + vpaddq %ymm21,%ymm22,%ymm22 + + vpsrlq $42,%ymm22,%ymm30 + vpsllq $10,%ymm23,%ymm23 + vpandq %ymm29,%ymm22,%ymm2 + vpaddq %ymm30,%ymm23,%ymm23 + + vpaddq %ymm26,%ymm2,%ymm2 + vpaddq %ymm23,%ymm0,%ymm0 + vpsllq $2,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + vporq %ymm27,%ymm25,%ymm25 + vpandq %ymm28,%ymm25,%ymm25 + + vpsrlq $44,%ymm0,%ymm30 + vpandq %ymm28,%ymm0,%ymm0 + + vpaddq %ymm30,%ymm1,%ymm1 + + subq $4,%rdx + jnz .Loop_vpmadd52_4x + +.Ltail_vpmadd52_4x: + vmovdqu64 128(%rdi),%ymm5 + vmovdqu64 160(%rdi),%ymm16 + vmovdqu64 64(%rdi),%ymm3 + vmovdqu64 96(%rdi),%ymm4 + +.Ltail_vpmadd52_2x: + vpsllq $2,%ymm5,%ymm17 + vpaddq %ymm5,%ymm17,%ymm17 + vpsllq $2,%ymm17,%ymm17 + + + vpaddq %ymm24,%ymm0,%ymm0 + vpaddq %ymm25,%ymm1,%ymm1 + + vpxorq %ymm18,%ymm18,%ymm18 + vpmadd52luq %ymm2,%ymm16,%ymm18 + vpxorq %ymm19,%ymm19,%ymm19 + vpmadd52huq %ymm2,%ymm16,%ymm19 + vpxorq %ymm20,%ymm20,%ymm20 + vpmadd52luq 
%ymm2,%ymm17,%ymm20 + vpxorq %ymm21,%ymm21,%ymm21 + vpmadd52huq %ymm2,%ymm17,%ymm21 + vpxorq %ymm22,%ymm22,%ymm22 + vpmadd52luq %ymm2,%ymm3,%ymm22 + vpxorq %ymm23,%ymm23,%ymm23 + vpmadd52huq %ymm2,%ymm3,%ymm23 + + vpmadd52luq %ymm0,%ymm3,%ymm18 + vpmadd52huq %ymm0,%ymm3,%ymm19 + vpmadd52luq %ymm0,%ymm4,%ymm20 + vpmadd52huq %ymm0,%ymm4,%ymm21 + vpmadd52luq %ymm0,%ymm5,%ymm22 + vpmadd52huq %ymm0,%ymm5,%ymm23 + + vpmadd52luq %ymm1,%ymm17,%ymm18 + vpmadd52huq %ymm1,%ymm17,%ymm19 + vpmadd52luq %ymm1,%ymm3,%ymm20 + vpmadd52huq %ymm1,%ymm3,%ymm21 + vpmadd52luq %ymm1,%ymm4,%ymm22 + vpmadd52huq %ymm1,%ymm4,%ymm23 + + + + + movl $1,%eax + kmovw %eax,%k1 + vpsrldq $8,%ymm18,%ymm24 + vpsrldq $8,%ymm19,%ymm0 + vpsrldq $8,%ymm20,%ymm25 + vpsrldq $8,%ymm21,%ymm1 + vpaddq %ymm24,%ymm18,%ymm18 + vpaddq %ymm0,%ymm19,%ymm19 + vpsrldq $8,%ymm22,%ymm26 + vpsrldq $8,%ymm23,%ymm2 + vpaddq %ymm25,%ymm20,%ymm20 + vpaddq %ymm1,%ymm21,%ymm21 + vpermq $0x2,%ymm18,%ymm24 + vpermq $0x2,%ymm19,%ymm0 + vpaddq %ymm26,%ymm22,%ymm22 + vpaddq %ymm2,%ymm23,%ymm23 + + vpermq $0x2,%ymm20,%ymm25 + vpermq $0x2,%ymm21,%ymm1 + vpaddq %ymm24,%ymm18,%ymm18{%k1}{z} + vpaddq %ymm0,%ymm19,%ymm19{%k1}{z} + vpermq $0x2,%ymm22,%ymm26 + vpermq $0x2,%ymm23,%ymm2 + vpaddq %ymm25,%ymm20,%ymm20{%k1}{z} + vpaddq %ymm1,%ymm21,%ymm21{%k1}{z} + vpaddq %ymm26,%ymm22,%ymm22{%k1}{z} + vpaddq %ymm2,%ymm23,%ymm23{%k1}{z} + + + + vpsrlq $44,%ymm18,%ymm30 + vpsllq $8,%ymm19,%ymm19 + vpandq %ymm28,%ymm18,%ymm0 + vpaddq %ymm30,%ymm19,%ymm19 + + vpaddq %ymm19,%ymm20,%ymm20 + + vpsrlq $44,%ymm20,%ymm30 + vpsllq $8,%ymm21,%ymm21 + vpandq %ymm28,%ymm20,%ymm1 + vpaddq %ymm30,%ymm21,%ymm21 + + vpaddq %ymm21,%ymm22,%ymm22 + + vpsrlq $42,%ymm22,%ymm30 + vpsllq $10,%ymm23,%ymm23 + vpandq %ymm29,%ymm22,%ymm2 + vpaddq %ymm30,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + vpsllq $2,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + + vpsrlq $44,%ymm0,%ymm30 + vpandq %ymm28,%ymm0,%ymm0 + + vpaddq %ymm30,%ymm1,%ymm1 + + + subq $2,%rdx + ja .Lblocks_vpmadd52_4x_do + + vmovq %xmm0,0(%rdi) + vmovq %xmm1,8(%rdi) + vmovq %xmm2,16(%rdi) + vzeroall + +.Lno_data_vpmadd52_4x: + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x +.type poly1305_blocks_vpmadd52_8x,@function +.align 32 +poly1305_blocks_vpmadd52_8x: +.cfi_startproc + shrq $4,%rdx + jz .Lno_data_vpmadd52_8x + + shlq $40,%rcx + movq 64(%rdi),%r8 + + vmovdqa64 .Lx_mask44(%rip),%ymm28 + vmovdqa64 .Lx_mask42(%rip),%ymm29 + + testq %r8,%r8 + js .Linit_vpmadd52 + + vmovq 0(%rdi),%xmm0 + vmovq 8(%rdi),%xmm1 + vmovq 16(%rdi),%xmm2 + +.Lblocks_vpmadd52_8x: + + + + vmovdqu64 128(%rdi),%ymm5 + vmovdqu64 160(%rdi),%ymm16 + vmovdqu64 64(%rdi),%ymm3 + vmovdqu64 96(%rdi),%ymm4 + + vpsllq $2,%ymm5,%ymm17 + vpaddq %ymm5,%ymm17,%ymm17 + vpsllq $2,%ymm17,%ymm17 + + vpbroadcastq %xmm5,%ymm8 + vpbroadcastq %xmm3,%ymm6 + vpbroadcastq %xmm4,%ymm7 + + vpxorq %ymm18,%ymm18,%ymm18 + vpmadd52luq %ymm8,%ymm16,%ymm18 + vpxorq %ymm19,%ymm19,%ymm19 + vpmadd52huq %ymm8,%ymm16,%ymm19 + vpxorq %ymm20,%ymm20,%ymm20 + vpmadd52luq %ymm8,%ymm17,%ymm20 + vpxorq %ymm21,%ymm21,%ymm21 + vpmadd52huq %ymm8,%ymm17,%ymm21 + vpxorq %ymm22,%ymm22,%ymm22 + vpmadd52luq %ymm8,%ymm3,%ymm22 + vpxorq %ymm23,%ymm23,%ymm23 + vpmadd52huq %ymm8,%ymm3,%ymm23 + + vpmadd52luq %ymm6,%ymm3,%ymm18 + vpmadd52huq %ymm6,%ymm3,%ymm19 + vpmadd52luq %ymm6,%ymm4,%ymm20 + vpmadd52huq %ymm6,%ymm4,%ymm21 + vpmadd52luq %ymm6,%ymm5,%ymm22 + vpmadd52huq %ymm6,%ymm5,%ymm23 + + vpmadd52luq %ymm7,%ymm17,%ymm18 + vpmadd52huq %ymm7,%ymm17,%ymm19 + vpmadd52luq 
%ymm7,%ymm3,%ymm20 + vpmadd52huq %ymm7,%ymm3,%ymm21 + vpmadd52luq %ymm7,%ymm4,%ymm22 + vpmadd52huq %ymm7,%ymm4,%ymm23 + + + + vpsrlq $44,%ymm18,%ymm30 + vpsllq $8,%ymm19,%ymm19 + vpandq %ymm28,%ymm18,%ymm6 + vpaddq %ymm30,%ymm19,%ymm19 + + vpaddq %ymm19,%ymm20,%ymm20 + + vpsrlq $44,%ymm20,%ymm30 + vpsllq $8,%ymm21,%ymm21 + vpandq %ymm28,%ymm20,%ymm7 + vpaddq %ymm30,%ymm21,%ymm21 + + vpaddq %ymm21,%ymm22,%ymm22 + + vpsrlq $42,%ymm22,%ymm30 + vpsllq $10,%ymm23,%ymm23 + vpandq %ymm29,%ymm22,%ymm8 + vpaddq %ymm30,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm6,%ymm6 + vpsllq $2,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm6,%ymm6 + + vpsrlq $44,%ymm6,%ymm30 + vpandq %ymm28,%ymm6,%ymm6 + + vpaddq %ymm30,%ymm7,%ymm7 + + + + + + vpunpcklqdq %ymm5,%ymm8,%ymm26 + vpunpckhqdq %ymm5,%ymm8,%ymm5 + vpunpcklqdq %ymm3,%ymm6,%ymm24 + vpunpckhqdq %ymm3,%ymm6,%ymm3 + vpunpcklqdq %ymm4,%ymm7,%ymm25 + vpunpckhqdq %ymm4,%ymm7,%ymm4 + vshufi64x2 $0x44,%zmm5,%zmm26,%zmm8 + vshufi64x2 $0x44,%zmm3,%zmm24,%zmm6 + vshufi64x2 $0x44,%zmm4,%zmm25,%zmm7 + + vmovdqu64 0(%rsi),%zmm26 + vmovdqu64 64(%rsi),%zmm27 + leaq 128(%rsi),%rsi + + vpsllq $2,%zmm8,%zmm10 + vpsllq $2,%zmm7,%zmm9 + vpaddq %zmm8,%zmm10,%zmm10 + vpaddq %zmm7,%zmm9,%zmm9 + vpsllq $2,%zmm10,%zmm10 + vpsllq $2,%zmm9,%zmm9 + + vpbroadcastq %rcx,%zmm31 + vpbroadcastq %xmm28,%zmm28 + vpbroadcastq %xmm29,%zmm29 + + vpbroadcastq %xmm9,%zmm16 + vpbroadcastq %xmm10,%zmm17 + vpbroadcastq %xmm6,%zmm3 + vpbroadcastq %xmm7,%zmm4 + vpbroadcastq %xmm8,%zmm5 + + vpunpcklqdq %zmm27,%zmm26,%zmm25 + vpunpckhqdq %zmm27,%zmm26,%zmm27 + + + + vpsrlq $24,%zmm27,%zmm26 + vporq %zmm31,%zmm26,%zmm26 + vpaddq %zmm26,%zmm2,%zmm2 + vpandq %zmm28,%zmm25,%zmm24 + vpsrlq $44,%zmm25,%zmm25 + vpsllq $20,%zmm27,%zmm27 + vporq %zmm27,%zmm25,%zmm25 + vpandq %zmm28,%zmm25,%zmm25 + + subq $8,%rdx + jz .Ltail_vpmadd52_8x + jmp .Loop_vpmadd52_8x + +.align 32 +.Loop_vpmadd52_8x: + + vpaddq %zmm24,%zmm0,%zmm0 + vpaddq %zmm25,%zmm1,%zmm1 + + vpxorq %zmm18,%zmm18,%zmm18 + vpmadd52luq %zmm2,%zmm16,%zmm18 + vpxorq %zmm19,%zmm19,%zmm19 + vpmadd52huq %zmm2,%zmm16,%zmm19 + vpxorq %zmm20,%zmm20,%zmm20 + vpmadd52luq %zmm2,%zmm17,%zmm20 + vpxorq %zmm21,%zmm21,%zmm21 + vpmadd52huq %zmm2,%zmm17,%zmm21 + vpxorq %zmm22,%zmm22,%zmm22 + vpmadd52luq %zmm2,%zmm3,%zmm22 + vpxorq %zmm23,%zmm23,%zmm23 + vpmadd52huq %zmm2,%zmm3,%zmm23 + + vmovdqu64 0(%rsi),%zmm26 + vmovdqu64 64(%rsi),%zmm27 + leaq 128(%rsi),%rsi + vpmadd52luq %zmm0,%zmm3,%zmm18 + vpmadd52huq %zmm0,%zmm3,%zmm19 + vpmadd52luq %zmm0,%zmm4,%zmm20 + vpmadd52huq %zmm0,%zmm4,%zmm21 + vpmadd52luq %zmm0,%zmm5,%zmm22 + vpmadd52huq %zmm0,%zmm5,%zmm23 + + vpunpcklqdq %zmm27,%zmm26,%zmm25 + vpunpckhqdq %zmm27,%zmm26,%zmm27 + vpmadd52luq %zmm1,%zmm17,%zmm18 + vpmadd52huq %zmm1,%zmm17,%zmm19 + vpmadd52luq %zmm1,%zmm3,%zmm20 + vpmadd52huq %zmm1,%zmm3,%zmm21 + vpmadd52luq %zmm1,%zmm4,%zmm22 + vpmadd52huq %zmm1,%zmm4,%zmm23 + + + + vpsrlq $44,%zmm18,%zmm30 + vpsllq $8,%zmm19,%zmm19 + vpandq %zmm28,%zmm18,%zmm0 + vpaddq %zmm30,%zmm19,%zmm19 + + vpsrlq $24,%zmm27,%zmm26 + vporq %zmm31,%zmm26,%zmm26 + vpaddq %zmm19,%zmm20,%zmm20 + + vpsrlq $44,%zmm20,%zmm30 + vpsllq $8,%zmm21,%zmm21 + vpandq %zmm28,%zmm20,%zmm1 + vpaddq %zmm30,%zmm21,%zmm21 + + vpandq %zmm28,%zmm25,%zmm24 + vpsrlq $44,%zmm25,%zmm25 + vpsllq $20,%zmm27,%zmm27 + vpaddq %zmm21,%zmm22,%zmm22 + + vpsrlq $42,%zmm22,%zmm30 + vpsllq $10,%zmm23,%zmm23 + vpandq %zmm29,%zmm22,%zmm2 + vpaddq %zmm30,%zmm23,%zmm23 + + vpaddq %zmm26,%zmm2,%zmm2 + vpaddq %zmm23,%zmm0,%zmm0 + vpsllq $2,%zmm23,%zmm23 + + vpaddq %zmm23,%zmm0,%zmm0 + vporq 
%zmm27,%zmm25,%zmm25 + vpandq %zmm28,%zmm25,%zmm25 + + vpsrlq $44,%zmm0,%zmm30 + vpandq %zmm28,%zmm0,%zmm0 + + vpaddq %zmm30,%zmm1,%zmm1 + + subq $8,%rdx + jnz .Loop_vpmadd52_8x + +.Ltail_vpmadd52_8x: + + vpaddq %zmm24,%zmm0,%zmm0 + vpaddq %zmm25,%zmm1,%zmm1 + + vpxorq %zmm18,%zmm18,%zmm18 + vpmadd52luq %zmm2,%zmm9,%zmm18 + vpxorq %zmm19,%zmm19,%zmm19 + vpmadd52huq %zmm2,%zmm9,%zmm19 + vpxorq %zmm20,%zmm20,%zmm20 + vpmadd52luq %zmm2,%zmm10,%zmm20 + vpxorq %zmm21,%zmm21,%zmm21 + vpmadd52huq %zmm2,%zmm10,%zmm21 + vpxorq %zmm22,%zmm22,%zmm22 + vpmadd52luq %zmm2,%zmm6,%zmm22 + vpxorq %zmm23,%zmm23,%zmm23 + vpmadd52huq %zmm2,%zmm6,%zmm23 + + vpmadd52luq %zmm0,%zmm6,%zmm18 + vpmadd52huq %zmm0,%zmm6,%zmm19 + vpmadd52luq %zmm0,%zmm7,%zmm20 + vpmadd52huq %zmm0,%zmm7,%zmm21 + vpmadd52luq %zmm0,%zmm8,%zmm22 + vpmadd52huq %zmm0,%zmm8,%zmm23 + + vpmadd52luq %zmm1,%zmm10,%zmm18 + vpmadd52huq %zmm1,%zmm10,%zmm19 + vpmadd52luq %zmm1,%zmm6,%zmm20 + vpmadd52huq %zmm1,%zmm6,%zmm21 + vpmadd52luq %zmm1,%zmm7,%zmm22 + vpmadd52huq %zmm1,%zmm7,%zmm23 + + + + + movl $1,%eax + kmovw %eax,%k1 + vpsrldq $8,%zmm18,%zmm24 + vpsrldq $8,%zmm19,%zmm0 + vpsrldq $8,%zmm20,%zmm25 + vpsrldq $8,%zmm21,%zmm1 + vpaddq %zmm24,%zmm18,%zmm18 + vpaddq %zmm0,%zmm19,%zmm19 + vpsrldq $8,%zmm22,%zmm26 + vpsrldq $8,%zmm23,%zmm2 + vpaddq %zmm25,%zmm20,%zmm20 + vpaddq %zmm1,%zmm21,%zmm21 + vpermq $0x2,%zmm18,%zmm24 + vpermq $0x2,%zmm19,%zmm0 + vpaddq %zmm26,%zmm22,%zmm22 + vpaddq %zmm2,%zmm23,%zmm23 + + vpermq $0x2,%zmm20,%zmm25 + vpermq $0x2,%zmm21,%zmm1 + vpaddq %zmm24,%zmm18,%zmm18 + vpaddq %zmm0,%zmm19,%zmm19 + vpermq $0x2,%zmm22,%zmm26 + vpermq $0x2,%zmm23,%zmm2 + vpaddq %zmm25,%zmm20,%zmm20 + vpaddq %zmm1,%zmm21,%zmm21 + vextracti64x4 $1,%zmm18,%ymm24 + vextracti64x4 $1,%zmm19,%ymm0 + vpaddq %zmm26,%zmm22,%zmm22 + vpaddq %zmm2,%zmm23,%zmm23 + + vextracti64x4 $1,%zmm20,%ymm25 + vextracti64x4 $1,%zmm21,%ymm1 + vextracti64x4 $1,%zmm22,%ymm26 + vextracti64x4 $1,%zmm23,%ymm2 + vpaddq %ymm24,%ymm18,%ymm18{%k1}{z} + vpaddq %ymm0,%ymm19,%ymm19{%k1}{z} + vpaddq %ymm25,%ymm20,%ymm20{%k1}{z} + vpaddq %ymm1,%ymm21,%ymm21{%k1}{z} + vpaddq %ymm26,%ymm22,%ymm22{%k1}{z} + vpaddq %ymm2,%ymm23,%ymm23{%k1}{z} + + + + vpsrlq $44,%ymm18,%ymm30 + vpsllq $8,%ymm19,%ymm19 + vpandq %ymm28,%ymm18,%ymm0 + vpaddq %ymm30,%ymm19,%ymm19 + + vpaddq %ymm19,%ymm20,%ymm20 + + vpsrlq $44,%ymm20,%ymm30 + vpsllq $8,%ymm21,%ymm21 + vpandq %ymm28,%ymm20,%ymm1 + vpaddq %ymm30,%ymm21,%ymm21 + + vpaddq %ymm21,%ymm22,%ymm22 + + vpsrlq $42,%ymm22,%ymm30 + vpsllq $10,%ymm23,%ymm23 + vpandq %ymm29,%ymm22,%ymm2 + vpaddq %ymm30,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + vpsllq $2,%ymm23,%ymm23 + + vpaddq %ymm23,%ymm0,%ymm0 + + vpsrlq $44,%ymm0,%ymm30 + vpandq %ymm28,%ymm0,%ymm0 + + vpaddq %ymm30,%ymm1,%ymm1 + + + + vmovq %xmm0,0(%rdi) + vmovq %xmm1,8(%rdi) + vmovq %xmm2,16(%rdi) + vzeroall + +.Lno_data_vpmadd52_8x: + .byte 0xf3,0xc3 +.cfi_endproc +.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x +.type poly1305_emit_base2_44,@function +.align 32 +poly1305_emit_base2_44: +.cfi_startproc +.byte 243,15,30,250 + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + + movq %r9,%rax + shrq $20,%r9 + shlq $44,%rax + movq %r10,%rcx + shrq $40,%r10 + shlq $24,%rcx + + addq %rax,%r8 + adcq %rcx,%r9 + adcq $0,%r10 + + movq %r8,%rax + addq $5,%r8 + movq %r9,%rcx + adcq $0,%r9 + adcq $0,%r10 + shrq $2,%r10 + cmovnzq %r8,%rax + cmovnzq %r9,%rcx + + addq 0(%rdx),%rax + adcq 8(%rdx),%rcx + movq %rax,0(%rsi) + movq %rcx,8(%rsi) + + .byte 0xf3,0xc3 +.cfi_endproc +.size 
poly1305_emit_base2_44,.-poly1305_emit_base2_44 +.section .rodata +.align 64 +.Lconst: +.Lmask24: +.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 +.L129: +.long 16777216,0,16777216,0,16777216,0,16777216,0 +.Lmask26: +.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 +.Lpermd_avx2: +.long 2,2,2,3,2,0,2,1 +.Lpermd_avx512: +.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7 + +.L2_44_inp_permd: +.long 0,1,1,2,2,3,7,7 +.L2_44_inp_shift: +.quad 0,12,24,64 +.L2_44_mask: +.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff +.L2_44_shift_rgt: +.quad 44,44,42,64 +.L2_44_shift_lft: +.quad 8,8,10,64 + +.align 64 +.Lx_mask44: +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff +.Lx_mask42: +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff +.previous +.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 +.globl xor128_encrypt_n_pad +.type xor128_encrypt_n_pad,@function +.align 16 +xor128_encrypt_n_pad: +.cfi_startproc + subq %rdx,%rsi + subq %rdx,%rdi + movq %rcx,%r10 + shrq $4,%rcx + jz .Ltail_enc + nop +.Loop_enc_xmm: + movdqu (%rsi,%rdx,1),%xmm0 + pxor (%rdx),%xmm0 + movdqu %xmm0,(%rdi,%rdx,1) + movdqa %xmm0,(%rdx) + leaq 16(%rdx),%rdx + decq %rcx + jnz .Loop_enc_xmm + + andq $15,%r10 + jz .Ldone_enc + +.Ltail_enc: + movq $16,%rcx + subq %r10,%rcx + xorl %eax,%eax +.Loop_enc_byte: + movb (%rsi,%rdx,1),%al + xorb (%rdx),%al + movb %al,(%rdi,%rdx,1) + movb %al,(%rdx) + leaq 1(%rdx),%rdx + decq %r10 + jnz .Loop_enc_byte + + xorl %eax,%eax +.Loop_enc_pad: + movb %al,(%rdx) + leaq 1(%rdx),%rdx + decq %rcx + jnz .Loop_enc_pad + +.Ldone_enc: + movq %rdx,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad + +.globl xor128_decrypt_n_pad +.type xor128_decrypt_n_pad,@function +.align 16 +xor128_decrypt_n_pad: +.cfi_startproc + subq %rdx,%rsi + subq %rdx,%rdi + movq %rcx,%r10 + shrq $4,%rcx + jz .Ltail_dec + nop +.Loop_dec_xmm: + movdqu (%rsi,%rdx,1),%xmm0 + movdqa (%rdx),%xmm1 + pxor %xmm0,%xmm1 + movdqu %xmm1,(%rdi,%rdx,1) + movdqa %xmm0,(%rdx) + leaq 16(%rdx),%rdx + decq %rcx + jnz .Loop_dec_xmm + + pxor %xmm1,%xmm1 + andq $15,%r10 + jz .Ldone_dec + +.Ltail_dec: + movq $16,%rcx + subq %r10,%rcx + xorl %eax,%eax + xorq %r11,%r11 +.Loop_dec_byte: + movb (%rsi,%rdx,1),%r11b + movb (%rdx),%al + xorb %r11b,%al + movb %al,(%rdi,%rdx,1) + movb %r11b,(%rdx) + leaq 1(%rdx),%rdx + decq %r10 + jnz .Loop_dec_byte + + xorl %eax,%eax +.Loop_dec_pad: + movb %al,(%rdx) + leaq 1(%rdx),%rdx + decq %rcx + jnz .Loop_dec_pad + +.Ldone_dec: + movq %rdx,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad diff --git a/contrib/openssl-cmake/asm/crypto/ppccpuid.s b/contrib/openssl-cmake/asm/crypto/ppccpuid.s new file mode 100644 index 000000000000..f8c10ec9cf9b --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/ppccpuid.s @@ -0,0 +1,400 @@ +.machine "any" +.abiversion 2 +.text + +.globl OPENSSL_fpu_probe +.type OPENSSL_fpu_probe,@function +.align 4 +OPENSSL_fpu_probe: +.localentry OPENSSL_fpu_probe,0 + + fmr 0,0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_fpu_probe,.-OPENSSL_fpu_probe +.globl OPENSSL_ppc64_probe +.type OPENSSL_ppc64_probe,@function +.align 4 +OPENSSL_ppc64_probe: +.localentry OPENSSL_ppc64_probe,0 + + fcfid 1,1 + rldicl 
0,0,32,32 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_ppc64_probe,.-OPENSSL_ppc64_probe + +.globl OPENSSL_altivec_probe +.type OPENSSL_altivec_probe,@function +.align 4 +OPENSSL_altivec_probe: +.localentry OPENSSL_altivec_probe,0 + +.long 0x10000484 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_altivec_probe,.-OPENSSL_altivec_probe + +.globl OPENSSL_crypto207_probe +.type OPENSSL_crypto207_probe,@function +.align 4 +OPENSSL_crypto207_probe: +.localentry OPENSSL_crypto207_probe,0 + + .long 0x7C000E99 + .long 0x10000508 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_crypto207_probe,.-OPENSSL_crypto207_probe + +.globl OPENSSL_madd300_probe +.type OPENSSL_madd300_probe,@function +.align 4 +OPENSSL_madd300_probe: +.localentry OPENSSL_madd300_probe,0 + + xor 0,0,0 + .long 0x10600033 + .long 0x10600031 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 + +.globl OPENSSL_brd31_probe +.type OPENSSL_brd31_probe,@function +.align 4 +OPENSSL_brd31_probe: +.localentry OPENSSL_brd31_probe,0 + + xor 0,0,0 + .long 0x7C030176 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_brd31_probe,.-OPENSSL_brd31_probe + + +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 4 +OPENSSL_wipe_cpu: +.localentry OPENSSL_wipe_cpu,0 + + xor 0,0,0 + fmr 0,31 + fmr 1,31 + fmr 2,31 + mr 3,1 + fmr 3,31 + xor 4,4,4 + fmr 4,31 + xor 5,5,5 + fmr 5,31 + xor 6,6,6 + fmr 6,31 + xor 7,7,7 + fmr 7,31 + xor 8,8,8 + fmr 8,31 + xor 9,9,9 + fmr 9,31 + xor 10,10,10 + fmr 10,31 + xor 11,11,11 + fmr 11,31 + xor 12,12,12 + fmr 12,31 + fmr 13,31 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 4 +OPENSSL_atomic_add: +.localentry OPENSSL_atomic_add,0 + +.Ladd: lwarx 5,0,3 + add 0,4,5 + stwcx. 0,0,3 + bne- .Ladd + extsw 3,0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.globl OPENSSL_rdtsc_mftb +.type OPENSSL_rdtsc_mftb,@function +.align 4 +OPENSSL_rdtsc_mftb: +.localentry OPENSSL_rdtsc_mftb,0 + + mftb 3 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_rdtsc_mftb,.-OPENSSL_rdtsc_mftb + +.globl OPENSSL_rdtsc_mfspr268 +.type OPENSSL_rdtsc_mfspr268,@function +.align 4 +OPENSSL_rdtsc_mfspr268: +.localentry OPENSSL_rdtsc_mfspr268,0 + + mfspr 3,268 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_rdtsc_mfspr268,.-OPENSSL_rdtsc_mfspr268 + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 4 +OPENSSL_cleanse: +.localentry OPENSSL_cleanse,0 + + cmpldi 4,7 + li 0,0 + bge .Lot + cmpldi 4,0 + .long 0x4DC20020 +.Little: mtctr 4 + stb 0,0(3) + addi 3,3,1 + bdnz $-8 + blr +.Lot: andi. 5,3,3 + beq .Laligned + stb 0,0(3) + subi 4,4,1 + addi 3,3,1 + b .Lot +.Laligned: + srdi 5,4,2 + mtctr 5 + stw 0,0(3) + addi 3,3,4 + bdnz $-8 + andi. 
4,4,3 + bne .Little + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +.align 4 +CRYPTO_memcmp: +.localentry CRYPTO_memcmp,0 + + cmpldi 5,0 + li 0,0 + beq .Lno_data + mtctr 5 +.Loop_cmp: + lbz 6,0(3) + addi 3,3,1 + lbz 7,0(4) + addi 4,4,1 + xor 6,6,7 + or 0,0,6 + bdnz .Loop_cmp + +.Lno_data: + li 3,0 + sub 3,3,0 + extrwi 3,3,1,0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 +.size CRYPTO_memcmp,.-CRYPTO_memcmp +.globl OPENSSL_instrument_bus_mftb +.type OPENSSL_instrument_bus_mftb,@function +.align 4 +OPENSSL_instrument_bus_mftb: +.localentry OPENSSL_instrument_bus_mftb,0 + + mtctr 4 + + mftb 7 + li 8,0 + + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + +.Loop: mftb 6 + sub 8,6,7 + mr 7,6 + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + addi 3,3,4 + bdnz .Loop + + mr 3,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 +.size OPENSSL_instrument_bus_mftb,.-OPENSSL_instrument_bus_mftb + +.globl OPENSSL_instrument_bus2_mftb +.type OPENSSL_instrument_bus2_mftb,@function +.align 4 +OPENSSL_instrument_bus2_mftb: +.localentry OPENSSL_instrument_bus2_mftb,0 + + mr 0,4 + slwi 4,4,2 + + mftb 7 + li 8,0 + + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + + mftb 6 + sub 8,6,7 + mr 7,6 + mr 9,8 +.Loop2: + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + + addic. 5,5,-1 + beq .Ldone2 + + mftb 6 + sub 8,6,7 + mr 7,6 + cmplw 7,8,9 + mr 9,8 + + mfcr 6 + not 6,6 + rlwinm 6,6,1,29,29 + + sub. 4,4,6 + add 3,3,6 + bne .Loop2 + +.Ldone2: + srwi 4,4,2 + sub 3,0,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 +.size OPENSSL_instrument_bus2_mftb,.-OPENSSL_instrument_bus2_mftb + +.globl OPENSSL_instrument_bus_mfspr268 +.type OPENSSL_instrument_bus_mfspr268,@function +.align 4 +OPENSSL_instrument_bus_mfspr268: +.localentry OPENSSL_instrument_bus_mfspr268,0 + + mtctr 4 + + mfspr 7,268 + li 8,0 + + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + +.Loop3: mfspr 6,268 + sub 8,6,7 + mr 7,6 + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + addi 3,3,4 + bdnz .Loop3 + + mr 3,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,2,0 +.long 0 +.size OPENSSL_instrument_bus_mfspr268,.-OPENSSL_instrument_bus_mfspr268 + +.globl OPENSSL_instrument_bus2_mfspr268 +.type OPENSSL_instrument_bus2_mfspr268,@function +.align 4 +OPENSSL_instrument_bus2_mfspr268: +.localentry OPENSSL_instrument_bus2_mfspr268,0 + + mr 0,4 + slwi 4,4,2 + + mfspr 7,268 + li 8,0 + + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + + mfspr 6,268 + sub 8,6,7 + mr 7,6 + mr 9,8 +.Loop4: + dcbf 0,3 + lwarx 6,0,3 + add 6,6,8 + stwcx. 6,0,3 + stwx 6,0,3 + + addic. 5,5,-1 + beq .Ldone4 + + mfspr 6,268 + sub 8,6,7 + mr 7,6 + cmplw 7,8,9 + mr 9,8 + + mfcr 6 + not 6,6 + rlwinm 6,6,1,29,29 + + sub. 
4,4,6 + add 3,3,6 + bne .Loop4 + +.Ldone4: + srwi 4,4,2 + sub 3,0,4 + blr +.long 0 +.byte 0,12,0x14,0,0,0,3,0 +.long 0 +.size OPENSSL_instrument_bus2_mfspr268,.-OPENSSL_instrument_bus2_mfspr268 diff --git a/contrib/openssl-cmake/asm/crypto/rc4/rc4-md5-x86_64.s b/contrib/openssl-cmake/asm/crypto/rc4/rc4-md5-x86_64.s new file mode 100644 index 000000000000..03fbca89de46 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/rc4/rc4-md5-x86_64.s @@ -0,0 +1,1281 @@ +.text +.align 16 + +.globl rc4_md5_enc +.type rc4_md5_enc,@function +rc4_md5_enc: +.cfi_startproc + cmpq $0,%r9 + je .Labort + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + subq $40,%rsp +.cfi_adjust_cfa_offset 40 +.Lbody: + movq %rcx,%r11 + movq %r9,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %r8,%r15 + xorq %rbp,%rbp + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%bpl + movb -4(%rdi),%cl + + incb %bpl + subq %r13,%r14 + movl (%rdi,%rbp,4),%eax + addb %al,%cl + leaq (%rdi,%rbp,4),%rsi + shlq $6,%r12 + addq %r15,%r12 + movq %r12,16(%rsp) + + movq %r11,24(%rsp) + movl 0(%r11),%r8d + movl 4(%r11),%r9d + movl 8(%r11),%r10d + movl 12(%r11),%r11d + jmp .Loop + +.align 16 +.Loop: + movl %r8d,0(%rsp) + movl %r9d,4(%rsp) + movl %r10d,8(%rsp) + movl %r11d,%r12d + movl %r11d,12(%rsp) + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $3614090360,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,0(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 4(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $3905402710,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,4(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $606105819,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,8(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 12(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax + addl $3250441966,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,12(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $4118548399,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,16(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 20(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1200080426,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl 
%edx,20(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $2821735955,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,24(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 28(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $4249261313,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,28(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 32(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $1770035416,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,32(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 36(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $2336552879,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,36(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $4294925233,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,40(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 44(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $2304563134,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,44(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $22,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $1804603682,%r8d + xorl %r11d,%r12d + movzbl %al,%eax + movl %edx,48(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $7,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 52(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $4254626195,%r11d + xorl %r10d,%r12d + movzbl %bl,%ebx + movl %edx,52(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $12,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $2792965006,%r10d + xorl %r9d,%r12d + movzbl %al,%eax + movl %edx,56(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $17,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu (%r13),%xmm2 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 60(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $1236535329,%r9d + xorl %r8d,%r12d + movzbl %bl,%ebx + movl %edx,60(%rsi) + addl %r12d,%r9d + addb 
%al,%cl + roll $22,%r9d + movl %r10d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4129170786,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 24(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $3225465664,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $643717713,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 0(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $3921069994,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $3593408605,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 40(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $38016083,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $3634488961,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 16(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $3889429448,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $568446438,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 56(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $3275163606,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,100(%rsi) + addl 
%r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $4107603335,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 32(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1163531501,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r10d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r11d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $2850285829,%r8d + xorl %r10d,%r12d + movzbl %al,%eax + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $5,%r8d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r10d,%r12d + addl 8(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $4243563512,%r11d + xorl %r9d,%r12d + movzbl %bl,%ebx + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $9,%r11d + movl %r8d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + andl %r9d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $1735328473,%r10d + xorl %r8d,%r12d + movzbl %al,%eax + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $14,%r10d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 16(%r13),%xmm3 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + andl %r8d,%r12d + addl 48(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $2368359562,%r9d + xorl %r11d,%r12d + movzbl %bl,%ebx + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $20,%r9d + movl %r11d,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 20(%r15),%r8d + addb %dl,%al + movl 4(%rsi),%ebx + addl $4294588738,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,0(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 32(%r15),%r11d + addb %dl,%bl + movl 8(%rsi),%eax + addl $2272392833,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,4(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 44(%r15),%r10d + addb %dl,%al + movl 12(%rsi),%ebx + addl $1839030562,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,8(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 56(%r15),%r9d + addb %dl,%bl + movl 16(%rsi),%eax 
+ addl $4259657740,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,12(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 4(%r15),%r8d + addb %dl,%al + movl 20(%rsi),%ebx + addl $2763975236,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,16(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 16(%r15),%r11d + addb %dl,%bl + movl 24(%rsi),%eax + addl $1272893353,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,20(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 28(%r15),%r10d + addb %dl,%al + movl 28(%rsi),%ebx + addl $4139469664,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,24(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 40(%r15),%r9d + addb %dl,%bl + movl 32(%rsi),%eax + addl $3200236656,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,28(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 52(%r15),%r8d + addb %dl,%al + movl 36(%rsi),%ebx + addl $681279174,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,32(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 0(%r15),%r11d + addb %dl,%bl + movl 40(%rsi),%eax + addl $3936430074,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,36(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r11d,%r12d + addl 12(%r15),%r10d + addb %dl,%al + movl 44(%rsi),%ebx + addl $3572445317,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,40(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 24(%r15),%r9d + addb %dl,%bl + movl 48(%rsi),%eax + addl $76029189,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,44(%rsi) + addb %al,%cl + roll $23,%r9d + movl %r11d,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl %r9d,%r12d + addl 36(%r15),%r8d + addb %dl,%al + movl 52(%rsi),%ebx + addl $3654602809,%r8d + movzbl %al,%eax + addl %r12d,%r8d + movl %edx,48(%rsi) + addb %bl,%cl + roll $4,%r8d + movl %r10d,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r8d,%r12d + addl 48(%r15),%r11d + addb %dl,%bl + movl 56(%rsi),%eax + addl $3873151461,%r11d + movzbl %bl,%ebx + addl %r12d,%r11d + movl %edx,52(%rsi) + addb %al,%cl + roll $11,%r11d + movl %r9d,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %eax,(%rdi,%rcx,4) + xorl 
%r11d,%r12d + addl 60(%r15),%r10d + addb %dl,%al + movl 60(%rsi),%ebx + addl $530742520,%r10d + movzbl %al,%eax + addl %r12d,%r10d + movl %edx,56(%rsi) + addb %bl,%cl + roll $16,%r10d + movl %r8d,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 32(%r13),%xmm4 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %ebx,(%rdi,%rcx,4) + xorl %r10d,%r12d + addl 8(%r15),%r9d + addb %dl,%bl + movl 64(%rsi),%eax + addl $3299628645,%r9d + movzbl %bl,%ebx + addl %r12d,%r9d + movl %edx,60(%rsi) + addb %al,%cl + roll $23,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + psllq $8,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm1,%xmm4 + pxor %xmm0,%xmm0 + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 0(%r15),%r8d + addb %dl,%al + movl 68(%rsi),%ebx + addl $4096336452,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,64(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + movd (%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + pxor %xmm1,%xmm1 + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 28(%r15),%r11d + addb %dl,%bl + movl 72(%rsi),%eax + addl $1126891415,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,68(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + movd (%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 56(%r15),%r10d + addb %dl,%al + movl 76(%rsi),%ebx + addl $2878612391,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,72(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $1,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 20(%r15),%r9d + addb %dl,%bl + movl 80(%rsi),%eax + addl $4237533241,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,76(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $1,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 48(%r15),%r8d + addb %dl,%al + movl 84(%rsi),%ebx + addl $1700485571,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,80(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $2,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 12(%r15),%r11d + addb %dl,%bl + movl 88(%rsi),%eax + addl $2399980690,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,84(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $2,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 40(%r15),%r10d + addb %dl,%al + movl 92(%rsi),%ebx + addl $4293915773,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,88(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $3,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 4(%r15),%r9d + addb %dl,%bl + movl 96(%rsi),%eax + addl $2240044497,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,92(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $3,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 
32(%r15),%r8d + addb %dl,%al + movl 100(%rsi),%ebx + addl $1873313359,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,96(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $4,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 60(%r15),%r11d + addb %dl,%bl + movl 104(%rsi),%eax + addl $4264355552,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,100(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $4,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 24(%r15),%r10d + addb %dl,%al + movl 108(%rsi),%ebx + addl $2734768916,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,104(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $5,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 52(%r15),%r9d + addb %dl,%bl + movl 112(%rsi),%eax + addl $1309151649,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,108(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $5,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movl (%rdi,%rcx,4),%edx + xorl %r11d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r9d,%r12d + addl 16(%r15),%r8d + addb %dl,%al + movl 116(%rsi),%ebx + addl $4149444226,%r8d + movzbl %al,%eax + xorl %r10d,%r12d + movl %edx,112(%rsi) + addl %r12d,%r8d + addb %bl,%cl + roll $6,%r8d + movl $-1,%r12d + pinsrw $6,(%rdi,%rax,4),%xmm0 + + addl %r9d,%r8d + movl (%rdi,%rcx,4),%edx + xorl %r10d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r8d,%r12d + addl 44(%r15),%r11d + addb %dl,%bl + movl 120(%rsi),%eax + addl $3174756917,%r11d + movzbl %bl,%ebx + xorl %r9d,%r12d + movl %edx,116(%rsi) + addl %r12d,%r11d + addb %al,%cl + roll $10,%r11d + movl $-1,%r12d + pinsrw $6,(%rdi,%rbx,4),%xmm1 + + addl %r8d,%r11d + movl (%rdi,%rcx,4),%edx + xorl %r9d,%r12d + movl %eax,(%rdi,%rcx,4) + orl %r11d,%r12d + addl 8(%r15),%r10d + addb %dl,%al + movl 124(%rsi),%ebx + addl $718787259,%r10d + movzbl %al,%eax + xorl %r8d,%r12d + movl %edx,120(%rsi) + addl %r12d,%r10d + addb %bl,%cl + roll $15,%r10d + movl $-1,%r12d + pinsrw $7,(%rdi,%rax,4),%xmm0 + + addl %r11d,%r10d + movdqu 48(%r13),%xmm5 + addb $32,%bpl + movl (%rdi,%rcx,4),%edx + xorl %r8d,%r12d + movl %ebx,(%rdi,%rcx,4) + orl %r10d,%r12d + addl 36(%r15),%r9d + addb %dl,%bl + movl 0(%rdi,%rbp,4),%eax + addl $3951481745,%r9d + movzbl %bl,%ebx + xorl %r11d,%r12d + movl %edx,124(%rsi) + addl %r12d,%r9d + addb %al,%cl + roll $21,%r9d + movl $-1,%r12d + pinsrw $7,(%rdi,%rbx,4),%xmm1 + + addl %r10d,%r9d + movq %rbp,%rsi + xorq %rbp,%rbp + movb %sil,%bpl + movq %rcx,%rsi + xorq %rcx,%rcx + movb %sil,%cl + leaq (%rdi,%rbp,4),%rsi + psllq $8,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm1,%xmm5 + addl 0(%rsp),%r8d + addl 4(%rsp),%r9d + addl 8(%rsp),%r10d + addl 12(%rsp),%r11d + + movdqu %xmm2,(%r14,%r13,1) + movdqu %xmm3,16(%r14,%r13,1) + movdqu %xmm4,32(%r14,%r13,1) + movdqu %xmm5,48(%r14,%r13,1) + leaq 64(%r15),%r15 + leaq 64(%r13),%r13 + cmpq 16(%rsp),%r15 + jb .Loop + + movq 24(%rsp),%r12 + subb %al,%cl + movl %r8d,0(%r12) + movl %r9d,4(%r12) + movl %r10d,8(%r12) + movl %r11d,12(%r12) + subb $1,%bpl + movl %ebp,-8(%rdi) + movl %ecx,-4(%rdi) + + movq 40(%rsp),%r15 +.cfi_restore %r15 + movq 48(%rsp),%r14 +.cfi_restore %r14 + movq 56(%rsp),%r13 +.cfi_restore %r13 + movq 64(%rsp),%r12 +.cfi_restore %r12 + movq 
72(%rsp),%rbp +.cfi_restore %rbp + movq 80(%rsp),%rbx +.cfi_restore %rbx + leaq 88(%rsp),%rsp +.cfi_adjust_cfa_offset -88 +.Lepilogue: +.Labort: + .byte 0xf3,0xc3 +.cfi_endproc +.size rc4_md5_enc,.-rc4_md5_enc diff --git a/contrib/openssl-cmake/asm/crypto/rc4/rc4-s390x.S b/contrib/openssl-cmake/asm/crypto/rc4/rc4-s390x.S new file mode 100644 index 000000000000..d88918c0134d --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/rc4/rc4-s390x.S @@ -0,0 +1,240 @@ +.text + +.globl RC4 +.type RC4,@function +.align 64 +RC4: + stmg %r6,%r11,6*8(%r15) + llgc %r6,0(%r2) + llgc %r10,1(%r2) + la %r6,1(%r6) + nill %r6,0xff + srlg %r1,%r3,3 + ltgr %r1,%r1 + llgc %r8,2(%r6,%r2) + jz .Lshort + j .Loop8 + +.align 64 +.Loop8: + la %r10,0(%r10,%r8) # 0 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov0 + la %r9,0(%r8) +.Lcmov0: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 1 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + llgc %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov1 + la %r8,0(%r9) +.Lcmov1: + la %r11,0(%r11,%r9) + nill %r11,255 + la %r10,0(%r10,%r8) # 2 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov2 + la %r9,0(%r8) +.Lcmov2: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 3 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov3 + la %r8,0(%r9) +.Lcmov3: + la %r11,0(%r11,%r9) + nill %r11,255 + la %r10,0(%r10,%r8) # 4 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov4 + la %r9,0(%r8) +.Lcmov4: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 5 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov5 + la %r8,0(%r9) +.Lcmov5: + la %r11,0(%r11,%r9) + nill %r11,255 + la %r10,0(%r10,%r8) # 6 + nill %r10,255 + la %r7,1(%r6) + nill %r7,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + llgc %r9,2(%r7,%r2) + stc %r11,2(%r6,%r2) + cr %r7,%r10 + jne .Lcmov6 + la %r9,0(%r8) +.Lcmov6: + la %r11,0(%r11,%r8) + nill %r11,255 + la %r10,0(%r10,%r9) # 7 + nill %r10,255 + la %r6,1(%r7) + nill %r6,255 + sllg %r0,%r0,8 + ic %r0,2(%r11,%r2) + llgc %r11,2(%r10,%r2) + stc %r9,2(%r10,%r2) + llgc %r8,2(%r6,%r2) + stc %r11,2(%r7,%r2) + cr %r6,%r10 + jne .Lcmov7 + la %r8,0(%r9) +.Lcmov7: + la %r11,0(%r11,%r9) + nill %r11,255 + lg %r9,0(%r4) + sllg %r0,%r0,8 + la %r4,8(%r4) + ic %r0,2(%r11,%r2) + xgr %r0,%r9 + stg %r0,0(%r5) + la %r5,8(%r5) + brctg %r1,.Loop8 + +.Lshort: + lghi %r0,7 + ngr %r3,%r0 + jz .Lexit + j .Loop1 + +.align 16 +.Loop1: + la %r10,0(%r10,%r8) + nill %r10,255 + llgc %r11,2(%r10,%r2) + stc %r8,2(%r10,%r2) + stc %r11,2(%r6,%r2) + ar %r11,%r8 + ahi %r6,1 + nill %r11,255 + nill %r6,255 + llgc %r0,0(%r4) + la %r4,1(%r4) + llgc %r11,2(%r11,%r2) + llgc %r8,2(%r6,%r2) + xr %r0,%r11 + stc %r0,0(%r5) + la %r5,1(%r5) + brct %r3,.Loop1 + +.Lexit: + ahi 
%r6,-1 + stc %r6,0(%r2) + stc %r10,1(%r2) + lmg %r6,%r11,6*8(%r15) + br %r14 +.size RC4,.-RC4 +.string "RC4 for s390x, CRYPTOGAMS by " + +.globl RC4_set_key +.type RC4_set_key,@function +.align 64 +RC4_set_key: + stmg %r6,%r8,6*8(%r15) + lhi %r0,256 + la %r1,0 + sth %r1,0(%r2) +.align 4 +.L1stloop: + stc %r1,2(%r1,%r2) + la %r1,1(%r1) + brct %r0,.L1stloop + + lghi %r7,-256 + lr %r0,%r3 + la %r8,0 + la %r1,0 +.align 16 +.L2ndloop: + llgc %r5,2+256(%r7,%r2) + llgc %r6,0(%r8,%r4) + la %r1,0(%r1,%r5) + la %r7,1(%r7) + la %r1,0(%r1,%r6) + nill %r1,255 + la %r8,1(%r8) + tml %r7,255 + llgc %r6,2(%r1,%r2) + stc %r6,2+256-1(%r7,%r2) + stc %r5,2(%r1,%r2) + jz .Ldone + brct %r0,.L2ndloop + lr %r0,%r3 + la %r8,0 + j .L2ndloop +.Ldone: + lmg %r6,%r8,6*8(%r15) + br %r14 +.size RC4_set_key,.-RC4_set_key + +.globl RC4_options +.type RC4_options,@function +.align 16 +RC4_options: + larl %r2,.Loptions + br %r14 +.size RC4_options,.-RC4_options +.section .rodata +.Loptions: +.align 8 +.string "rc4(8x,char)" diff --git a/contrib/openssl-cmake/asm/crypto/rc4/rc4-x86_64.s b/contrib/openssl-cmake/asm/crypto/rc4/rc4-x86_64.s new file mode 100644 index 000000000000..5ae5dba4cdd8 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/rc4/rc4-x86_64.s @@ -0,0 +1,635 @@ +.text + + +.globl RC4 +.type RC4,@function +.align 16 +RC4: +.cfi_startproc +.byte 243,15,30,250 + orq %rsi,%rsi + jne .Lentry + .byte 0xf3,0xc3 +.Lentry: + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-24 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-32 +.Lprologue: + movq %rsi,%r11 + movq %rdx,%r12 + movq %rcx,%r13 + xorq %r10,%r10 + xorq %rcx,%rcx + + leaq 8(%rdi),%rdi + movb -8(%rdi),%r10b + movb -4(%rdi),%cl + cmpl $-1,256(%rdi) + je .LRC4_CHAR + movl OPENSSL_ia32cap_P(%rip),%r8d + xorq %rbx,%rbx + incb %r10b + subq %r10,%rbx + subq %r12,%r13 + movl (%rdi,%r10,4),%eax + testq $-16,%r11 + jz .Lloop1 + btl $30,%r8d + jc .Lintel + andq $7,%rbx + leaq 1(%r10),%rsi + jz .Loop8 + subq %rbx,%r11 +.Loop8_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop8_warmup + + leaq 1(%r10),%rsi + jmp .Loop8 +.align 16 +.Loop8: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 0(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,0(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,4(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 8(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,8(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 12(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,12(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 16(%rdi,%rsi,4),%ebx + rorq $8,%r8 + movl %edx,16(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl 20(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,20(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl 24(%rdi,%rsi,4),%ebx + rorq 
$8,%r8 + movl %edx,24(%rdi,%r10,4) + addb %al,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%sil + addb %bl,%cl + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + movl -4(%rdi,%rsi,4),%eax + rorq $8,%r8 + movl %edx,28(%rdi,%r10,4) + addb %bl,%dl + movb (%rdi,%rdx,4),%r8b + addb $8,%r10b + rorq $8,%r8 + subq $8,%r11 + + xorq (%r12),%r8 + movq %r8,(%r12,%r13,1) + leaq 8(%r12),%r12 + + testq $-8,%r11 + jnz .Loop8 + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lintel: + testq $-32,%r11 + jz .Lloop1 + andq $15,%rbx + jz .Loop16_is_hot + subq %rbx,%r11 +.Loop16_warmup: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %rbx + jnz .Loop16_warmup + + movq %rcx,%rbx + xorq %rcx,%rcx + movb %bl,%cl + +.Loop16_is_hot: + leaq (%rdi,%r10,4),%rsi + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + jmp .Loop16_enter +.align 16 +.Loop16: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + pxor %xmm0,%xmm2 + psllq $8,%xmm1 + pxor %xmm0,%xmm0 + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 4(%rsi),%ebx + movzbl %al,%eax + movl %edx,0(%rsi) + pxor %xmm1,%xmm2 + addb %bl,%cl + pinsrw $0,(%rdi,%rax,4),%xmm0 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 +.Loop16_enter: + movl (%rdi,%rcx,4),%edx + pxor %xmm1,%xmm1 + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 8(%rsi),%eax + movzbl %bl,%ebx + movl %edx,4(%rsi) + addb %al,%cl + pinsrw $0,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 12(%rsi),%ebx + movzbl %al,%eax + movl %edx,8(%rsi) + addb %bl,%cl + pinsrw $1,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 16(%rsi),%eax + movzbl %bl,%ebx + movl %edx,12(%rsi) + addb %al,%cl + pinsrw $1,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 20(%rsi),%ebx + movzbl %al,%eax + movl %edx,16(%rsi) + addb %bl,%cl + pinsrw $2,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 24(%rsi),%eax + movzbl %bl,%ebx + movl %edx,20(%rsi) + addb %al,%cl + pinsrw $2,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 28(%rsi),%ebx + movzbl %al,%eax + movl %edx,24(%rsi) + addb %bl,%cl + pinsrw $3,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 32(%rsi),%eax + movzbl %bl,%ebx + movl %edx,28(%rsi) + addb %al,%cl + pinsrw $3,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 36(%rsi),%ebx + movzbl %al,%eax + movl %edx,32(%rsi) + addb %bl,%cl + pinsrw $4,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 40(%rsi),%eax + movzbl %bl,%ebx + movl %edx,36(%rsi) + addb %al,%cl + pinsrw $4,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 44(%rsi),%ebx + movzbl %al,%eax + movl %edx,40(%rsi) + addb %bl,%cl + pinsrw $5,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 48(%rsi),%eax + movzbl %bl,%ebx + movl %edx,44(%rsi) + addb %al,%cl + pinsrw $5,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 52(%rsi),%ebx + movzbl %al,%eax + 
movl %edx,48(%rsi) + addb %bl,%cl + pinsrw $6,(%rdi,%rax,4),%xmm0 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movl 56(%rsi),%eax + movzbl %bl,%ebx + movl %edx,52(%rsi) + addb %al,%cl + pinsrw $6,(%rdi,%rbx,4),%xmm1 + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + addb %dl,%al + movl 60(%rsi),%ebx + movzbl %al,%eax + movl %edx,56(%rsi) + addb %bl,%cl + pinsrw $7,(%rdi,%rax,4),%xmm0 + addb $16,%r10b + movdqu (%r12),%xmm2 + movl (%rdi,%rcx,4),%edx + movl %ebx,(%rdi,%rcx,4) + addb %dl,%bl + movzbl %bl,%ebx + movl %edx,60(%rsi) + leaq (%rdi,%r10,4),%rsi + pinsrw $7,(%rdi,%rbx,4),%xmm1 + movl (%rsi),%eax + movq %rcx,%rbx + xorq %rcx,%rcx + subq $16,%r11 + movb %bl,%cl + testq $-16,%r11 + jnz .Loop16 + + psllq $8,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm1,%xmm2 + movdqu %xmm2,(%r12,%r13,1) + leaq 16(%r12),%r12 + + cmpq $0,%r11 + jne .Lloop1 + jmp .Lexit + +.align 16 +.Lloop1: + addb %al,%cl + movl (%rdi,%rcx,4),%edx + movl %eax,(%rdi,%rcx,4) + movl %edx,(%rdi,%r10,4) + addb %dl,%al + incb %r10b + movl (%rdi,%rax,4),%edx + movl (%rdi,%r10,4),%eax + xorb (%r12),%dl + movb %dl,(%r12,%r13,1) + leaq 1(%r12),%r12 + decq %r11 + jnz .Lloop1 + jmp .Lexit + +.align 16 +.LRC4_CHAR: + addb $1,%r10b + movzbl (%rdi,%r10,1),%eax + testq $-8,%r11 + jz .Lcloop1 + jmp .Lcloop8 +.align 16 +.Lcloop8: + movl (%r12),%r8d + movl 4(%r12),%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov0 + movq %rax,%rbx +.Lcmov0: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov1 + movq %rbx,%rax +.Lcmov1: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov2 + movq %rax,%rbx +.Lcmov2: + addb %al,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov3 + movq %rbx,%rax +.Lcmov3: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r8b + rorl $8,%r8d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov4 + movq %rax,%rbx +.Lcmov4: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov5 + movq %rbx,%rax +.Lcmov5: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %al,%cl + leaq 1(%r10),%rsi + movzbl (%rdi,%rcx,1),%edx + movzbl %sil,%esi + movzbl (%rdi,%rsi,1),%ebx + movb %al,(%rdi,%rcx,1) + cmpq %rsi,%rcx + movb %dl,(%rdi,%r10,1) + jne .Lcmov6 + movq %rax,%rbx +.Lcmov6: + addb %al,%dl + xorb (%rdi,%rdx,1),%r9b + rorl $8,%r9d + addb %bl,%cl + leaq 1(%rsi),%r10 + movzbl (%rdi,%rcx,1),%edx + movzbl %r10b,%r10d + movzbl (%rdi,%r10,1),%eax + movb %bl,(%rdi,%rcx,1) + cmpq %r10,%rcx + movb %dl,(%rdi,%rsi,1) + jne .Lcmov7 + movq %rbx,%rax +.Lcmov7: + addb %bl,%dl + xorb (%rdi,%rdx,1),%r9b + rorl 
$8,%r9d + leaq -8(%r11),%r11 + movl %r8d,(%r13) + leaq 8(%r12),%r12 + movl %r9d,4(%r13) + leaq 8(%r13),%r13 + + testq $-8,%r11 + jnz .Lcloop8 + cmpq $0,%r11 + jne .Lcloop1 + jmp .Lexit +.align 16 +.Lcloop1: + addb %al,%cl + movzbl %cl,%ecx + movzbl (%rdi,%rcx,1),%edx + movb %al,(%rdi,%rcx,1) + movb %dl,(%rdi,%r10,1) + addb %al,%dl + addb $1,%r10b + movzbl %dl,%edx + movzbl %r10b,%r10d + movzbl (%rdi,%rdx,1),%edx + movzbl (%rdi,%r10,1),%eax + xorb (%r12),%dl + leaq 1(%r12),%r12 + movb %dl,(%r13) + leaq 1(%r13),%r13 + subq $1,%r11 + jnz .Lcloop1 + jmp .Lexit + +.align 16 +.Lexit: + subb $1,%r10b + movl %r10d,-8(%rdi) + movl %ecx,-4(%rdi) + + movq (%rsp),%r13 +.cfi_restore %r13 + movq 8(%rsp),%r12 +.cfi_restore %r12 + movq 16(%rsp),%rbx +.cfi_restore %rbx + addq $24,%rsp +.cfi_adjust_cfa_offset -24 +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size RC4,.-RC4 +.globl RC4_set_key +.type RC4_set_key,@function +.align 16 +RC4_set_key: +.cfi_startproc +.byte 243,15,30,250 + leaq 8(%rdi),%rdi + leaq (%rdx,%rsi,1),%rdx + negq %rsi + movq %rsi,%rcx + xorl %eax,%eax + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + + movl OPENSSL_ia32cap_P(%rip),%r8d + btl $20,%r8d + jc .Lc1stloop + jmp .Lw1stloop + +.align 16 +.Lw1stloop: + movl %eax,(%rdi,%rax,4) + addb $1,%al + jnc .Lw1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lw2ndloop: + movl (%rdi,%r9,4),%r10d + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movl (%rdi,%r8,4),%r11d + cmovzq %rcx,%rsi + movl %r10d,(%rdi,%r8,4) + movl %r11d,(%rdi,%r9,4) + addb $1,%r9b + jnc .Lw2ndloop + jmp .Lexit_key + +.align 16 +.Lc1stloop: + movb %al,(%rdi,%rax,1) + addb $1,%al + jnc .Lc1stloop + + xorq %r9,%r9 + xorq %r8,%r8 +.align 16 +.Lc2ndloop: + movb (%rdi,%r9,1),%r10b + addb (%rdx,%rsi,1),%r8b + addb %r10b,%r8b + addq $1,%rsi + movb (%rdi,%r8,1),%r11b + jnz .Lcnowrap + movq %rcx,%rsi +.Lcnowrap: + movb %r10b,(%rdi,%r8,1) + movb %r11b,(%rdi,%r9,1) + addb $1,%r9b + jnc .Lc2ndloop + movl $-1,256(%rdi) + +.align 16 +.Lexit_key: + xorl %eax,%eax + movl %eax,-8(%rdi) + movl %eax,-4(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size RC4_set_key,.-RC4_set_key + +.globl RC4_options +.type RC4_options,@function +.align 16 +RC4_options: +.cfi_startproc +.byte 243,15,30,250 + leaq .Lopts(%rip),%rax + movl OPENSSL_ia32cap_P(%rip),%edx + btl $20,%edx + jc .L8xchar + btl $30,%edx + jnc .Ldone + addq $25,%rax + .byte 0xf3,0xc3 +.L8xchar: + addq $12,%rax +.Ldone: + .byte 0xf3,0xc3 +.cfi_endproc +.align 64 +.Lopts: +.byte 114,99,52,40,56,120,44,105,110,116,41,0 +.byte 114,99,52,40,56,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,49,54,120,44,105,110,116,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.size RC4_options,.-RC4_options diff --git a/contrib/openssl-cmake/asm/crypto/riscv64cpuid.S b/contrib/openssl-cmake/asm/crypto/riscv64cpuid.S new file mode 100644 index 000000000000..cef02fbcf6d7 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/riscv64cpuid.S @@ -0,0 +1,71 @@ +################################################################################ +# int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len) +################################################################################ +.text +.balign 16 +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +CRYPTO_memcmp: + li t0,0 + beqz a2,2f # len == 0 +1: + lbu t1,0(a0) + lbu t2,0(a1) + addi a0,a0,1 + addi a1,a1,1 + addi a2,a2,-1 + xor t1,t1,t2 + or 
t0,t0,t1 + bgtz a2,1b +2: + mv a0,t0 + ret +################################################################################ +# void OPENSSL_cleanse(void *ptr, size_t len) +################################################################################ +.text +.balign 16 +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +OPENSSL_cleanse: + beqz a1,2f # len == 0, return + srli t0,a1,4 + bnez t0,3f # len > 15 + +1: # Store <= 15 individual bytes + sb x0,0(a0) + addi a0,a0,1 + addi a1,a1,-1 + bnez a1,1b +2: + ret + +3: # Store individual bytes until we are aligned + andi t0,a0,0x7 + beqz t0,4f + sb x0,0(a0) + addi a0,a0,1 + addi a1,a1,-1 + j 3b + +4: # Store aligned dwords + li t1,8 +4: + sd x0,0(a0) + addi a0,a0,8 + addi a1,a1,-8 + bge a1,t1,4b # if len>=8 loop + bnez a1,1b # if len<8 and len != 0, store remaining bytes + ret +################################################################################ +# size_t riscv_vlen_asm(void) +# Return VLEN (i.e. the length of a vector register in bits). +.p2align 3 +.globl riscv_vlen_asm +.type riscv_vlen_asm,@function +riscv_vlen_asm: + # 0xc22 is CSR vlenb + csrr a0, 0xc22 + slli a0, a0, 3 + ret +.size riscv_vlen_asm,.-riscv_vlen_asm diff --git a/contrib/openssl-cmake/asm/crypto/s390xcpuid.S b/contrib/openssl-cmake/asm/crypto/s390xcpuid.S new file mode 100644 index 000000000000..52aa556b051c --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/s390xcpuid.S @@ -0,0 +1,428 @@ +#include "s390x_arch.h" + +.text + +.globl OPENSSL_s390x_facilities +.type OPENSSL_s390x_facilities,@function +.align 16 +OPENSSL_s390x_facilities: + lghi %r0,0 + larl %r4,OPENSSL_s390xcap_P + + stg %r0,S390X_STFLE+8(%r4) # wipe capability vectors + stg %r0,S390X_STFLE+16(%r4) + stg %r0,S390X_STFLE+24(%r4) + + .long 0xb2b04000 # stfle 0(%r4) + brc 8,.Ldone + lghi %r0,1 + .long 0xb2b04000 # stfle 0(%r4) + brc 8,.Ldone + lghi %r0,2 + .long 0xb2b04000 # stfle 0(%r4) +.Ldone: + br %r14 +.size OPENSSL_s390x_facilities,.-OPENSSL_s390x_facilities + +.globl OPENSSL_s390x_functions +.type OPENSSL_s390x_functions,@function +.align 16 +OPENSSL_s390x_functions: + lghi %r0,0 + larl %r4,OPENSSL_s390xcap_P + + stg %r0,S390X_KIMD(%r4) # wipe capability vectors + stg %r0,S390X_KIMD+8(%r4) + stg %r0,S390X_KLMD(%r4) + stg %r0,S390X_KLMD+8(%r4) + stg %r0,S390X_KM(%r4) + stg %r0,S390X_KM+8(%r4) + stg %r0,S390X_KMC(%r4) + stg %r0,S390X_KMC+8(%r4) + stg %r0,S390X_KMAC(%r4) + stg %r0,S390X_KMAC+8(%r4) + stg %r0,S390X_KMCTR(%r4) + stg %r0,S390X_KMCTR+8(%r4) + stg %r0,S390X_KMO(%r4) + stg %r0,S390X_KMO+8(%r4) + stg %r0,S390X_KMF(%r4) + stg %r0,S390X_KMF+8(%r4) + stg %r0,S390X_PRNO(%r4) + stg %r0,S390X_PRNO+8(%r4) + stg %r0,S390X_KMA(%r4) + stg %r0,S390X_KMA+8(%r4) + stg %r0,S390X_PCC(%r4) + stg %r0,S390X_PCC+8(%r4) + stg %r0,S390X_KDSA(%r4) + stg %r0,S390X_KDSA+8(%r4) + + lmg %r2,%r3,S390X_STFLE(%r4) + + tmhl %r2,0x4000 # check for message-security-assist + jz .Lret + + lghi %r0,S390X_QUERY # query kimd capabilities + la %r1,S390X_KIMD(%r4) + .long 0xb93e0002 # kimd %r0,%r2 + + lghi %r0,S390X_QUERY # query klmd capabilities + la %r1,S390X_KLMD(%r4) + .long 0xb93f0002 # klmd %r0,%r2 + + lghi %r0,S390X_QUERY # query km capability vector + la %r1,S390X_KM(%r4) + .long 0xb92e0042 # km %r4,%r2 + + lghi %r0,S390X_QUERY # query kmc capability vector + la %r1,S390X_KMC(%r4) + .long 0xb92f0042 # kmc %r4,%r2 + + lghi %r0,S390X_QUERY # query kmac capability vector + la %r1,S390X_KMAC(%r4) + .long 0xb91e0042 # kmac %r4,%r2 + + tmhh %r3,0x0008 # check for message-security-assist-3 + jz .Lret + + lghi 
%r0,S390X_QUERY # query pcc capability vector + la %r1,S390X_PCC(%r4) + .long 0xb92c0000 # pcc + + tmhh %r3,0x0004 # check for message-security-assist-4 + jz .Lret + + lghi %r0,S390X_QUERY # query kmctr capability vector + la %r1,S390X_KMCTR(%r4) + .long 0xb92d2042 # kmctr %r4,%r2,%r2 + + lghi %r0,S390X_QUERY # query kmo capability vector + la %r1,S390X_KMO(%r4) + .long 0xb92b0042 # kmo %r4,%r2 + + lghi %r0,S390X_QUERY # query kmf capability vector + la %r1,S390X_KMF(%r4) + .long 0xb92a0042 # kmf %r4,%r2 + + tml %r2,0x40 # check for message-security-assist-5 + jz .Lret + + lghi %r0,S390X_QUERY # query prno capability vector + la %r1,S390X_PRNO(%r4) + .long 0xb93c0042 # prno %r4,%r2 + + lg %r2,S390X_STFLE+16(%r4) + + tmhl %r2,0x2000 # check for message-security-assist-8 + jz .Lret + + lghi %r0,S390X_QUERY # query kma capability vector + la %r1,S390X_KMA(%r4) + .long 0xb9294022 # kma %r2,%r4,%r2 + + tmhl %r2,0x0010 # check for message-security-assist-9 + jz .Lret + + lghi %r0,S390X_QUERY # query kdsa capability vector + la %r1,S390X_KDSA(%r4) + .long 0xb93a0002 # kdsa %r0,%r2 + +.Lret: + br %r14 +.size OPENSSL_s390x_functions,.-OPENSSL_s390x_functions + +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: + larl %r4,OPENSSL_s390xcap_P + tm S390X_STFLE+3(%r4),0x40 # check for store-clock-fast facility + jz .Lstck + + .long 0xb27cf010 # stckf 16(%r15) + lg %r2,16(%r15) + br %r14 +.Lstck: + stck 16(%r15) + lg %r2,16(%r15) + br %r14 +.size OPENSSL_rdtsc,.-OPENSSL_rdtsc + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: + l %r1,0(%r2) +.Lspin: lr %r0,%r1 + ar %r0,%r3 + cs %r1,%r0,0(%r2) + brc 4,.Lspin + lgfr %r2,%r0 # OpenSSL expects the new value + br %r14 +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: + xgr %r0,%r0 + xgr %r1,%r1 + lgr %r2,%r15 + xgr %r3,%r3 + xgr %r4,%r4 + lzdr %f0 + lzdr %f1 + lzdr %f2 + lzdr %f3 + lzdr %f4 + lzdr %f5 + lzdr %f6 + lzdr %f7 + br %r14 +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: +#if !defined(__s390x__) && !defined(__s390x) + llgfr %r3,%r3 +#endif + lghi %r4,15 + lghi %r0,0 + clgr %r3,%r4 + jh .Lot + clgr %r3,%r0 + bcr 8,%r14 +.Little: + stc %r0,0(%r2) + la %r2,1(%r2) + brctg %r3,.Little + br %r14 +.align 4 +.Lot: tmll %r2,7 + jz .Laligned + stc %r0,0(%r2) + la %r2,1(%r2) + brctg %r3,.Lot +.Laligned: + srlg %r4,%r3,3 +.Loop: stg %r0,0(%r2) + la %r2,8(%r2) + brctg %r4,.Loop + lghi %r4,7 + ngr %r3,%r4 + jnz .Little + br %r14 +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +.align 16 +CRYPTO_memcmp: +#if !defined(__s390x__) && !defined(__s390x) + llgfr %r4,%r4 +#endif + lghi %r5,0 + clgr %r4,%r5 + je .Lno_data + +.Loop_cmp: + llgc %r0,0(%r2) + la %r2,1(%r2) + llgc %r1,0(%r3) + la %r3,1(%r3) + xr %r1,%r0 + or %r5,%r1 + brctg %r4,.Loop_cmp + + lnr %r5,%r5 + srl %r5,31 +.Lno_data: + lgr %r2,%r5 + br %r14 +.size CRYPTO_memcmp,.-CRYPTO_memcmp + +.globl OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,@function +.align 16 +OPENSSL_instrument_bus: + lghi %r2,0 + br %r14 +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.globl OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,@function +.align 16 +OPENSSL_instrument_bus2: + lghi %r2,0 + br %r14 +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 + +.globl OPENSSL_vx_probe +.type OPENSSL_vx_probe,@function 
+.align 16 +OPENSSL_vx_probe: + .word 0xe700,0x0000,0x0044 # vzero %v0 + br %r14 +.size OPENSSL_vx_probe,.-OPENSSL_vx_probe +.globl s390x_kimd +.type s390x_kimd,@function +.align 16 +s390x_kimd: + llgfr %r0,%r4 + lgr %r1,%r5 + + .long 0xb93e8002 # kimd %r0,%r2[,M3] + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kimd,.-s390x_kimd +.globl s390x_klmd +.type s390x_klmd,@function +.align 32 +s390x_klmd: + llgfr %r0,%r6 + lg %r1,160(%r15) + + .long 0xb93f8042 # klmd %r4,%r2[,M3] + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_klmd,.-s390x_klmd +.globl s390x_km +.type s390x_km,@function +.align 16 +s390x_km: + lr %r0,%r5 + lgr %r1,%r6 + + .long 0xb92e0042 # km %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_km,.-s390x_km +.globl s390x_kmac +.type s390x_kmac,@function +.align 16 +s390x_kmac: + lr %r0,%r4 + lgr %r1,%r5 + + .long 0xb91e0002 # kmac %r0,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kmac,.-s390x_kmac +.globl s390x_kmo +.type s390x_kmo,@function +.align 16 +s390x_kmo: + lr %r0,%r5 + lgr %r1,%r6 + + .long 0xb92b0042 # kmo %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kmo,.-s390x_kmo +.globl s390x_kmf +.type s390x_kmf,@function +.align 16 +s390x_kmf: + lr %r0,%r5 + lgr %r1,%r6 + + .long 0xb92a0042 # kmf %r4,%r2 + brc 1,.-4 # pay attention to "partial completion" + + br %r14 +.size s390x_kmf,.-s390x_kmf +.globl s390x_kma +.type s390x_kma,@function +.align 16 +s390x_kma: + stg %r6,6*8(%r15) + lmg %r0,%r1,160(%r15) + + .long 0xb9292064 # kma %r6,%r2,%r4 + brc 1,.-4 # pay attention to "partial completion" + + lg %r6,6*8(%r15) + br %r14 +.size s390x_kma,.-s390x_kma +.globl s390x_pcc +.type s390x_pcc,@function +.align 16 +s390x_pcc: + lr %r0,%r2 + lgr %r1,%r3 + lhi %r2,0 + + .long 0xb92c0000 # pcc + brc 1,.-4 # pay attention to "partial completion" + brc 7,.Lpcc_err # if CC==0 return 0, else return 1 +.Lpcc_out: + br %r14 +.Lpcc_err: + lhi %r2,1 + j .Lpcc_out +.size s390x_pcc,.-s390x_pcc +.globl s390x_kdsa +.type s390x_kdsa,@function +.align 16 +s390x_kdsa: + lr %r0,%r2 + lgr %r1,%r3 + lhi %r2,0 + + .long 0xb93a0004 # kdsa %r0,%r4 + brc 1,.-4 # pay attention to "partial completion" + brc 7,.Lkdsa_err # if CC==0 return 0, else return 1 +.Lkdsa_out: + br %r14 +.Lkdsa_err: + lhi %r2,1 + j .Lkdsa_out +.size s390x_kdsa,.-s390x_kdsa +.globl s390x_flip_endian32 +.type s390x_flip_endian32,@function +.align 16 +s390x_flip_endian32: + lrvg %r0,0(%r3) + lrvg %r1,8(%r3) + lrvg %r4,16(%r3) + lrvg %r5,24(%r3) + stg %r0,24(%r2) + stg %r1,16(%r2) + stg %r4,8(%r2) + stg %r5,0(%r2) + br %r14 +.size s390x_flip_endian32,.-s390x_flip_endian32 +.globl s390x_flip_endian64 +.type s390x_flip_endian64,@function +.align 16 +s390x_flip_endian64: + stmg %r6,%r9,6*8(%r15) + + lrvg %r0,0(%r3) + lrvg %r1,8(%r3) + lrvg %r4,16(%r3) + lrvg %r5,24(%r3) + lrvg %r6,32(%r3) + lrvg %r7,40(%r3) + lrvg %r8,48(%r3) + lrvg %r9,56(%r3) + stg %r0,56(%r2) + stg %r1,48(%r2) + stg %r4,40(%r2) + stg %r5,32(%r2) + stg %r6,24(%r2) + stg %r7,16(%r2) + stg %r8,8(%r2) + stg %r9,0(%r2) + + lmg %r6,%r9,6*8(%r15) + br %r14 +.size s390x_flip_endian64,.-s390x_flip_endian64 +.section .init + brasl %r14,OPENSSL_cpuid_setup diff --git a/contrib/openssl-cmake/asm/crypto/sha/keccak1600-armv8.S b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-armv8.S new file mode 100644 index 000000000000..8b6b5d0b799f --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-armv8.S @@ -0,0 
+1,1017 @@ +#include "arm_arch.h" + +.section .rodata + +.align 8 // strategic alignment and padding that allows to use + // address value as loop termination condition... +.quad 0,0,0,0,0,0,0,0 +.type iotas,%object +iotas: +.quad 0x0000000000000001 +.quad 0x0000000000008082 +.quad 0x800000000000808a +.quad 0x8000000080008000 +.quad 0x000000000000808b +.quad 0x0000000080000001 +.quad 0x8000000080008081 +.quad 0x8000000000008009 +.quad 0x000000000000008a +.quad 0x0000000000000088 +.quad 0x0000000080008009 +.quad 0x000000008000000a +.quad 0x000000008000808b +.quad 0x800000000000008b +.quad 0x8000000000008089 +.quad 0x8000000000008003 +.quad 0x8000000000008002 +.quad 0x8000000000000080 +.quad 0x000000000000800a +.quad 0x800000008000000a +.quad 0x8000000080008081 +.quad 0x8000000000008080 +.quad 0x0000000080000001 +.quad 0x8000000080008008 +.size iotas,.-iotas +.text + +.type KeccakF1600_int,%function +.align 5 +KeccakF1600_int: + AARCH64_SIGN_LINK_REGISTER + adrp x28,iotas + add x28,x28,#:lo12:iotas + stp x28,x30,[sp,#16] // 32 bytes on top are mine + b .Loop +.align 4 +.Loop: + ////////////////////////////////////////// Theta + eor x26,x0,x5 + stp x4,x9,[sp,#0] // offload pair... + eor x27,x1,x6 + eor x28,x2,x7 + eor x30,x3,x8 + eor x4,x4,x9 + eor x26,x26,x10 + eor x27,x27,x11 + eor x28,x28,x12 + eor x30,x30,x13 + eor x4,x4,x14 + eor x26,x26,x15 + eor x27,x27,x16 + eor x28,x28,x17 + eor x30,x30,x25 + eor x4,x4,x19 + eor x26,x26,x20 + eor x28,x28,x22 + eor x27,x27,x21 + eor x30,x30,x23 + eor x4,x4,x24 + + eor x9,x26,x28,ror#63 + + eor x1,x1,x9 + eor x6,x6,x9 + eor x11,x11,x9 + eor x16,x16,x9 + eor x21,x21,x9 + + eor x9,x27,x30,ror#63 + eor x28,x28,x4,ror#63 + eor x30,x30,x26,ror#63 + eor x4,x4,x27,ror#63 + + eor x27, x2,x9 // mov x27,x2 + eor x7,x7,x9 + eor x12,x12,x9 + eor x17,x17,x9 + eor x22,x22,x9 + + eor x0,x0,x4 + eor x5,x5,x4 + eor x10,x10,x4 + eor x15,x15,x4 + eor x20,x20,x4 + ldp x4,x9,[sp,#0] // re-load offloaded data + eor x26, x3,x28 // mov x26,x3 + eor x8,x8,x28 + eor x13,x13,x28 + eor x25,x25,x28 + eor x23,x23,x28 + + eor x28, x4,x30 // mov x28,x4 + eor x9,x9,x30 + eor x14,x14,x30 + eor x19,x19,x30 + eor x24,x24,x30 + + ////////////////////////////////////////// Rho+Pi + mov x30,x1 + ror x1,x6,#64-44 + //mov x27,x2 + ror x2,x12,#64-43 + //mov x26,x3 + ror x3,x25,#64-21 + //mov x28,x4 + ror x4,x24,#64-14 + + ror x6,x9,#64-20 + ror x12,x13,#64-25 + ror x25,x17,#64-15 + ror x24,x21,#64-2 + + ror x9,x22,#64-61 + ror x13,x19,#64-8 + ror x17,x11,#64-10 + ror x21,x8,#64-55 + + ror x22,x14,#64-39 + ror x19,x23,#64-56 + ror x11,x7,#64-6 + ror x8,x16,#64-45 + + ror x14,x20,#64-18 + ror x23,x15,#64-41 + ror x7,x10,#64-3 + ror x16,x5,#64-36 + + ror x5,x26,#64-28 + ror x10,x30,#64-1 + ror x15,x28,#64-27 + ror x20,x27,#64-62 + + ////////////////////////////////////////// Chi+Iota + bic x26,x2,x1 + bic x27,x3,x2 + bic x28,x0,x4 + bic x30,x1,x0 + eor x0,x0,x26 + bic x26,x4,x3 + eor x1,x1,x27 + ldr x27,[sp,#16] + eor x3,x3,x28 + eor x4,x4,x30 + eor x2,x2,x26 + ldr x30,[x27],#8 // Iota[i++] + + bic x26,x7,x6 + tst x27,#255 // are we done? 
+ str x27,[sp,#16] + bic x27,x8,x7 + bic x28,x5,x9 + eor x0,x0,x30 // A[0][0] ^= Iota + bic x30,x6,x5 + eor x5,x5,x26 + bic x26,x9,x8 + eor x6,x6,x27 + eor x8,x8,x28 + eor x9,x9,x30 + eor x7,x7,x26 + + bic x26,x12,x11 + bic x27,x13,x12 + bic x28,x10,x14 + bic x30,x11,x10 + eor x10,x10,x26 + bic x26,x14,x13 + eor x11,x11,x27 + eor x13,x13,x28 + eor x14,x14,x30 + eor x12,x12,x26 + + bic x26,x17,x16 + bic x27,x25,x17 + bic x28,x15,x19 + bic x30,x16,x15 + eor x15,x15,x26 + bic x26,x19,x25 + eor x16,x16,x27 + eor x25,x25,x28 + eor x19,x19,x30 + eor x17,x17,x26 + + bic x26,x22,x21 + bic x27,x23,x22 + bic x28,x20,x24 + bic x30,x21,x20 + eor x20,x20,x26 + bic x26,x24,x23 + eor x21,x21,x27 + eor x23,x23,x28 + eor x24,x24,x30 + eor x22,x22,x26 + + bne .Loop + + ldr x30,[sp,#24] + AARCH64_VALIDATE_LINK_REGISTER + ret +.size KeccakF1600_int,.-KeccakF1600_int + +.type KeccakF1600,%function +.align 5 +KeccakF1600: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#48 + + str x0,[sp,#32] // offload argument + mov x26,x0 + ldp x0,x1,[x0,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + + bl KeccakF1600_int + + ldr x26,[sp,#32] + stp x0,x1,[x26,#16*0] + stp x2,x3,[x26,#16*1] + stp x4,x5,[x26,#16*2] + stp x6,x7,[x26,#16*3] + stp x8,x9,[x26,#16*4] + stp x10,x11,[x26,#16*5] + stp x12,x13,[x26,#16*6] + stp x14,x15,[x26,#16*7] + stp x16,x17,[x26,#16*8] + stp x25,x19,[x26,#16*9] + stp x20,x21,[x26,#16*10] + stp x22,x23,[x26,#16*11] + str x24,[x26,#16*12] + + ldp x19,x20,[x29,#16] + add sp,sp,#48 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size KeccakF1600,.-KeccakF1600 + +.globl SHA3_absorb +.type SHA3_absorb,%function +.align 5 +SHA3_absorb: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#64 + + stp x0,x1,[sp,#32] // offload arguments + stp x2,x3,[sp,#48] + + mov x26,x0 // uint64_t A[5][5] + mov x27,x1 // const void *inp + mov x28,x2 // size_t len + mov x30,x3 // size_t bsz + ldp x0,x1,[x26,#16*0] + ldp x2,x3,[x26,#16*1] + ldp x4,x5,[x26,#16*2] + ldp x6,x7,[x26,#16*3] + ldp x8,x9,[x26,#16*4] + ldp x10,x11,[x26,#16*5] + ldp x12,x13,[x26,#16*6] + ldp x14,x15,[x26,#16*7] + ldp x16,x17,[x26,#16*8] + ldp x25,x19,[x26,#16*9] + ldp x20,x21,[x26,#16*10] + ldp x22,x23,[x26,#16*11] + ldr x24,[x26,#16*12] + b .Loop_absorb + +.align 4 +.Loop_absorb: + subs x26,x28,x30 // len - bsz + blo .Labsorbed + + str x26,[sp,#48] // save len - bsz + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x0,x0,x26 + cmp x30,#8*(0+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x1,x1,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x2,x2,x26 + cmp x30,#8*(2+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x3,x3,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x4,x4,x26 + cmp x30,#8*(4+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x5,x5,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x6,x6,x26 + cmp x30,#8*(6+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x7,x7,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x8,x8,x26 + cmp x30,#8*(8+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x9,x9,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x10,x10,x26 + cmp x30,#8*(10+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x11,x11,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x12,x12,x26 + cmp x30,#8*(12+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x13,x13,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x14,x14,x26 + cmp x30,#8*(14+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x15,x15,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x16,x16,x26 + cmp x30,#8*(16+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x17,x17,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x25,x25,x26 + cmp x30,#8*(18+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x19,x19,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x20,x20,x26 + cmp x30,#8*(20+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x21,x21,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor 
x22,x22,x26 + cmp x30,#8*(22+2) + blo .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x23,x23,x26 + beq .Lprocess_block + ldr x26,[x27],#8 // *inp++ +#ifdef __AARCH64EB__ + rev x26,x26 +#endif + eor x24,x24,x26 + +.Lprocess_block: + str x27,[sp,#40] // save inp + + bl KeccakF1600_int + + ldr x27,[sp,#40] // restore arguments + ldp x28,x30,[sp,#48] + b .Loop_absorb + +.align 4 +.Labsorbed: + ldr x27,[sp,#32] + stp x0,x1,[x27,#16*0] + stp x2,x3,[x27,#16*1] + stp x4,x5,[x27,#16*2] + stp x6,x7,[x27,#16*3] + stp x8,x9,[x27,#16*4] + stp x10,x11,[x27,#16*5] + stp x12,x13,[x27,#16*6] + stp x14,x15,[x27,#16*7] + stp x16,x17,[x27,#16*8] + stp x25,x19,[x27,#16*9] + stp x20,x21,[x27,#16*10] + stp x22,x23,[x27,#16*11] + str x24,[x27,#16*12] + + mov x0,x28 // return value + ldp x19,x20,[x29,#16] + add sp,sp,#64 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,%function +.align 5 +SHA3_squeeze: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-48]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + + mov x19,x0 // put aside arguments + mov x20,x1 + mov x21,x2 + mov x22,x3 + cmp w4, #0 // w4 = 'next' argument + bne .Lnext_block + +.Loop_squeeze: + ldr x4,[x0],#8 + cmp x21,#8 + blo .Lsqueeze_tail +#ifdef __AARCH64EB__ + rev x4,x4 +#endif + str x4,[x20],#8 + subs x21,x21,#8 + beq .Lsqueeze_done + + subs x3,x3,#8 + bhi .Loop_squeeze +.Lnext_block: + mov x0,x19 + bl KeccakF1600 + mov x0,x19 + mov x3,x22 + b .Loop_squeeze + +.align 4 +.Lsqueeze_tail: + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + lsr x4,x4,#8 + subs x21,x21,#1 + beq .Lsqueeze_done + strb w4,[x20],#1 + +.Lsqueeze_done: + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x29,x30,[sp],#48 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size SHA3_squeeze,.-SHA3_squeeze +.type KeccakF1600_ce,%function +.align 5 +KeccakF1600_ce: + mov x9,#24 + adrp x10,iotas + add x10,x10,#:lo12:iotas + b .Loop_ce +.align 4 +.Loop_ce: + ////////////////////////////////////////////////// Theta +.inst 0xce0f2a99 //eor3 v25.16b,v20.16b,v15.16b,v10.16b +.inst 0xce102eba //eor3 v26.16b,v21.16b,v16.16b,v11.16b +.inst 0xce1132db //eor3 v27.16b,v22.16b,v17.16b,v12.16b +.inst 0xce1236fc //eor3 v28.16b,v23.16b,v18.16b,v13.16b +.inst 0xce133b1d //eor3 v29.16b,v24.16b,v19.16b,v14.16b +.inst 0xce050339 //eor3 v25.16b,v25.16b, v5.16b,v0.16b +.inst 0xce06075a //eor3 v26.16b,v26.16b, v6.16b,v1.16b +.inst 0xce070b7b //eor3 v27.16b,v27.16b, v7.16b,v2.16b +.inst 0xce080f9c //eor3 v28.16b,v28.16b, v8.16b,v3.16b +.inst 0xce0913bd //eor3 v29.16b,v29.16b, v9.16b,v4.16b + +.inst 0xce7b8f3e //rax1 v30.16b,v25.16b,v27.16b // D[1] +.inst 0xce7c8f5f //rax1 v31.16b,v26.16b,v28.16b // D[2] +.inst 0xce7d8f7b //rax1 v27.16b,v27.16b,v29.16b // D[3] +.inst 0xce798f9c //rax1 v28.16b,v28.16b,v25.16b // D[4] +.inst 0xce7a8fbd //rax1 v29.16b,v29.16b,v26.16b // D[0] + + ////////////////////////////////////////////////// Theta+Rho+Pi +.inst 0xce9efc39 //xar v25.16b, v1.16b,v30.16b,#64-1 // C[0]=A[2][0] + +.inst 0xce9e50c1 //xar 
v1.16b,v6.16b,v30.16b,#64-44 +.inst 0xce9cb126 //xar v6.16b,v9.16b,v28.16b,#64-20 +.inst 0xce9f0ec9 //xar v9.16b,v22.16b,v31.16b,#64-61 +.inst 0xce9c65d6 //xar v22.16b,v14.16b,v28.16b,#64-39 +.inst 0xce9dba8e //xar v14.16b,v20.16b,v29.16b,#64-18 + +.inst 0xce9f085a //xar v26.16b, v2.16b,v31.16b,#64-62 // C[1]=A[4][0] + +.inst 0xce9f5582 //xar v2.16b,v12.16b,v31.16b,#64-43 +.inst 0xce9b9dac //xar v12.16b,v13.16b,v27.16b,#64-25 +.inst 0xce9ce26d //xar v13.16b,v19.16b,v28.16b,#64-8 +.inst 0xce9b22f3 //xar v19.16b,v23.16b,v27.16b,#64-56 +.inst 0xce9d5df7 //xar v23.16b,v15.16b,v29.16b,#64-41 + +.inst 0xce9c948f //xar v15.16b,v4.16b,v28.16b,#64-27 + +.inst 0xce9ccb1c //xar v28.16b, v24.16b,v28.16b,#64-14 // D[4]=A[0][4] +.inst 0xce9efab8 //xar v24.16b,v21.16b,v30.16b,#64-2 +.inst 0xce9b2508 //xar v8.16b,v8.16b,v27.16b,#64-55 // A[1][3]=A[4][1] +.inst 0xce9e4e04 //xar v4.16b,v16.16b,v30.16b,#64-45 // A[0][4]=A[1][3] +.inst 0xce9d70b0 //xar v16.16b,v5.16b,v29.16b,#64-36 + +.inst 0xce9b9065 //xar v5.16b,v3.16b,v27.16b,#64-28 + + eor v0.16b,v0.16b,v29.16b + +.inst 0xce9bae5b //xar v27.16b, v18.16b,v27.16b,#64-21 // D[3]=A[0][3] +.inst 0xce9fc623 //xar v3.16b,v17.16b,v31.16b,#64-15 // A[0][3]=A[3][3] +.inst 0xce9ed97e //xar v30.16b, v11.16b,v30.16b,#64-10 // D[1]=A[3][2] +.inst 0xce9fe8ff //xar v31.16b, v7.16b,v31.16b,#64-6 // D[2]=A[2][1] +.inst 0xce9df55d //xar v29.16b, v10.16b,v29.16b,#64-3 // D[0]=A[1][2] + + ////////////////////////////////////////////////// Chi+Iota +.inst 0xce362354 //bcax v20.16b,v26.16b, v22.16b,v8.16b // A[1][3]=A[4][1] +.inst 0xce375915 //bcax v21.16b,v8.16b,v23.16b,v22.16b // A[1][3]=A[4][1] +.inst 0xce385ed6 //bcax v22.16b,v22.16b,v24.16b,v23.16b +.inst 0xce3a62f7 //bcax v23.16b,v23.16b,v26.16b, v24.16b +.inst 0xce286b18 //bcax v24.16b,v24.16b,v8.16b,v26.16b // A[1][3]=A[4][1] + + ld1r {v26.2d},[x10],#8 + +.inst 0xce330fd1 //bcax v17.16b,v30.16b, v19.16b,v3.16b // A[0][3]=A[3][3] +.inst 0xce2f4c72 //bcax v18.16b,v3.16b,v15.16b,v19.16b // A[0][3]=A[3][3] +.inst 0xce303e73 //bcax v19.16b,v19.16b,v16.16b,v15.16b +.inst 0xce3e41ef //bcax v15.16b,v15.16b,v30.16b, v16.16b +.inst 0xce237a10 //bcax v16.16b,v16.16b,v3.16b,v30.16b // A[0][3]=A[3][3] + +.inst 0xce2c7f2a //bcax v10.16b,v25.16b, v12.16b,v31.16b +.inst 0xce2d33eb //bcax v11.16b,v31.16b, v13.16b,v12.16b +.inst 0xce2e358c //bcax v12.16b,v12.16b,v14.16b,v13.16b +.inst 0xce3939ad //bcax v13.16b,v13.16b,v25.16b, v14.16b +.inst 0xce3f65ce //bcax v14.16b,v14.16b,v31.16b, v25.16b + +.inst 0xce2913a7 //bcax v7.16b,v29.16b, v9.16b,v4.16b // A[0][4]=A[1][3] +.inst 0xce252488 //bcax v8.16b,v4.16b,v5.16b,v9.16b // A[0][4]=A[1][3] +.inst 0xce261529 //bcax v9.16b,v9.16b,v6.16b,v5.16b +.inst 0xce3d18a5 //bcax v5.16b,v5.16b,v29.16b, v6.16b +.inst 0xce2474c6 //bcax v6.16b,v6.16b,v4.16b,v29.16b // A[0][4]=A[1][3] + +.inst 0xce207363 //bcax v3.16b,v27.16b, v0.16b,v28.16b +.inst 0xce210384 //bcax v4.16b,v28.16b, v1.16b,v0.16b +.inst 0xce220400 //bcax v0.16b,v0.16b,v2.16b,v1.16b +.inst 0xce3b0821 //bcax v1.16b,v1.16b,v27.16b, v2.16b +.inst 0xce3c6c42 //bcax v2.16b,v2.16b,v28.16b, v27.16b + + eor v0.16b,v0.16b,v26.16b + + subs x9,x9,#1 + bne .Loop_ce + + ret +.size KeccakF1600_ce,.-KeccakF1600_ce + +.type KeccakF1600_cext,%function +.align 5 +KeccakF1600_cext: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! 
+ add x29,sp,#0 + stp d8,d9,[sp,#16] // per ABI requirement + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + bl KeccakF1600_ce + ldr x30,[sp,#8] + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + + ldp d8,d9,[sp,#16] + ldp d10,d11,[sp,#32] + ldp d12,d13,[sp,#48] + ldp d14,d15,[sp,#64] + ldr x29,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size KeccakF1600_cext,.-KeccakF1600_cext +.globl SHA3_absorb_cext +.type SHA3_absorb_cext,%function +.align 5 +SHA3_absorb_cext: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-80]! + add x29,sp,#0 + stp d8,d9,[sp,#16] // per ABI requirement + stp d10,d11,[sp,#32] + stp d12,d13,[sp,#48] + stp d14,d15,[sp,#64] + ldp d0,d1,[x0,#8*0] + ldp d2,d3,[x0,#8*2] + ldp d4,d5,[x0,#8*4] + ldp d6,d7,[x0,#8*6] + ldp d8,d9,[x0,#8*8] + ldp d10,d11,[x0,#8*10] + ldp d12,d13,[x0,#8*12] + ldp d14,d15,[x0,#8*14] + ldp d16,d17,[x0,#8*16] + ldp d18,d19,[x0,#8*18] + ldp d20,d21,[x0,#8*20] + ldp d22,d23,[x0,#8*22] + ldr d24,[x0,#8*24] + b .Loop_absorb_ce + +.align 4 +.Loop_absorb_ce: + subs x2,x2,x3 // len - bsz + blo .Labsorbed_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v0.16b,v0.16b,v31.16b + cmp x3,#8*(0+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v1.16b,v1.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v2.16b,v2.16b,v31.16b + cmp x3,#8*(2+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v3.16b,v3.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v4.16b,v4.16b,v31.16b + cmp x3,#8*(4+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v5.16b,v5.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v6.16b,v6.16b,v31.16b + cmp x3,#8*(6+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v7.16b,v7.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v8.16b,v8.16b,v31.16b + cmp x3,#8*(8+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v9.16b,v9.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v10.16b,v10.16b,v31.16b + cmp x3,#8*(10+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v11.16b,v11.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v12.16b,v12.16b,v31.16b + cmp x3,#8*(12+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 
// *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v13.16b,v13.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v14.16b,v14.16b,v31.16b + cmp x3,#8*(14+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v15.16b,v15.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v16.16b,v16.16b,v31.16b + cmp x3,#8*(16+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v17.16b,v17.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v18.16b,v18.16b,v31.16b + cmp x3,#8*(18+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v19.16b,v19.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v20.16b,v20.16b,v31.16b + cmp x3,#8*(20+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v21.16b,v21.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v22.16b,v22.16b,v31.16b + cmp x3,#8*(22+2) + blo .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v23.16b,v23.16b,v31.16b + beq .Lprocess_block_ce + ldr d31,[x1],#8 // *inp++ +#ifdef __AARCH64EB__ + rev64 v31.16b,v31.16b +#endif + eor v24.16b,v24.16b,v31.16b + +.Lprocess_block_ce: + + bl KeccakF1600_ce + + b .Loop_absorb_ce + +.align 4 +.Labsorbed_ce: + stp d0,d1,[x0,#8*0] + stp d2,d3,[x0,#8*2] + stp d4,d5,[x0,#8*4] + stp d6,d7,[x0,#8*6] + stp d8,d9,[x0,#8*8] + stp d10,d11,[x0,#8*10] + stp d12,d13,[x0,#8*12] + stp d14,d15,[x0,#8*14] + stp d16,d17,[x0,#8*16] + stp d18,d19,[x0,#8*18] + stp d20,d21,[x0,#8*20] + stp d22,d23,[x0,#8*22] + str d24,[x0,#8*24] + add x0,x2,x3 // return value + + ldp d8,d9,[sp,#16] + ldp d10,d11,[sp,#32] + ldp d12,d13,[sp,#48] + ldp d14,d15,[sp,#64] + ldp x29,x30,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size SHA3_absorb_cext,.-SHA3_absorb_cext +.globl SHA3_squeeze_cext +.type SHA3_squeeze_cext,%function +.align 5 +SHA3_squeeze_cext: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + mov x9,x0 + mov x10,x3 + +.Loop_squeeze_ce: + ldr x4,[x9],#8 + cmp x2,#8 + blo .Lsqueeze_tail_ce +#ifdef __AARCH64EB__ + rev x4,x4 +#endif + str x4,[x1],#8 + beq .Lsqueeze_done_ce + + sub x2,x2,#8 + subs x10,x10,#8 + bhi .Loop_squeeze_ce + + bl KeccakF1600_cext + ldr x30,[sp,#8] + mov x9,x0 + mov x10,x3 + b .Loop_squeeze_ce + +.align 4 +.Lsqueeze_tail_ce: + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + lsr x4,x4,#8 + subs x2,x2,#1 + beq .Lsqueeze_done_ce + strb w4,[x1],#1 + +.Lsqueeze_done_ce: + ldr x29,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size SHA3_squeeze_cext,.-SHA3_squeeze_cext +.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 diff --git a/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx2.s b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx2.s new file mode 100644 index 000000000000..4e4be2b94734 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx2.s @@ -0,0 +1,600 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: + lea rhotates_left+96(%rip),%r8 + lea rhotates_right+96(%rip),%r9 + lea iotas(%rip),%r10 + mov $24,%eax + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + ######################################### Theta + vpshufd $0b01001110,%ymm2,%ymm13 + vpxor %ymm3,%ymm5,%ymm12 + vpxor %ymm6,%ymm4,%ymm9 + vpxor %ymm1,%ymm12,%ymm12 + vpxor %ymm9,%ymm12,%ymm12 # C[1..4] + + vpermq $0b10010011,%ymm12,%ymm11 + vpxor %ymm2,%ymm13,%ymm13 + vpermq $0b01001110,%ymm13,%ymm7 + + vpsrlq $63,%ymm12,%ymm8 + vpaddq %ymm12,%ymm12,%ymm9 + vpor %ymm9,%ymm8,%ymm8 # ROL64(C[1..4],1) + + vpermq $0b00111001,%ymm8,%ymm15 + vpxor %ymm11,%ymm8,%ymm14 + vpermq $0b00000000,%ymm14,%ymm14 # D[0..0] = ROL64(C[1],1) ^ C[4] + + vpxor %ymm0,%ymm13,%ymm13 + vpxor %ymm7,%ymm13,%ymm13 # C[0..0] + + vpsrlq $63,%ymm13,%ymm7 + vpaddq %ymm13,%ymm13,%ymm8 + vpor %ymm7,%ymm8,%ymm8 # ROL64(C[0..0],1) + + vpxor %ymm14,%ymm2,%ymm2 # ^= D[0..0] + vpxor %ymm14,%ymm0,%ymm0 # ^= D[0..0] + + vpblendd $0b11000000,%ymm8,%ymm15,%ymm15 + vpblendd $0b00000011,%ymm13,%ymm11,%ymm11 + vpxor %ymm11,%ymm15,%ymm15 # D[1..4] = ROL64(C[2..4,0),1) ^ C[0..3] + + ######################################### Rho + Pi + pre-Chi shuffle + vpsllvq 0*32-96(%r8),%ymm2,%ymm10 + vpsrlvq 0*32-96(%r9),%ymm2,%ymm2 + vpor %ymm10,%ymm2,%ymm2 + + vpxor %ymm15,%ymm3,%ymm3 # ^= D[1..4] from Theta + vpsllvq 2*32-96(%r8),%ymm3,%ymm11 + vpsrlvq 2*32-96(%r9),%ymm3,%ymm3 + vpor %ymm11,%ymm3,%ymm3 + + vpxor %ymm15,%ymm4,%ymm4 # ^= D[1..4] from Theta + vpsllvq 3*32-96(%r8),%ymm4,%ymm12 + vpsrlvq 3*32-96(%r9),%ymm4,%ymm4 + vpor %ymm12,%ymm4,%ymm4 + + vpxor %ymm15,%ymm5,%ymm5 # ^= D[1..4] from Theta + vpsllvq 4*32-96(%r8),%ymm5,%ymm13 + vpsrlvq 4*32-96(%r9),%ymm5,%ymm5 + vpor %ymm13,%ymm5,%ymm5 + + vpxor %ymm15,%ymm6,%ymm6 # ^= D[1..4] from Theta + vpermq $0b10001101,%ymm2,%ymm10 # %ymm2 -> future %ymm3 + vpermq $0b10001101,%ymm3,%ymm11 # %ymm3 -> future %ymm4 + vpsllvq 5*32-96(%r8),%ymm6,%ymm14 + vpsrlvq 5*32-96(%r9),%ymm6,%ymm8 + vpor %ymm14,%ymm8,%ymm8 # 
%ymm6 -> future %ymm1 + + vpxor %ymm15,%ymm1,%ymm1 # ^= D[1..4] from Theta + vpermq $0b00011011,%ymm4,%ymm12 # %ymm4 -> future %ymm5 + vpermq $0b01110010,%ymm5,%ymm13 # %ymm5 -> future %ymm6 + vpsllvq 1*32-96(%r8),%ymm1,%ymm15 + vpsrlvq 1*32-96(%r9),%ymm1,%ymm9 + vpor %ymm15,%ymm9,%ymm9 # %ymm1 -> future %ymm2 + + ######################################### Chi + vpsrldq $8,%ymm8,%ymm14 + vpandn %ymm14,%ymm8,%ymm7 # tgting [0][0] [0][0] [0][0] [0][0] + + vpblendd $0b00001100,%ymm13,%ymm9,%ymm3 # [4][4] [2][0] + vpblendd $0b00001100,%ymm9,%ymm11,%ymm15 # [4][0] [2][1] + vpblendd $0b00001100,%ymm11,%ymm10,%ymm5 # [4][2] [2][4] + vpblendd $0b00001100,%ymm10,%ymm9,%ymm14 # [4][3] [2][0] + vpblendd $0b00110000,%ymm11,%ymm3,%ymm3 # [1][3] [4][4] [2][0] + vpblendd $0b00110000,%ymm12,%ymm15,%ymm15 # [1][4] [4][0] [2][1] + vpblendd $0b00110000,%ymm9,%ymm5,%ymm5 # [1][0] [4][2] [2][4] + vpblendd $0b00110000,%ymm13,%ymm14,%ymm14 # [1][1] [4][3] [2][0] + vpblendd $0b11000000,%ymm12,%ymm3,%ymm3 # [3][2] [1][3] [4][4] [2][0] + vpblendd $0b11000000,%ymm13,%ymm15,%ymm15 # [3][3] [1][4] [4][0] [2][1] + vpblendd $0b11000000,%ymm13,%ymm5,%ymm5 # [3][3] [1][0] [4][2] [2][4] + vpblendd $0b11000000,%ymm11,%ymm14,%ymm14 # [3][4] [1][1] [4][3] [2][0] + vpandn %ymm15,%ymm3,%ymm3 # tgting [3][1] [1][2] [4][3] [2][4] + vpandn %ymm14,%ymm5,%ymm5 # tgting [3][2] [1][4] [4][1] [2][3] + + vpblendd $0b00001100,%ymm9,%ymm12,%ymm6 # [4][0] [2][3] + vpblendd $0b00001100,%ymm12,%ymm10,%ymm15 # [4][1] [2][4] + vpxor %ymm10,%ymm3,%ymm3 + vpblendd $0b00110000,%ymm10,%ymm6,%ymm6 # [1][2] [4][0] [2][3] + vpblendd $0b00110000,%ymm11,%ymm15,%ymm15 # [1][3] [4][1] [2][4] + vpxor %ymm12,%ymm5,%ymm5 + vpblendd $0b11000000,%ymm11,%ymm6,%ymm6 # [3][4] [1][2] [4][0] [2][3] + vpblendd $0b11000000,%ymm9,%ymm15,%ymm15 # [3][0] [1][3] [4][1] [2][4] + vpandn %ymm15,%ymm6,%ymm6 # tgting [3][3] [1][1] [4][4] [2][2] + vpxor %ymm13,%ymm6,%ymm6 + + vpermq $0b00011110,%ymm8,%ymm4 # [0][1] [0][2] [0][4] [0][3] + vpblendd $0b00110000,%ymm0,%ymm4,%ymm15 # [0][1] [0][0] [0][4] [0][3] + vpermq $0b00111001,%ymm8,%ymm1 # [0][1] [0][4] [0][3] [0][2] + vpblendd $0b11000000,%ymm0,%ymm1,%ymm1 # [0][0] [0][4] [0][3] [0][2] + vpandn %ymm15,%ymm1,%ymm1 # tgting [0][4] [0][3] [0][2] [0][1] + + vpblendd $0b00001100,%ymm12,%ymm11,%ymm2 # [4][1] [2][1] + vpblendd $0b00001100,%ymm11,%ymm13,%ymm14 # [4][2] [2][2] + vpblendd $0b00110000,%ymm13,%ymm2,%ymm2 # [1][1] [4][1] [2][1] + vpblendd $0b00110000,%ymm10,%ymm14,%ymm14 # [1][2] [4][2] [2][2] + vpblendd $0b11000000,%ymm10,%ymm2,%ymm2 # [3][1] [1][1] [4][1] [2][1] + vpblendd $0b11000000,%ymm12,%ymm14,%ymm14 # [3][2] [1][2] [4][2] [2][2] + vpandn %ymm14,%ymm2,%ymm2 # tgting [3][0] [1][0] [4][0] [2][0] + vpxor %ymm9,%ymm2,%ymm2 + + vpermq $0b00000000,%ymm7,%ymm7 # [0][0] [0][0] [0][0] [0][0] + vpermq $0b00011011,%ymm3,%ymm3 # post-Chi shuffle + vpermq $0b10001101,%ymm5,%ymm5 + vpermq $0b01110010,%ymm6,%ymm6 + + vpblendd $0b00001100,%ymm10,%ymm13,%ymm4 # [4][3] [2][2] + vpblendd $0b00001100,%ymm13,%ymm12,%ymm14 # [4][4] [2][3] + vpblendd $0b00110000,%ymm12,%ymm4,%ymm4 # [1][4] [4][3] [2][2] + vpblendd $0b00110000,%ymm9,%ymm14,%ymm14 # [1][0] [4][4] [2][3] + vpblendd $0b11000000,%ymm9,%ymm4,%ymm4 # [3][0] [1][4] [4][3] [2][2] + vpblendd $0b11000000,%ymm10,%ymm14,%ymm14 # [3][1] [1][0] [4][4] [2][3] + vpandn %ymm14,%ymm4,%ymm4 # tgting [3][4] [1][3] [4][2] [2][1] + + vpxor %ymm7,%ymm0,%ymm0 + vpxor %ymm8,%ymm1,%ymm1 + vpxor %ymm11,%ymm4,%ymm4 + + ######################################### Iota + vpxor (%r10),%ymm0,%ymm0 + lea 
32(%r10),%r10 + + dec %eax + jnz .Loop_avx2 + + ret +.size __KeccakF1600,.-__KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 +SHA3_absorb: + mov %rsp,%r11 + + lea -240(%rsp),%rsp + and $-32,%rsp + + lea 96(%rdi),%rdi + lea 96(%rsi),%rsi + lea 96(%rsp),%r10 + + vzeroupper + + vpbroadcastq -96(%rdi),%ymm0 # load A[5][5] + vmovdqu 8+32*0-96(%rdi),%ymm1 + vmovdqu 8+32*1-96(%rdi),%ymm2 + vmovdqu 8+32*2-96(%rdi),%ymm3 + vmovdqu 8+32*3-96(%rdi),%ymm4 + vmovdqu 8+32*4-96(%rdi),%ymm5 + vmovdqu 8+32*5-96(%rdi),%ymm6 + + vpxor %ymm7,%ymm7,%ymm7 + vmovdqa %ymm7,32*2-96(%r10) # zero transfer area on stack + vmovdqa %ymm7,32*3-96(%r10) + vmovdqa %ymm7,32*4-96(%r10) + vmovdqa %ymm7,32*5-96(%r10) + vmovdqa %ymm7,32*6-96(%r10) + +.Loop_absorb_avx2: + mov %rcx,%rax + sub %rcx,%rdx + jc .Ldone_absorb_avx2 + + shr $3,%eax + vpbroadcastq 0-96(%rsi),%ymm7 + vmovdqu 8-96(%rsi),%ymm8 + sub $4,%eax + dec %eax + jz .Labsorved_avx2 + mov 8*5-96(%rsi),%r8 + mov %r8,80-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*6-96(%rsi),%r8 + mov %r8,192-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*7-96(%rsi),%r8 + mov %r8,104-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*8-96(%rsi),%r8 + mov %r8,144-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*9-96(%rsi),%r8 + mov %r8,184-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*10-96(%rsi),%r8 + mov %r8,64-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*11-96(%rsi),%r8 + mov %r8,128-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*12-96(%rsi),%r8 + mov %r8,200-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*13-96(%rsi),%r8 + mov %r8,176-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*14-96(%rsi),%r8 + mov %r8,120-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*15-96(%rsi),%r8 + mov %r8,88-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*16-96(%rsi),%r8 + mov %r8,96-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*17-96(%rsi),%r8 + mov %r8,168-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*18-96(%rsi),%r8 + mov %r8,208-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*19-96(%rsi),%r8 + mov %r8,152-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*20-96(%rsi),%r8 + mov %r8,72-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*21-96(%rsi),%r8 + mov %r8,160-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*22-96(%rsi),%r8 + mov %r8,136-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*23-96(%rsi),%r8 + mov %r8,112-96(%r10) + dec %eax + jz .Labsorved_avx2 + mov 8*24-96(%rsi),%r8 + mov %r8,216-96(%r10) +.Labsorved_avx2: + lea (%rsi,%rcx),%rsi + + vpxor %ymm7,%ymm0,%ymm0 + vpxor %ymm8,%ymm1,%ymm1 + vpxor 32*2-96(%r10),%ymm2,%ymm2 + vpxor 32*3-96(%r10),%ymm3,%ymm3 + vpxor 32*4-96(%r10),%ymm4,%ymm4 + vpxor 32*5-96(%r10),%ymm5,%ymm5 + vpxor 32*6-96(%r10),%ymm6,%ymm6 + + call __KeccakF1600 + + lea 96(%rsp),%r10 + jmp .Loop_absorb_avx2 + +.Ldone_absorb_avx2: + vmovq %xmm0,-96(%rdi) + vmovdqu %ymm1,8+32*0-96(%rdi) + vmovdqu %ymm2,8+32*1-96(%rdi) + vmovdqu %ymm3,8+32*2-96(%rdi) + vmovdqu %ymm4,8+32*3-96(%rdi) + vmovdqu %ymm5,8+32*4-96(%rdi) + vmovdqu %ymm6,8+32*5-96(%rdi) + + vzeroupper + + lea (%r11),%rsp + lea (%rdx,%rcx),%rax # return value + ret +.size SHA3_absorb,.-SHA3_absorb + +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 +SHA3_squeeze: + mov %rsp,%r11 + + lea 96(%rdi),%rdi + shr $3,%rcx + + vzeroupper + + vpbroadcastq -96(%rdi),%ymm0 + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu 8+32*0-96(%rdi),%ymm1 + vmovdqu 8+32*1-96(%rdi),%ymm2 + vmovdqu 8+32*2-96(%rdi),%ymm3 + vmovdqu 8+32*3-96(%rdi),%ymm4 + vmovdqu 8+32*4-96(%rdi),%ymm5 + vmovdqu 
8+32*5-96(%rdi),%ymm6 + + mov %rcx,%rax + +.Loop_squeeze_avx2: + mov 0-96(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 32-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 40-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 48-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 56-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 80-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 192-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 104-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 144-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 184-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 64-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 128-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 200-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 176-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 120-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 88-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 96-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 168-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 208-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 152-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 72-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 160-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je 
.Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 136-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 112-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov 216-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx2 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx2 + dec %eax + je .Lextend_output_avx2 + mov -120(%rdi),%r8 +.Lextend_output_avx2: + call __KeccakF1600 + + vmovq %xmm0,-96(%rdi) + vmovdqu %ymm1,8+32*0-96(%rdi) + vmovdqu %ymm2,8+32*1-96(%rdi) + vmovdqu %ymm3,8+32*2-96(%rdi) + vmovdqu %ymm4,8+32*3-96(%rdi) + vmovdqu %ymm5,8+32*4-96(%rdi) + vmovdqu %ymm6,8+32*5-96(%rdi) + + mov %rcx,%rax + jmp .Loop_squeeze_avx2 + + +.Ltail_squeeze_avx2: + add $8,%rdx +.Loop_tail_avx2: + mov %r8b,(%rsi) + lea 1(%rsi),%rsi + shr $8,%r8 + dec %rdx + jnz .Loop_tail_avx2 + +.Ldone_squeeze_avx2: + vzeroupper + + lea (%r11),%rsp + ret +.size SHA3_squeeze,.-SHA3_squeeze + +.section .rodata +.align 64 +rhotates_left: + .quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0] + .quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4] + .quad 45, 6, 56, 39 # [3][1] [1][2] [4][3] [2][4] + .quad 10, 61, 55, 8 # [2][1] [4][2] [1][3] [3][4] + .quad 2, 15, 25, 20 # [4][1] [3][2] [2][3] [1][4] + .quad 44, 43, 21, 14 # [1][1] [2][2] [3][3] [4][4] +rhotates_right: + .quad 64-3, 64-18, 64-36, 64-41 + .quad 64-1, 64-62, 64-28, 64-27 + .quad 64-45, 64-6, 64-56, 64-39 + .quad 64-10, 64-61, 64-55, 64-8 + .quad 64-2, 64-15, 64-25, 64-20 + .quad 64-44, 64-43, 64-21, 64-14 +iotas: + .quad 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001 + .quad 0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082 + .quad 0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a + .quad 0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000 + .quad 0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b + .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001 + .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081 + .quad 0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009 + .quad 0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a + .quad 0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088 + .quad 0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009 + .quad 0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a + .quad 0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b + .quad 0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b + .quad 0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089 + .quad 0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003 + .quad 0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002 + .quad 0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080 + .quad 0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a + .quad 0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a + .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081 + .quad 0x8000000000008080, 
0x8000000000008080, 0x8000000000008080, 0x8000000000008080 + .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001 + .quad 0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008 + +.asciz "Keccak-1600 absorb and squeeze for AVX2, CRYPTOGAMS by " diff --git a/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx512.s b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx512.s new file mode 100644 index 000000000000..67fb8acf06b8 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx512.s @@ -0,0 +1,496 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: + lea iotas(%rip),%r10 + mov $12,%eax + jmp .Loop_avx512 + +.align 32 +.Loop_avx512: + ######################################### Theta, even round + vmovdqa64 %zmm0,%zmm5 # put aside original A00 + vpternlogq $0x96,%zmm2,%zmm1,%zmm0 # and use it as "C00" + vpternlogq $0x96,%zmm4,%zmm3,%zmm0 + + vprolq $1,%zmm0,%zmm6 + vpermq %zmm0,%zmm13,%zmm0 + vpermq %zmm6,%zmm16,%zmm6 + + vpternlogq $0x96,%zmm0,%zmm6,%zmm5 # T[0] is original A00 + vpternlogq $0x96,%zmm0,%zmm6,%zmm1 + vpternlogq $0x96,%zmm0,%zmm6,%zmm2 + vpternlogq $0x96,%zmm0,%zmm6,%zmm3 + vpternlogq $0x96,%zmm0,%zmm6,%zmm4 + + ######################################### Rho + vprolvq %zmm22,%zmm5,%zmm0 # T[0] is original A00 + vprolvq %zmm23,%zmm1,%zmm1 + vprolvq %zmm24,%zmm2,%zmm2 + vprolvq %zmm25,%zmm3,%zmm3 + vprolvq %zmm26,%zmm4,%zmm4 + + ######################################### Pi + vpermq %zmm0,%zmm17,%zmm0 + vpermq %zmm1,%zmm18,%zmm1 + vpermq %zmm2,%zmm19,%zmm2 + vpermq %zmm3,%zmm20,%zmm3 + vpermq %zmm4,%zmm21,%zmm4 + + ######################################### Chi + vmovdqa64 %zmm0,%zmm5 + vmovdqa64 %zmm1,%zmm6 + vpternlogq $0xD2,%zmm2,%zmm1,%zmm0 + vpternlogq $0xD2,%zmm3,%zmm2,%zmm1 + vpternlogq $0xD2,%zmm4,%zmm3,%zmm2 + vpternlogq $0xD2,%zmm5,%zmm4,%zmm3 + vpternlogq $0xD2,%zmm6,%zmm5,%zmm4 + + ######################################### Iota + vpxorq (%r10),%zmm0,%zmm0{%k1} + lea 16(%r10),%r10 + + ######################################### Harmonize rounds + vpblendmq %zmm2,%zmm1,%zmm6{%k2} + vpblendmq %zmm3,%zmm2,%zmm7{%k2} + vpblendmq %zmm4,%zmm3,%zmm8{%k2} + vpblendmq %zmm1,%zmm0,%zmm5{%k2} + vpblendmq %zmm0,%zmm4,%zmm9{%k2} + + vpblendmq %zmm3,%zmm6,%zmm6{%k3} + vpblendmq %zmm4,%zmm7,%zmm7{%k3} + vpblendmq %zmm2,%zmm5,%zmm5{%k3} + vpblendmq %zmm0,%zmm8,%zmm8{%k3} + vpblendmq %zmm1,%zmm9,%zmm9{%k3} + + vpblendmq %zmm4,%zmm6,%zmm6{%k4} + vpblendmq %zmm3,%zmm5,%zmm5{%k4} + vpblendmq %zmm0,%zmm7,%zmm7{%k4} + vpblendmq %zmm1,%zmm8,%zmm8{%k4} + vpblendmq %zmm2,%zmm9,%zmm9{%k4} + + vpblendmq %zmm4,%zmm5,%zmm5{%k5} + vpblendmq %zmm0,%zmm6,%zmm6{%k5} + vpblendmq %zmm1,%zmm7,%zmm7{%k5} + vpblendmq %zmm2,%zmm8,%zmm8{%k5} + vpblendmq %zmm3,%zmm9,%zmm9{%k5} + + #vpermq %zmm5,%zmm33,%zmm0 # doesn't actually change order + vpermq %zmm6,%zmm13,%zmm1 + vpermq %zmm7,%zmm14,%zmm2 + vpermq %zmm8,%zmm15,%zmm3 + vpermq %zmm9,%zmm16,%zmm4 + + ######################################### Theta, odd round + vmovdqa64 %zmm5,%zmm0 # real A00 + vpternlogq $0x96,%zmm2,%zmm1,%zmm5 # C00 is %zmm5's alias + vpternlogq $0x96,%zmm4,%zmm3,%zmm5 + + vprolq $1,%zmm5,%zmm6 + vpermq %zmm5,%zmm13,%zmm5 + vpermq %zmm6,%zmm16,%zmm6 + + vpternlogq $0x96,%zmm5,%zmm6,%zmm0 + vpternlogq $0x96,%zmm5,%zmm6,%zmm3 + vpternlogq $0x96,%zmm5,%zmm6,%zmm1 + vpternlogq $0x96,%zmm5,%zmm6,%zmm4 + vpternlogq $0x96,%zmm5,%zmm6,%zmm2 + + ######################################### Rho + vprolvq %zmm27,%zmm0,%zmm0 + vprolvq %zmm30,%zmm3,%zmm6 + vprolvq 
%zmm28,%zmm1,%zmm7 + vprolvq %zmm31,%zmm4,%zmm8 + vprolvq %zmm29,%zmm2,%zmm9 + + vpermq %zmm0,%zmm16,%zmm10 + vpermq %zmm0,%zmm15,%zmm11 + + ######################################### Iota + vpxorq -8(%r10),%zmm0,%zmm0{%k1} + + ######################################### Pi + vpermq %zmm6,%zmm14,%zmm1 + vpermq %zmm7,%zmm16,%zmm2 + vpermq %zmm8,%zmm13,%zmm3 + vpermq %zmm9,%zmm15,%zmm4 + + ######################################### Chi + vpternlogq $0xD2,%zmm11,%zmm10,%zmm0 + + vpermq %zmm6,%zmm13,%zmm12 + #vpermq %zmm6,%zmm33,%zmm6 + vpternlogq $0xD2,%zmm6,%zmm12,%zmm1 + + vpermq %zmm7,%zmm15,%zmm5 + vpermq %zmm7,%zmm14,%zmm7 + vpternlogq $0xD2,%zmm7,%zmm5,%zmm2 + + #vpermq %zmm8,%zmm33,%zmm8 + vpermq %zmm8,%zmm16,%zmm6 + vpternlogq $0xD2,%zmm6,%zmm8,%zmm3 + + vpermq %zmm9,%zmm14,%zmm5 + vpermq %zmm9,%zmm13,%zmm9 + vpternlogq $0xD2,%zmm9,%zmm5,%zmm4 + + dec %eax + jnz .Loop_avx512 + + ret +.size __KeccakF1600,.-__KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 +SHA3_absorb: + mov %rsp,%r11 + + lea -320(%rsp),%rsp + and $-64,%rsp + + lea 96(%rdi),%rdi + lea 96(%rsi),%rsi + lea 128(%rsp),%r9 + + lea theta_perm(%rip),%r8 + + kxnorw %k6,%k6,%k6 + kshiftrw $15,%k6,%k1 + kshiftrw $11,%k6,%k6 + kshiftlw $1,%k1,%k2 + kshiftlw $2,%k1,%k3 + kshiftlw $3,%k1,%k4 + kshiftlw $4,%k1,%k5 + + #vmovdqa64 64*0(%r8),%zmm33 + vmovdqa64 64*1(%r8),%zmm13 + vmovdqa64 64*2(%r8),%zmm14 + vmovdqa64 64*3(%r8),%zmm15 + vmovdqa64 64*4(%r8),%zmm16 + + vmovdqa64 64*5(%r8),%zmm27 + vmovdqa64 64*6(%r8),%zmm28 + vmovdqa64 64*7(%r8),%zmm29 + vmovdqa64 64*8(%r8),%zmm30 + vmovdqa64 64*9(%r8),%zmm31 + + vmovdqa64 64*10(%r8),%zmm22 + vmovdqa64 64*11(%r8),%zmm23 + vmovdqa64 64*12(%r8),%zmm24 + vmovdqa64 64*13(%r8),%zmm25 + vmovdqa64 64*14(%r8),%zmm26 + + vmovdqa64 64*15(%r8),%zmm17 + vmovdqa64 64*16(%r8),%zmm18 + vmovdqa64 64*17(%r8),%zmm19 + vmovdqa64 64*18(%r8),%zmm20 + vmovdqa64 64*19(%r8),%zmm21 + + vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} + vpxorq %zmm5,%zmm5,%zmm5 + vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z} + vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z} + vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z} + vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z} + + vmovdqa64 %zmm5,0*64-128(%r9) # zero transfer area on stack + vmovdqa64 %zmm5,1*64-128(%r9) + vmovdqa64 %zmm5,2*64-128(%r9) + vmovdqa64 %zmm5,3*64-128(%r9) + vmovdqa64 %zmm5,4*64-128(%r9) + jmp .Loop_absorb_avx512 + +.align 32 +.Loop_absorb_avx512: + mov %rcx,%rax + sub %rcx,%rdx + jc .Ldone_absorb_avx512 + + shr $3,%eax + mov 8*0-96(%rsi),%r8 + mov %r8,0-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*1-96(%rsi),%r8 + mov %r8,8-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*2-96(%rsi),%r8 + mov %r8,16-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*3-96(%rsi),%r8 + mov %r8,24-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*4-96(%rsi),%r8 + mov %r8,32-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*5-96(%rsi),%r8 + mov %r8,64-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*6-96(%rsi),%r8 + mov %r8,72-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*7-96(%rsi),%r8 + mov %r8,80-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*8-96(%rsi),%r8 + mov %r8,88-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*9-96(%rsi),%r8 + mov %r8,96-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*10-96(%rsi),%r8 + mov %r8,128-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*11-96(%rsi),%r8 + mov %r8,136-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*12-96(%rsi),%r8 + mov %r8,144-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*13-96(%rsi),%r8 + mov %r8,152-128(%r9) + dec 
%eax + jz .Labsorved_avx512 + mov 8*14-96(%rsi),%r8 + mov %r8,160-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*15-96(%rsi),%r8 + mov %r8,192-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*16-96(%rsi),%r8 + mov %r8,200-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*17-96(%rsi),%r8 + mov %r8,208-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*18-96(%rsi),%r8 + mov %r8,216-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*19-96(%rsi),%r8 + mov %r8,224-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*20-96(%rsi),%r8 + mov %r8,256-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*21-96(%rsi),%r8 + mov %r8,264-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*22-96(%rsi),%r8 + mov %r8,272-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*23-96(%rsi),%r8 + mov %r8,280-128(%r9) + dec %eax + jz .Labsorved_avx512 + mov 8*24-96(%rsi),%r8 + mov %r8,288-128(%r9) + dec %eax + jz .Labsorved_avx512 +.Labsorved_avx512: + lea (%rsi,%rcx),%rsi + + vpxorq 64*0-128(%r9),%zmm0,%zmm0 + vpxorq 64*1-128(%r9),%zmm1,%zmm1 + vpxorq 64*2-128(%r9),%zmm2,%zmm2 + vpxorq 64*3-128(%r9),%zmm3,%zmm3 + vpxorq 64*4-128(%r9),%zmm4,%zmm4 + + call __KeccakF1600 + + jmp .Loop_absorb_avx512 + +.align 32 +.Ldone_absorb_avx512: + vmovdqu64 %zmm0,40*0-96(%rdi){%k6} + vmovdqu64 %zmm1,40*1-96(%rdi){%k6} + vmovdqu64 %zmm2,40*2-96(%rdi){%k6} + vmovdqu64 %zmm3,40*3-96(%rdi){%k6} + vmovdqu64 %zmm4,40*4-96(%rdi){%k6} + + vzeroupper + + lea (%r11),%rsp + lea (%rdx,%rcx),%rax # return value + ret +.size SHA3_absorb,.-SHA3_absorb + +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 +SHA3_squeeze: + mov %rsp,%r11 + + lea 96(%rdi),%rdi + cmp %rcx,%rdx + jbe .Lno_output_extension_avx512 + + lea theta_perm(%rip),%r8 + + kxnorw %k6,%k6,%k6 + kshiftrw $15,%k6,%k1 + kshiftrw $11,%k6,%k6 + kshiftlw $1,%k1,%k2 + kshiftlw $2,%k1,%k3 + kshiftlw $3,%k1,%k4 + kshiftlw $4,%k1,%k5 + + #vmovdqa64 64*0(%r8),%zmm33 + vmovdqa64 64*1(%r8),%zmm13 + vmovdqa64 64*2(%r8),%zmm14 + vmovdqa64 64*3(%r8),%zmm15 + vmovdqa64 64*4(%r8),%zmm16 + + vmovdqa64 64*5(%r8),%zmm27 + vmovdqa64 64*6(%r8),%zmm28 + vmovdqa64 64*7(%r8),%zmm29 + vmovdqa64 64*8(%r8),%zmm30 + vmovdqa64 64*9(%r8),%zmm31 + + vmovdqa64 64*10(%r8),%zmm22 + vmovdqa64 64*11(%r8),%zmm23 + vmovdqa64 64*12(%r8),%zmm24 + vmovdqa64 64*13(%r8),%zmm25 + vmovdqa64 64*14(%r8),%zmm26 + + vmovdqa64 64*15(%r8),%zmm17 + vmovdqa64 64*16(%r8),%zmm18 + vmovdqa64 64*17(%r8),%zmm19 + vmovdqa64 64*18(%r8),%zmm20 + vmovdqa64 64*19(%r8),%zmm21 + + vmovdqu64 40*0-96(%rdi),%zmm0{%k6}{z} + vmovdqu64 40*1-96(%rdi),%zmm1{%k6}{z} + vmovdqu64 40*2-96(%rdi),%zmm2{%k6}{z} + vmovdqu64 40*3-96(%rdi),%zmm3{%k6}{z} + vmovdqu64 40*4-96(%rdi),%zmm4{%k6}{z} + +.Lno_output_extension_avx512: + shr $3,%rcx + lea -96(%rdi),%r9 + mov %rcx,%rax + jmp .Loop_squeeze_avx512 + +.align 32 +.Loop_squeeze_avx512: + cmp $8,%rdx + jb .Ltail_squeeze_avx512 + + mov (%r9),%r8 + lea 8(%r9),%r9 + mov %r8,(%rsi) + lea 8(%rsi),%rsi + sub $8,%rdx # len -= 8 + jz .Ldone_squeeze_avx512 + + sub $1,%rax # bsz-- + jnz .Loop_squeeze_avx512 + + #vpermq %zmm16,%zmm16,%zmm15 + #vpermq %zmm15,%zmm16,%zmm14 + #vpermq %zmm15,%zmm15,%zmm13 + + call __KeccakF1600 + + vmovdqu64 %zmm0,40*0-96(%rdi){%k6} + vmovdqu64 %zmm1,40*1-96(%rdi){%k6} + vmovdqu64 %zmm2,40*2-96(%rdi){%k6} + vmovdqu64 %zmm3,40*3-96(%rdi){%k6} + vmovdqu64 %zmm4,40*4-96(%rdi){%k6} + + lea -96(%rdi),%r9 + mov %rcx,%rax + jmp .Loop_squeeze_avx512 + +.Ltail_squeeze_avx512: + mov %rsi,%rdi + mov %r9,%rsi + mov %rdx,%rcx + .byte 0xf3,0xa4 # rep movsb + +.Ldone_squeeze_avx512: + vzeroupper + + 
lea (%r11),%rsp + ret +.size SHA3_squeeze,.-SHA3_squeeze + +.section .rodata +.align 64 +theta_perm: + .quad 0, 1, 2, 3, 4, 5, 6, 7 # [not used] + .quad 4, 0, 1, 2, 3, 5, 6, 7 + .quad 3, 4, 0, 1, 2, 5, 6, 7 + .quad 2, 3, 4, 0, 1, 5, 6, 7 + .quad 1, 2, 3, 4, 0, 5, 6, 7 + +rhotates1: + .quad 0, 44, 43, 21, 14, 0, 0, 0 # [0][0] [1][1] [2][2] [3][3] [4][4] + .quad 18, 1, 6, 25, 8, 0, 0, 0 # [4][0] [0][1] [1][2] [2][3] [3][4] + .quad 41, 2, 62, 55, 39, 0, 0, 0 # [3][0] [4][1] [0][2] [1][3] [2][4] + .quad 3, 45, 61, 28, 20, 0, 0, 0 # [2][0] [3][1] [4][2] [0][3] [1][4] + .quad 36, 10, 15, 56, 27, 0, 0, 0 # [1][0] [2][1] [3][2] [4][3] [0][4] + +rhotates0: + .quad 0, 1, 62, 28, 27, 0, 0, 0 + .quad 36, 44, 6, 55, 20, 0, 0, 0 + .quad 3, 10, 43, 25, 39, 0, 0, 0 + .quad 41, 45, 15, 21, 8, 0, 0, 0 + .quad 18, 2, 61, 56, 14, 0, 0, 0 + +pi0_perm: + .quad 0, 3, 1, 4, 2, 5, 6, 7 + .quad 1, 4, 2, 0, 3, 5, 6, 7 + .quad 2, 0, 3, 1, 4, 5, 6, 7 + .quad 3, 1, 4, 2, 0, 5, 6, 7 + .quad 4, 2, 0, 3, 1, 5, 6, 7 + + +iotas: + .quad 0x0000000000000001 + .quad 0x0000000000008082 + .quad 0x800000000000808a + .quad 0x8000000080008000 + .quad 0x000000000000808b + .quad 0x0000000080000001 + .quad 0x8000000080008081 + .quad 0x8000000000008009 + .quad 0x000000000000008a + .quad 0x0000000000000088 + .quad 0x0000000080008009 + .quad 0x000000008000000a + .quad 0x000000008000808b + .quad 0x800000000000008b + .quad 0x8000000000008089 + .quad 0x8000000000008003 + .quad 0x8000000000008002 + .quad 0x8000000000000080 + .quad 0x000000000000800a + .quad 0x800000008000000a + .quad 0x8000000080008081 + .quad 0x8000000000008080 + .quad 0x0000000080000001 + .quad 0x8000000080008008 + +.asciz "Keccak-1600 absorb and squeeze for AVX-512F, CRYPTOGAMS by " diff --git a/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx512vl.s b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx512vl.s new file mode 100644 index 000000000000..6dbdc30acc04 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-avx512vl.s @@ -0,0 +1,580 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: + lea iotas(%rip),%r10 + mov $24,%eax + jmp .Loop_avx512vl + +.align 32 +.Loop_avx512vl: + ######################################### Theta + vpshufd $0b01001110,%ymm2,%ymm13 + vpxor %ymm3,%ymm5,%ymm12 + vpxor %ymm6,%ymm4,%ymm9 + vpternlogq $0x96,%ymm1,%ymm9,%ymm12 # C[1..4] + + vpxor %ymm2,%ymm13,%ymm13 + vpermq $0b01001110,%ymm13,%ymm7 + + vpermq $0b10010011,%ymm12,%ymm11 + vprolq $1,%ymm12,%ymm8 # ROL64(C[1..4],1) + + vpermq $0b00111001,%ymm8,%ymm15 + vpxor %ymm11,%ymm8,%ymm14 + vpermq $0b00000000,%ymm14,%ymm14 # D[0..0] = ROL64(C[1],1) ^ C[4] + + vpternlogq $0x96,%ymm7,%ymm0,%ymm13 # C[0..0] + vprolq $1,%ymm13,%ymm8 # ROL64(C[0..0],1) + + vpxor %ymm14,%ymm0,%ymm0 # ^= D[0..0] + + vpblendd $0b11000000,%ymm8,%ymm15,%ymm15 + vpblendd $0b00000011,%ymm13,%ymm11,%ymm7 + + ######################################### Rho + Pi + pre-Chi shuffle + vpxor %ymm14,%ymm2,%ymm2 # ^= D[0..0] from Theta + vprolvq %ymm16,%ymm2,%ymm2 + + vpternlogq $0x96,%ymm7,%ymm15,%ymm3 # ^= D[1..4] from Theta + vprolvq %ymm18,%ymm3,%ymm3 + + vpternlogq $0x96,%ymm7,%ymm15,%ymm4 # ^= D[1..4] from Theta + vprolvq %ymm19,%ymm4,%ymm4 + + vpternlogq $0x96,%ymm7,%ymm15,%ymm5 # ^= D[1..4] from Theta + vprolvq %ymm20,%ymm5,%ymm5 + + vpermq $0b10001101,%ymm2,%ymm10 # %ymm2 -> future %ymm3 + vpermq $0b10001101,%ymm3,%ymm11 # %ymm3 -> future %ymm4 + vpternlogq $0x96,%ymm7,%ymm15,%ymm6 # ^= D[1..4] from Theta + vprolvq %ymm21,%ymm6,%ymm8 # %ymm6 -> future %ymm1 + + vpermq 
$0b00011011,%ymm4,%ymm12 # %ymm4 -> future %ymm5 + vpermq $0b01110010,%ymm5,%ymm13 # %ymm5 -> future %ymm6 + vpternlogq $0x96,%ymm7,%ymm15,%ymm1 # ^= D[1..4] from Theta + vprolvq %ymm17,%ymm1,%ymm9 # %ymm1 -> future %ymm2 + + ######################################### Chi + vpblendd $0b00001100,%ymm13,%ymm9,%ymm3 # [4][4] [2][0] + vpblendd $0b00001100,%ymm9,%ymm11,%ymm15 # [4][0] [2][1] + vpblendd $0b00001100,%ymm11,%ymm10,%ymm5 # [4][2] [2][4] + vpblendd $0b00001100,%ymm10,%ymm9,%ymm14 # [4][3] [2][0] + vpblendd $0b00110000,%ymm11,%ymm3,%ymm3 # [1][3] [4][4] [2][0] + vpblendd $0b00110000,%ymm12,%ymm15,%ymm15 # [1][4] [4][0] [2][1] + vpblendd $0b00110000,%ymm9,%ymm5,%ymm5 # [1][0] [4][2] [2][4] + vpblendd $0b00110000,%ymm13,%ymm14,%ymm14 # [1][1] [4][3] [2][0] + vpblendd $0b11000000,%ymm12,%ymm3,%ymm3 # [3][2] [1][3] [4][4] [2][0] + vpblendd $0b11000000,%ymm13,%ymm15,%ymm15 # [3][3] [1][4] [4][0] [2][1] + vpblendd $0b11000000,%ymm13,%ymm5,%ymm5 # [3][3] [1][0] [4][2] [2][4] + vpblendd $0b11000000,%ymm11,%ymm14,%ymm14 # [3][4] [1][1] [4][3] [2][0] + vpternlogq $0xC6,%ymm15,%ymm10,%ymm3 # [3][1] [1][2] [4][3] [2][4] + vpternlogq $0xC6,%ymm14,%ymm12,%ymm5 # [3][2] [1][4] [4][1] [2][3] + + vpsrldq $8,%ymm8,%ymm7 + vpandn %ymm7,%ymm8,%ymm7 # tgting [0][0] [0][0] [0][0] [0][0] + + vpblendd $0b00001100,%ymm9,%ymm12,%ymm6 # [4][0] [2][3] + vpblendd $0b00001100,%ymm12,%ymm10,%ymm15 # [4][1] [2][4] + vpblendd $0b00110000,%ymm10,%ymm6,%ymm6 # [1][2] [4][0] [2][3] + vpblendd $0b00110000,%ymm11,%ymm15,%ymm15 # [1][3] [4][1] [2][4] + vpblendd $0b11000000,%ymm11,%ymm6,%ymm6 # [3][4] [1][2] [4][0] [2][3] + vpblendd $0b11000000,%ymm9,%ymm15,%ymm15 # [3][0] [1][3] [4][1] [2][4] + vpternlogq $0xC6,%ymm15,%ymm13,%ymm6 # [3][3] [1][1] [4][4] [2][2] + + vpermq $0b00011110,%ymm8,%ymm4 # [0][1] [0][2] [0][4] [0][3] + vpblendd $0b00110000,%ymm0,%ymm4,%ymm15 # [0][1] [0][0] [0][4] [0][3] + vpermq $0b00111001,%ymm8,%ymm1 # [0][1] [0][4] [0][3] [0][2] + vpblendd $0b11000000,%ymm0,%ymm1,%ymm1 # [0][0] [0][4] [0][3] [0][2] + + vpblendd $0b00001100,%ymm12,%ymm11,%ymm2 # [4][1] [2][1] + vpblendd $0b00001100,%ymm11,%ymm13,%ymm14 # [4][2] [2][2] + vpblendd $0b00110000,%ymm13,%ymm2,%ymm2 # [1][1] [4][1] [2][1] + vpblendd $0b00110000,%ymm10,%ymm14,%ymm14 # [1][2] [4][2] [2][2] + vpblendd $0b11000000,%ymm10,%ymm2,%ymm2 # [3][1] [1][1] [4][1] [2][1] + vpblendd $0b11000000,%ymm12,%ymm14,%ymm14 # [3][2] [1][2] [4][2] [2][2] + vpternlogq $0xC6,%ymm14,%ymm9,%ymm2 # [3][0] [1][0] [4][0] [2][0] + + vpermq $0b00000000,%ymm7,%ymm7 # [0][0] [0][0] [0][0] [0][0] + vpermq $0b00011011,%ymm3,%ymm3 # post-Chi shuffle + vpermq $0b10001101,%ymm5,%ymm5 + vpermq $0b01110010,%ymm6,%ymm6 + + vpblendd $0b00001100,%ymm10,%ymm13,%ymm4 # [4][3] [2][2] + vpblendd $0b00001100,%ymm13,%ymm12,%ymm14 # [4][4] [2][3] + vpblendd $0b00110000,%ymm12,%ymm4,%ymm4 # [1][4] [4][3] [2][2] + vpblendd $0b00110000,%ymm9,%ymm14,%ymm14 # [1][0] [4][4] [2][3] + vpblendd $0b11000000,%ymm9,%ymm4,%ymm4 # [3][0] [1][4] [4][3] [2][2] + vpblendd $0b11000000,%ymm10,%ymm14,%ymm14 # [3][1] [1][0] [4][4] [2][3] + + vpternlogq $0xC6,%ymm15,%ymm8,%ymm1 # [0][4] [0][3] [0][2] [0][1] + vpternlogq $0xC6,%ymm14,%ymm11,%ymm4 # [3][4] [1][3] [4][2] [2][1] + + ######################################### Iota + vpternlogq $0x96,(%r10),%ymm7,%ymm0 + lea 32(%r10),%r10 + + dec %eax + jnz .Loop_avx512vl + + ret +.size __KeccakF1600,.-__KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 +SHA3_absorb: + mov %rsp,%r11 + + lea -240(%rsp),%rsp + and $-32,%rsp + + lea 96(%rdi),%rdi 
+ lea 96(%rsi),%rsi + lea 96(%rsp),%r10 + lea rhotates_left(%rip),%r8 + + vzeroupper + + vpbroadcastq -96(%rdi),%ymm0 # load A[5][5] + vmovdqu 8+32*0-96(%rdi),%ymm1 + vmovdqu 8+32*1-96(%rdi),%ymm2 + vmovdqu 8+32*2-96(%rdi),%ymm3 + vmovdqu 8+32*3-96(%rdi),%ymm4 + vmovdqu 8+32*4-96(%rdi),%ymm5 + vmovdqu 8+32*5-96(%rdi),%ymm6 + + vmovdqa64 0*32(%r8),%ymm16 # load "rhotate" indices + vmovdqa64 1*32(%r8),%ymm17 + vmovdqa64 2*32(%r8),%ymm18 + vmovdqa64 3*32(%r8),%ymm19 + vmovdqa64 4*32(%r8),%ymm20 + vmovdqa64 5*32(%r8),%ymm21 + + vpxor %ymm7,%ymm7,%ymm7 + vmovdqa %ymm7,32*2-96(%r10) # zero transfer area on stack + vmovdqa %ymm7,32*3-96(%r10) + vmovdqa %ymm7,32*4-96(%r10) + vmovdqa %ymm7,32*5-96(%r10) + vmovdqa %ymm7,32*6-96(%r10) + +.Loop_absorb_avx512vl: + mov %rcx,%rax + sub %rcx,%rdx + jc .Ldone_absorb_avx512vl + + shr $3,%eax + vpbroadcastq 0-96(%rsi),%ymm7 + vmovdqu 8-96(%rsi),%ymm8 + sub $4,%eax + dec %eax + jz .Labsorved_avx512vl + mov 8*5-96(%rsi),%r8 + mov %r8,80-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*6-96(%rsi),%r8 + mov %r8,192-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*7-96(%rsi),%r8 + mov %r8,104-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*8-96(%rsi),%r8 + mov %r8,144-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*9-96(%rsi),%r8 + mov %r8,184-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*10-96(%rsi),%r8 + mov %r8,64-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*11-96(%rsi),%r8 + mov %r8,128-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*12-96(%rsi),%r8 + mov %r8,200-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*13-96(%rsi),%r8 + mov %r8,176-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*14-96(%rsi),%r8 + mov %r8,120-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*15-96(%rsi),%r8 + mov %r8,88-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*16-96(%rsi),%r8 + mov %r8,96-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*17-96(%rsi),%r8 + mov %r8,168-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*18-96(%rsi),%r8 + mov %r8,208-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*19-96(%rsi),%r8 + mov %r8,152-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*20-96(%rsi),%r8 + mov %r8,72-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*21-96(%rsi),%r8 + mov %r8,160-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*22-96(%rsi),%r8 + mov %r8,136-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*23-96(%rsi),%r8 + mov %r8,112-96(%r10) + dec %eax + jz .Labsorved_avx512vl + mov 8*24-96(%rsi),%r8 + mov %r8,216-96(%r10) +.Labsorved_avx512vl: + lea (%rsi,%rcx),%rsi + + vpxor %ymm7,%ymm0,%ymm0 + vpxor %ymm8,%ymm1,%ymm1 + vpxor 32*2-96(%r10),%ymm2,%ymm2 + vpxor 32*3-96(%r10),%ymm3,%ymm3 + vpxor 32*4-96(%r10),%ymm4,%ymm4 + vpxor 32*5-96(%r10),%ymm5,%ymm5 + vpxor 32*6-96(%r10),%ymm6,%ymm6 + + call __KeccakF1600 + + lea 96(%rsp),%r10 + jmp .Loop_absorb_avx512vl + +.Ldone_absorb_avx512vl: + vmovq %xmm0,-96(%rdi) + vmovdqu %ymm1,8+32*0-96(%rdi) + vmovdqu %ymm2,8+32*1-96(%rdi) + vmovdqu %ymm3,8+32*2-96(%rdi) + vmovdqu %ymm4,8+32*3-96(%rdi) + vmovdqu %ymm5,8+32*4-96(%rdi) + vmovdqu %ymm6,8+32*5-96(%rdi) + + vzeroupper + + lea (%r11),%rsp + lea (%rdx,%rcx),%rax # return value + ret +.size SHA3_absorb,.-SHA3_absorb + +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 +SHA3_squeeze: + mov %rsp,%r11 + + lea 96(%rdi),%rdi + lea rhotates_left(%rip),%r8 + shr $3,%rcx + + vzeroupper + + vpbroadcastq -96(%rdi),%ymm0 + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu 8+32*0-96(%rdi),%ymm1 + vmovdqu 8+32*1-96(%rdi),%ymm2 
+ vmovdqu 8+32*2-96(%rdi),%ymm3 + vmovdqu 8+32*3-96(%rdi),%ymm4 + vmovdqu 8+32*4-96(%rdi),%ymm5 + vmovdqu 8+32*5-96(%rdi),%ymm6 + + vmovdqa64 0*32(%r8),%ymm16 # load "rhotate" indices + vmovdqa64 1*32(%r8),%ymm17 + vmovdqa64 2*32(%r8),%ymm18 + vmovdqa64 3*32(%r8),%ymm19 + vmovdqa64 4*32(%r8),%ymm20 + vmovdqa64 5*32(%r8),%ymm21 + + mov %rcx,%rax + +.Loop_squeeze_avx512vl: + mov 0-96(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 32-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 40-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 48-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 56-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 80-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 192-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 104-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 144-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 184-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 64-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 128-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 200-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 176-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 120-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 88-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 96-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 168-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 208-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov 
%r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 152-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 72-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 160-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 136-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 112-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov 216-120(%rdi),%r8 + sub $8,%rdx + jc .Ltail_squeeze_avx512vl + mov %r8,(%rsi) + lea 8(%rsi),%rsi + je .Ldone_squeeze_avx512vl + dec %eax + je .Lextend_output_avx512vl + mov -120(%rdi),%r8 +.Lextend_output_avx512vl: + call __KeccakF1600 + + vmovq %xmm0,-96(%rdi) + vmovdqu %ymm1,8+32*0-96(%rdi) + vmovdqu %ymm2,8+32*1-96(%rdi) + vmovdqu %ymm3,8+32*2-96(%rdi) + vmovdqu %ymm4,8+32*3-96(%rdi) + vmovdqu %ymm5,8+32*4-96(%rdi) + vmovdqu %ymm6,8+32*5-96(%rdi) + + mov %rcx,%rax + jmp .Loop_squeeze_avx512vl + + +.Ltail_squeeze_avx512vl: + add $8,%rdx +.Loop_tail_avx512vl: + mov %r8b,(%rsi) + lea 1(%rsi),%rsi + shr $8,%r8 + dec %rdx + jnz .Loop_tail_avx512vl + +.Ldone_squeeze_avx512vl: + vzeroupper + + lea (%r11),%rsp + ret +.size SHA3_squeeze,.-SHA3_squeeze + +.section .rodata +.align 64 +rhotates_left: + .quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0] + .quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4] + .quad 45, 6, 56, 39 # [3][1] [1][2] [4][3] [2][4] + .quad 10, 61, 55, 8 # [2][1] [4][2] [1][3] [3][4] + .quad 2, 15, 25, 20 # [4][1] [3][2] [2][3] [1][4] + .quad 44, 43, 21, 14 # [1][1] [2][2] [3][3] [4][4] +iotas: + .quad 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001 + .quad 0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082 + .quad 0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a + .quad 0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000 + .quad 0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b + .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001 + .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081 + .quad 0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009 + .quad 0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a + .quad 0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088 + .quad 0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009 + .quad 0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a + .quad 0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b + .quad 0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b + .quad 0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089 + .quad 0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003 + .quad 0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 
0x8000000000008002 + .quad 0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080 + .quad 0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a + .quad 0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a + .quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081 + .quad 0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080 + .quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001 + .quad 0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008 + +.asciz "Keccak-1600 absorb and squeeze for AVX512VL, CRYPTOGAMS by " diff --git a/contrib/openssl-cmake/asm/crypto/sha/keccak1600-s390x.S b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-s390x.S new file mode 100644 index 000000000000..9f254bcf300e --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-s390x.S @@ -0,0 +1,466 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: + stg %r14,8*14(%r15) + lg %r0,160(%r2) + lg %r1,168(%r2) + lg %r5,176(%r2) + lg %r6,184(%r2) + lg %r7,192(%r2) + larl %r4,iotas + j .Loop + +.align 16 +.Loop: + lg %r8,0(%r2) + lg %r9,48(%r2) + lg %r10,96(%r2) + lg %r11,144(%r2) + + xgr %r0,%r8 + xg %r1,8(%r2) + xg %r5,16(%r2) + xg %r6,24(%r2) + lgr %r12,%r7 + xg %r7,32(%r2) + + xg %r0,40(%r2) + xgr %r1,%r9 + xg %r5,56(%r2) + xg %r6,64(%r2) + xg %r7,72(%r2) + + xg %r0,80(%r2) + xg %r1,88(%r2) + xgr %r5,%r10 + xg %r6,104(%r2) + xg %r7,112(%r2) + + xg %r0,120(%r2) + xg %r1,128(%r2) + xg %r5,136(%r2) + xgr %r6,%r11 + xg %r7,152(%r2) + + lgr %r13,%r5 + rllg %r5,%r5,1 + xgr %r5,%r0 # D[1] = ROL64(C[2], 1) ^ C[0] + + rllg %r0,%r0,1 + xgr %r0,%r6 # D[4] = ROL64(C[0], 1) ^ C[3] + + rllg %r6,%r6,1 + xgr %r6,%r1 # D[2] = ROL64(C[3], 1) ^ C[1] + + rllg %r1,%r1,1 + xgr %r1,%r7 # D[0] = ROL64(C[1], 1) ^ C[4] + + rllg %r7,%r7,1 + xgr %r7,%r13 # D[3] = ROL64(C[4], 1) ^ C[2] + xgr %r9,%r5 + xgr %r10,%r6 + xgr %r11,%r7 + rllg %r9,%r9,44 + xgr %r12,%r0 + rllg %r10,%r10,43 + xgr %r8,%r1 + + lgr %r13,%r9 + ogr %r9,%r10 + rllg %r11,%r11,21 + xgr %r9,%r8 # C[0] ^ ( C[1] | C[2]) + rllg %r12,%r12,14 + xg %r9,0(%r4) + la %r4,8(%r4) + stg %r9,0(%r3) # R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i] + + lgr %r14,%r12 + ngr %r12,%r11 + lghi %r9,-1 # no 'not' instruction :-( + xgr %r12,%r10 # C[2] ^ ( C[4] & C[3]) + xgr %r10,%r9 # not %r10 + stg %r12,16(%r3) # R[0][2] = C[2] ^ ( C[4] & C[3]) + ogr %r10,%r11 + xgr %r10,%r13 # C[1] ^ (~C[2] | C[3]) + + ngr %r13,%r8 + stg %r10,8(%r3) # R[0][1] = C[1] ^ (~C[2] | C[3]) + xgr %r13,%r14 # C[4] ^ ( C[1] & C[0]) + ogr %r14,%r8 + stg %r13,32(%r3) # R[0][4] = C[4] ^ ( C[1] & C[0]) + xgr %r14,%r11 # C[3] ^ ( C[4] | C[0]) + stg %r14,24(%r3) # R[0][3] = C[3] ^ ( C[4] | C[0]) + + + lg %r8,24(%r2) + lg %r12,176(%r2) + lg %r11,128(%r2) + lg %r9,72(%r2) + lg %r10,80(%r2) + + xgr %r8,%r7 + xgr %r12,%r6 + rllg %r8,%r8,28 + xgr %r11,%r5 + rllg %r12,%r12,61 + xgr %r9,%r0 + rllg %r11,%r11,45 + xgr %r10,%r1 + + lgr %r13,%r8 + ogr %r8,%r12 + rllg %r9,%r9,20 + xgr %r8,%r11 # C[3] ^ (C[0] | C[4]) + rllg %r10,%r10,3 + stg %r8,64(%r3) # R[1][3] = C[3] ^ (C[0] | C[4]) + + lgr %r14,%r9 + ngr %r9,%r13 + lghi %r8,-1 # no 'not' instruction :-( + xgr %r9,%r12 # C[4] ^ (C[1] & C[0]) + xgr %r12,%r8 # not %r12 + stg %r9,72(%r3) # R[1][4] = C[4] ^ (C[1] & C[0]) + + ogr %r12,%r11 + xgr %r12,%r10 # C[2] ^ (~C[4] | C[3]) + + ngr %r11,%r10 + stg %r12,56(%r3) # R[1][2] = C[2] ^ (~C[4] | C[3]) + xgr %r11,%r14 # C[1] ^ (C[3] & C[2]) + ogr 
%r14,%r10 + stg %r11,48(%r3) # R[1][1] = C[1] ^ (C[3] & C[2]) + xgr %r14,%r13 # C[0] ^ (C[1] | C[2]) + stg %r14,40(%r3) # R[1][0] = C[0] ^ (C[1] | C[2]) + + + lg %r10,104(%r2) + lg %r11,152(%r2) + lg %r9,56(%r2) + lg %r12,160(%r2) + lg %r8,8(%r2) + + xgr %r10,%r7 + xgr %r11,%r0 + rllg %r10,%r10,25 + xgr %r9,%r6 + rllg %r11,%r11,8 + xgr %r12,%r1 + rllg %r9,%r9,6 + xgr %r8,%r5 + + lgr %r13,%r10 + ngr %r10,%r11 + rllg %r12,%r12,18 + xgr %r10,%r9 # C[1] ^ ( C[2] & C[3]) + lghi %r14,-1 # no 'not' instruction :-( + stg %r10,88(%r3) # R[2][1] = C[1] ^ ( C[2] & C[3]) + + xgr %r11,%r14 # not %r11 + lgr %r14,%r12 + ngr %r12,%r11 + rllg %r8,%r8,1 + xgr %r12,%r13 # C[2] ^ ( C[4] & ~C[3]) + ogr %r13,%r9 + stg %r12,96(%r3) # R[2][2] = C[2] ^ ( C[4] & ~C[3]) + xgr %r13,%r8 # C[0] ^ ( C[2] | C[1]) + + ngr %r9,%r8 + stg %r13,80(%r3) # R[2][0] = C[0] ^ ( C[2] | C[1]) + xgr %r9,%r14 # C[4] ^ ( C[1] & C[0]) + ogr %r8,%r14 + stg %r9,112(%r3) # R[2][4] = C[4] ^ ( C[1] & C[0]) + xgr %r8,%r11 # ~C[3] ^ ( C[0] | C[4]) + stg %r8,104(%r3) # R[2][3] = ~C[3] ^ ( C[0] | C[4]) + + + lg %r10,88(%r2) + lg %r11,136(%r2) + lg %r9,40(%r2) + lg %r12,184(%r2) + lg %r8,32(%r2) + + xgr %r10,%r5 + xgr %r11,%r6 + rllg %r10,%r10,10 + xgr %r9,%r1 + rllg %r11,%r11,15 + xgr %r12,%r7 + rllg %r9,%r9,36 + xgr %r8,%r0 + rllg %r12,%r12,56 + + lgr %r13,%r10 + ogr %r10,%r11 + lghi %r14,-1 # no 'not' instruction :-( + xgr %r10,%r9 # C[1] ^ ( C[2] | C[3]) + xgr %r11,%r14 # not %r11 + stg %r10,128(%r3) # R[3][1] = C[1] ^ ( C[2] | C[3]) + + lgr %r14,%r12 + ogr %r12,%r11 + rllg %r8,%r8,27 + xgr %r12,%r13 # C[2] ^ ( C[4] | ~C[3]) + ngr %r13,%r9 + stg %r12,136(%r3) # R[3][2] = C[2] ^ ( C[4] | ~C[3]) + xgr %r13,%r8 # C[0] ^ ( C[2] & C[1]) + + ogr %r9,%r8 + stg %r13,120(%r3) # R[3][0] = C[0] ^ ( C[2] & C[1]) + xgr %r9,%r14 # C[4] ^ ( C[1] | C[0]) + ngr %r8,%r14 + stg %r9,152(%r3) # R[3][4] = C[4] ^ ( C[1] | C[0]) + xgr %r8,%r11 # ~C[3] ^ ( C[0] & C[4]) + stg %r8,144(%r3) # R[3][3] = ~C[3] ^ ( C[0] & C[4]) + + + xg %r6,16(%r2) + xg %r7,64(%r2) + xg %r5,168(%r2) + xg %r0,112(%r2) + xgr %r3,%r2 # xchg %r3,%r2 + rllg %r6,%r6,62 + xg %r1,120(%r2) + rllg %r7,%r7,55 + xgr %r2,%r3 + rllg %r5,%r5,2 + xgr %r3,%r2 + rllg %r0,%r0,39 + lgr %r13,%r6 + ngr %r6,%r7 + lghi %r14,-1 # no 'not' instruction :-( + xgr %r6,%r5 # C[4] ^ ( C[0] & C[1]) + xgr %r7,%r14 # not %r7 + stg %r6,192(%r2) # R[4][4] = C[4] ^ ( C[0] & C[1]) + + lgr %r14,%r0 + ngr %r0,%r7 + rllg %r1,%r1,41 + xgr %r0,%r13 # C[0] ^ ( C[2] & ~C[1]) + ogr %r13,%r5 + stg %r0,160(%r2) # R[4][0] = C[0] ^ ( C[2] & ~C[1]) + xgr %r13,%r1 # C[3] ^ ( C[0] | C[4]) + + ngr %r5,%r1 + stg %r13,184(%r2) # R[4][3] = C[3] ^ ( C[0] | C[4]) + xgr %r5,%r14 # C[2] ^ ( C[4] & C[3]) + ogr %r1,%r14 + stg %r5,176(%r2) # R[4][2] = C[2] ^ ( C[4] & C[3]) + xgr %r1,%r7 # ~C[1] ^ ( C[2] | C[3]) + + lgr %r7,%r6 # harmonize with the loop top + lgr %r6,%r13 + stg %r1,168(%r2) # R[4][1] = ~C[1] ^ ( C[2] | C[3]) + + tmll %r4,255 + jnz .Loop + + lg %r14,8*14(%r15) + br %r14 +.size __KeccakF1600,.-__KeccakF1600 +.type KeccakF1600,@function +.align 32 +KeccakF1600: +.LKeccakF1600: + lghi %r1,-360 + stmg %r6,%r15,8*6(%r15) + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + + lghi %r8,-1 # no 'not' instruction :-( + lghi %r9,-1 + lghi %r10,-1 + lghi %r11,-1 + lghi %r12,-1 + lghi %r13,-1 + xg %r8,8(%r2) + xg %r9,16(%r2) + xg %r10,64(%r2) + xg %r11,96(%r2) + xg %r12,136(%r2) + xg %r13,160(%r2) + stmg %r8,%r9,8(%r2) + stg %r10,64(%r2) + stg %r11,96(%r2) + stg %r12,136(%r2) + stg %r13,160(%r2) + + la %r3,160(%r15) + + bras %r14,__KeccakF1600 + 
+ lghi %r8,-1 # no 'not' instruction :-( + lghi %r9,-1 + lghi %r10,-1 + lghi %r11,-1 + lghi %r12,-1 + lghi %r13,-1 + xg %r8,8(%r2) + xg %r9,16(%r2) + xg %r10,64(%r2) + xg %r11,96(%r2) + xg %r12,136(%r2) + xg %r13,160(%r2) + stmg %r8,%r9,8(%r2) + stg %r10,64(%r2) + stg %r11,96(%r2) + stg %r12,136(%r2) + stg %r13,160(%r2) + + lmg %r6,%r15,360+6*8(%r15) + br %r14 +.size KeccakF1600,.-KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 +SHA3_absorb: + lghi %r1,-360 + stmg %r5,%r15,8*5(%r15) + lgr %r0,%r15 + la %r15,0(%r1,%r15) + stg %r0,0(%r15) + + lghi %r8,-1 # no 'not' instruction :-( + lghi %r9,-1 + lghi %r10,-1 + lghi %r11,-1 + lghi %r12,-1 + lghi %r13,-1 + xg %r8,8(%r2) + xg %r9,16(%r2) + xg %r10,64(%r2) + xg %r11,96(%r2) + xg %r12,136(%r2) + xg %r13,160(%r2) + stmg %r8,%r9,8(%r2) + stg %r10,64(%r2) + stg %r11,96(%r2) + stg %r12,136(%r2) + stg %r13,160(%r2) + +.Loop_absorb: + clgr %r4,%r5 + jl .Ldone_absorb + + srlg %r5,%r5,3 + la %r1,0(%r2) + +.Lblock_absorb: + lrvg %r0,0(%r3) + la %r3,8(%r3) + xg %r0,0(%r1) + aghi %r4,-8 + stg %r0,0(%r1) + la %r1,8(%r1) + brct %r5,.Lblock_absorb + + stmg %r3,%r4,360+3*8(%r15) + la %r3,160(%r15) + bras %r14,__KeccakF1600 + lmg %r3,%r5,360+3*8(%r15) + j .Loop_absorb + +.align 16 +.Ldone_absorb: + lghi %r8,-1 # no 'not' instruction :-( + lghi %r9,-1 + lghi %r10,-1 + lghi %r11,-1 + lghi %r12,-1 + lghi %r13,-1 + xg %r8,8(%r2) + xg %r9,16(%r2) + xg %r10,64(%r2) + xg %r11,96(%r2) + xg %r12,136(%r2) + xg %r13,160(%r2) + stmg %r8,%r9,8(%r2) + stg %r10,64(%r2) + stg %r11,96(%r2) + stg %r12,136(%r2) + stg %r13,160(%r2) + + lgr %r2,%r4 # return value + + lmg %r6,%r15,360+6*8(%r15) + br %r14 +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 +SHA3_squeeze: + srlg %r5,%r5,3 + stg %r14,2*8(%r15) + lghi %r14,8 + stg %r5,5*8(%r15) + la %r1,0(%r2) + cijne %r6,0,.Lnext_block + + j .Loop_squeeze + +.align 16 +.Loop_squeeze: + clgr %r4,%r14 + jl .Ltail_squeeze + + lrvg %r0,0(%r1) + la %r1,8(%r1) + stg %r0,0(%r3) + la %r3,8(%r3) + aghi %r4,-8 # len -= 8 + jz .Ldone_squeeze + + brct %r5,.Loop_squeeze # bsz-- + +.Lnext_block: + stmg %r3,%r4,3*8(%r15) + bras %r14,.LKeccakF1600 + lmg %r3,%r5,3*8(%r15) + lghi %r14,8 + la %r1,0(%r2) + j .Loop_squeeze + +.Ltail_squeeze: + lg %r0,0(%r1) +.Loop_tail_squeeze: + stc %r0,0(%r3) + la %r3,1(%r3) + srlg %r0,%r0,8 + brct %r4,.Loop_tail_squeeze + +.Ldone_squeeze: + lg %r14,2*8(%r15) + br %r14 +.size SHA3_squeeze,.-SHA3_squeeze +.align 256 + .quad 0,0,0,0,0,0,0,0 +.type iotas,@object +iotas: + .quad 0x0000000000000001 + .quad 0x0000000000008082 + .quad 0x800000000000808a + .quad 0x8000000080008000 + .quad 0x000000000000808b + .quad 0x0000000080000001 + .quad 0x8000000080008081 + .quad 0x8000000000008009 + .quad 0x000000000000008a + .quad 0x0000000000000088 + .quad 0x0000000080008009 + .quad 0x000000008000000a + .quad 0x000000008000808b + .quad 0x800000000000008b + .quad 0x8000000000008089 + .quad 0x8000000000008003 + .quad 0x8000000000008002 + .quad 0x8000000000000080 + .quad 0x000000000000800a + .quad 0x800000008000000a + .quad 0x8000000080008081 + .quad 0x8000000000008080 + .quad 0x0000000080000001 + .quad 0x8000000080008008 +.size iotas,.-iotas +.asciz "Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by " diff --git a/contrib/openssl-cmake/asm/crypto/sha/keccak1600-x86_64.s b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-x86_64.s new file mode 100644 index 000000000000..0ee5e604b71e --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/keccak1600-x86_64.s @@ -0,0 
+1,527 @@ +.text + +.type __KeccakF1600,@function +.align 32 +__KeccakF1600: +.cfi_startproc + movq 60(%rdi),%rax + movq 68(%rdi),%rbx + movq 76(%rdi),%rcx + movq 84(%rdi),%rdx + movq 92(%rdi),%rbp + jmp .Loop + +.align 32 +.Loop: + movq -100(%rdi),%r8 + movq -52(%rdi),%r9 + movq -4(%rdi),%r10 + movq 44(%rdi),%r11 + + xorq -84(%rdi),%rcx + xorq -76(%rdi),%rdx + xorq %r8,%rax + xorq -92(%rdi),%rbx + xorq -44(%rdi),%rcx + xorq -60(%rdi),%rax + movq %rbp,%r12 + xorq -68(%rdi),%rbp + + xorq %r10,%rcx + xorq -20(%rdi),%rax + xorq -36(%rdi),%rdx + xorq %r9,%rbx + xorq -28(%rdi),%rbp + + xorq 36(%rdi),%rcx + xorq 20(%rdi),%rax + xorq 4(%rdi),%rdx + xorq -12(%rdi),%rbx + xorq 12(%rdi),%rbp + + movq %rcx,%r13 + rolq $1,%rcx + xorq %rax,%rcx + xorq %r11,%rdx + + rolq $1,%rax + xorq %rdx,%rax + xorq 28(%rdi),%rbx + + rolq $1,%rdx + xorq %rbx,%rdx + xorq 52(%rdi),%rbp + + rolq $1,%rbx + xorq %rbp,%rbx + + rolq $1,%rbp + xorq %r13,%rbp + xorq %rcx,%r9 + xorq %rdx,%r10 + rolq $44,%r9 + xorq %rbp,%r11 + xorq %rax,%r12 + rolq $43,%r10 + xorq %rbx,%r8 + movq %r9,%r13 + rolq $21,%r11 + orq %r10,%r9 + xorq %r8,%r9 + rolq $14,%r12 + + xorq (%r15),%r9 + leaq 8(%r15),%r15 + + movq %r12,%r14 + andq %r11,%r12 + movq %r9,-100(%rsi) + xorq %r10,%r12 + notq %r10 + movq %r12,-84(%rsi) + + orq %r11,%r10 + movq 76(%rdi),%r12 + xorq %r13,%r10 + movq %r10,-92(%rsi) + + andq %r8,%r13 + movq -28(%rdi),%r9 + xorq %r14,%r13 + movq -20(%rdi),%r10 + movq %r13,-68(%rsi) + + orq %r8,%r14 + movq -76(%rdi),%r8 + xorq %r11,%r14 + movq 28(%rdi),%r11 + movq %r14,-76(%rsi) + + + xorq %rbp,%r8 + xorq %rdx,%r12 + rolq $28,%r8 + xorq %rcx,%r11 + xorq %rax,%r9 + rolq $61,%r12 + rolq $45,%r11 + xorq %rbx,%r10 + rolq $20,%r9 + movq %r8,%r13 + orq %r12,%r8 + rolq $3,%r10 + + xorq %r11,%r8 + movq %r8,-36(%rsi) + + movq %r9,%r14 + andq %r13,%r9 + movq -92(%rdi),%r8 + xorq %r12,%r9 + notq %r12 + movq %r9,-28(%rsi) + + orq %r11,%r12 + movq -44(%rdi),%r9 + xorq %r10,%r12 + movq %r12,-44(%rsi) + + andq %r10,%r11 + movq 60(%rdi),%r12 + xorq %r14,%r11 + movq %r11,-52(%rsi) + + orq %r10,%r14 + movq 4(%rdi),%r10 + xorq %r13,%r14 + movq 52(%rdi),%r11 + movq %r14,-60(%rsi) + + + xorq %rbp,%r10 + xorq %rax,%r11 + rolq $25,%r10 + xorq %rdx,%r9 + rolq $8,%r11 + xorq %rbx,%r12 + rolq $6,%r9 + xorq %rcx,%r8 + rolq $18,%r12 + movq %r10,%r13 + andq %r11,%r10 + rolq $1,%r8 + + notq %r11 + xorq %r9,%r10 + movq %r10,-12(%rsi) + + movq %r12,%r14 + andq %r11,%r12 + movq -12(%rdi),%r10 + xorq %r13,%r12 + movq %r12,-4(%rsi) + + orq %r9,%r13 + movq 84(%rdi),%r12 + xorq %r8,%r13 + movq %r13,-20(%rsi) + + andq %r8,%r9 + xorq %r14,%r9 + movq %r9,12(%rsi) + + orq %r8,%r14 + movq -60(%rdi),%r9 + xorq %r11,%r14 + movq 36(%rdi),%r11 + movq %r14,4(%rsi) + + + movq -68(%rdi),%r8 + + xorq %rcx,%r10 + xorq %rdx,%r11 + rolq $10,%r10 + xorq %rbx,%r9 + rolq $15,%r11 + xorq %rbp,%r12 + rolq $36,%r9 + xorq %rax,%r8 + rolq $56,%r12 + movq %r10,%r13 + orq %r11,%r10 + rolq $27,%r8 + + notq %r11 + xorq %r9,%r10 + movq %r10,28(%rsi) + + movq %r12,%r14 + orq %r11,%r12 + xorq %r13,%r12 + movq %r12,36(%rsi) + + andq %r9,%r13 + xorq %r8,%r13 + movq %r13,20(%rsi) + + orq %r8,%r9 + xorq %r14,%r9 + movq %r9,52(%rsi) + + andq %r14,%r8 + xorq %r11,%r8 + movq %r8,44(%rsi) + + + xorq -84(%rdi),%rdx + xorq -36(%rdi),%rbp + rolq $62,%rdx + xorq 68(%rdi),%rcx + rolq $55,%rbp + xorq 12(%rdi),%rax + rolq $2,%rcx + xorq 20(%rdi),%rbx + xchgq %rsi,%rdi + rolq $39,%rax + rolq $41,%rbx + movq %rdx,%r13 + andq %rbp,%rdx + notq %rbp + xorq %rcx,%rdx + movq %rdx,92(%rdi) + + movq %rax,%r14 + andq %rbp,%rax + 
xorq %r13,%rax + movq %rax,60(%rdi) + + orq %rcx,%r13 + xorq %rbx,%r13 + movq %r13,84(%rdi) + + andq %rbx,%rcx + xorq %r14,%rcx + movq %rcx,76(%rdi) + + orq %r14,%rbx + xorq %rbp,%rbx + movq %rbx,68(%rdi) + + movq %rdx,%rbp + movq %r13,%rdx + + testq $255,%r15 + jnz .Loop + + leaq -192(%r15),%r15 + .byte 0xf3,0xc3 +.cfi_endproc +.size __KeccakF1600,.-__KeccakF1600 + +.type KeccakF1600,@function +.align 32 +KeccakF1600: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + leaq 100(%rdi),%rdi + subq $200,%rsp +.cfi_adjust_cfa_offset 200 + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + + leaq iotas(%rip),%r15 + leaq 100(%rsp),%rsi + + call __KeccakF1600 + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + leaq -100(%rdi),%rdi + + addq $200,%rsp +.cfi_adjust_cfa_offset -200 + + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size KeccakF1600,.-KeccakF1600 +.globl SHA3_absorb +.type SHA3_absorb,@function +.align 32 +SHA3_absorb: +.cfi_startproc + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r15,-56 + + leaq 100(%rdi),%rdi + subq $232,%rsp +.cfi_adjust_cfa_offset 232 + + movq %rsi,%r9 + leaq 100(%rsp),%rsi + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + leaq iotas(%rip),%r15 + + movq %rcx,216-100(%rsi) + +.Loop_absorb: + cmpq %rcx,%rdx + jc .Ldone_absorb + + shrq $3,%rcx + leaq -100(%rdi),%r8 + +.Lblock_absorb: + movq (%r9),%rax + leaq 8(%r9),%r9 + xorq (%r8),%rax + leaq 8(%r8),%r8 + subq $8,%rdx + movq %rax,-8(%r8) + subq $1,%rcx + jnz .Lblock_absorb + + movq %r9,200-100(%rsi) + movq %rdx,208-100(%rsi) + call __KeccakF1600 + movq 200-100(%rsi),%r9 + movq 208-100(%rsi),%rdx + movq 216-100(%rsi),%rcx + jmp .Loop_absorb + +.align 32 +.Ldone_absorb: + movq %rdx,%rax + + notq -92(%rdi) + notq -84(%rdi) + notq -36(%rdi) + notq -4(%rdi) + notq 36(%rdi) + notq 60(%rdi) + + addq $232,%rsp +.cfi_adjust_cfa_offset -232 + + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r12 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_restore %rbx + .byte 0xf3,0xc3 +.cfi_endproc +.size SHA3_absorb,.-SHA3_absorb +.globl SHA3_squeeze +.type SHA3_squeeze,@function +.align 32 +SHA3_squeeze: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r12,-16 + pushq %r13 
+.cfi_adjust_cfa_offset 8 +.cfi_offset %r13,-24 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset %r14,-32 + + shrq $3,%rcx + movq %rdi,%r9 + movq %rsi,%r12 + movq %rdx,%r13 + movq %rcx,%r14 + btl $0,%r8d + jc .Lnext_block + jmp .Loop_squeeze + +.align 32 +.Loop_squeeze: + cmpq $8,%r13 + jb .Ltail_squeeze + + movq (%r9),%rax + leaq 8(%r9),%r9 + movq %rax,(%r12) + leaq 8(%r12),%r12 + subq $8,%r13 + jz .Ldone_squeeze + + subq $1,%rcx + jnz .Loop_squeeze +.Lnext_block: + call KeccakF1600 + movq %rdi,%r9 + movq %r14,%rcx + jmp .Loop_squeeze + +.Ltail_squeeze: + movq %r9,%rsi + movq %r12,%rdi + movq %r13,%rcx +.byte 0xf3,0xa4 + +.Ldone_squeeze: + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_restore %r13 + .byte 0xf3,0xc3 +.cfi_endproc +.size SHA3_squeeze,.-SHA3_squeeze +.section .rodata +.align 256 +.quad 0,0,0,0,0,0,0,0 +.type iotas,@object +iotas: +.quad 0x0000000000000001 +.quad 0x0000000000008082 +.quad 0x800000000000808a +.quad 0x8000000080008000 +.quad 0x000000000000808b +.quad 0x0000000080000001 +.quad 0x8000000080008081 +.quad 0x8000000000008009 +.quad 0x000000000000008a +.quad 0x0000000000000088 +.quad 0x0000000080008009 +.quad 0x000000008000000a +.quad 0x000000008000808b +.quad 0x800000000000008b +.quad 0x8000000000008089 +.quad 0x8000000000008003 +.quad 0x8000000000008002 +.quad 0x8000000000000080 +.quad 0x000000000000800a +.quad 0x800000008000000a +.quad 0x8000000080008081 +.quad 0x8000000000008080 +.quad 0x0000000080000001 +.quad 0x8000000080008008 +.size iotas,.-iotas +.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha1-armv8.S b/contrib/openssl-cmake/asm/crypto/sha/sha1-armv8.S new file mode 100644 index 000000000000..365bfd164a1c --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha1-armv8.S @@ -0,0 +1,1218 @@ +#include "arm_arch.h" +#ifndef __KERNEL__ + +.hidden OPENSSL_armcap_P +#endif + +.text + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + AARCH64_VALID_CALL_TARGET + adrp x16,OPENSSL_armcap_P + ldr w16,[x16,#:lo12:OPENSSL_armcap_P] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-96]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +.Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __AARCH64EB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldur x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldur x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldur x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldur x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldur x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldur x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add 
w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldur x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __AARCH64EB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add 
w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // 
e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // 
e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add 
w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // 
e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + adrp x4,.Lconst + add x4,x4,#:lo12:.Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4] + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b +.inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 +.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 +.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 +.inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 +.inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 +.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 +.inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s +.inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b +.inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 +.inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s +.inst 0x5e281885 //sha1su1 v5.16b,v4.16b +.inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b +.inst 
0x5e280802 //sha1h v2.16b,v0.16b // 15 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s +.inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b +.inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 16 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s +.inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 17 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + +.inst 0x5e280803 //sha1h v3.16b,v0.16b // 18 +.inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + +.inst 0x5e280802 //sha1h v2.16b,v0.16b // 19 +.inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 + +.section .rodata + +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha1-mb-x86_64.s b/contrib/openssl-cmake/asm/crypto/sha/sha1-mb-x86_64.s new file mode 100644 index 000000000000..24d9d9375657 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha1-mb-x86_64.s @@ -0,0 +1,7304 @@ +.text + + + +.globl sha1_multi_block +.type sha1_multi_block,@function +.align 32 +sha1_multi_block: +.cfi_startproc + movq OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbx,-24 + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08 +.Lbody: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + + movq 0(%rsi),%r8 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + + movq 16(%rsi),%r9 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + + movq 32(%rsi),%r10 + + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + + movq 48(%rsi),%r11 + + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + movdqu 32(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 96(%rdi),%xmm13 + movdqu 128(%rdi),%xmm14 + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + jmp .Loop + +.align 32 +.Loop: + movd (%r8),%xmm0 + leaq 64(%r8),%r8 + movd (%r9),%xmm2 + leaq 64(%r9),%r9 + movd (%r10),%xmm3 + leaq 64(%r10),%r10 + movd (%r11),%xmm4 + leaq 64(%r11),%r11 + punpckldq %xmm3,%xmm0 + movd -60(%r8),%xmm1 + punpckldq %xmm4,%xmm2 + movd -60(%r9),%xmm9 + punpckldq %xmm2,%xmm0 + movd -60(%r10),%xmm8 +.byte 102,15,56,0,197 + movd -60(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld 
$5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,0-128(%rax) + paddd %xmm0,%xmm14 + movd -56(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -56(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -56(%r10),%xmm8 + por %xmm7,%xmm11 + movd -56(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,16-128(%rax) + paddd %xmm1,%xmm13 + movd -52(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -52(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -52(%r10),%xmm8 + por %xmm7,%xmm10 + movd -52(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,32-128(%rax) + paddd %xmm2,%xmm12 + movd -48(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -48(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -48(%r10),%xmm8 + por %xmm7,%xmm14 + movd -48(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,48-128(%rax) + paddd %xmm3,%xmm11 + movd -44(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -44(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -44(%r10),%xmm8 + por %xmm7,%xmm13 + movd -44(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,64-128(%rax) + paddd %xmm4,%xmm10 + movd -40(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -40(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -40(%r10),%xmm8 + por %xmm7,%xmm12 + movd -40(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,80-128(%rax) + paddd %xmm0,%xmm14 + movd -36(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -36(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -36(%r10),%xmm8 + por %xmm7,%xmm11 + movd -36(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq 
%xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,96-128(%rax) + paddd %xmm1,%xmm13 + movd -32(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -32(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -32(%r10),%xmm8 + por %xmm7,%xmm10 + movd -32(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,112-128(%rax) + paddd %xmm2,%xmm12 + movd -28(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -28(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -28(%r10),%xmm8 + por %xmm7,%xmm14 + movd -28(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,128-128(%rax) + paddd %xmm3,%xmm11 + movd -24(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -24(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -24(%r10),%xmm8 + por %xmm7,%xmm13 + movd -24(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,144-128(%rax) + paddd %xmm4,%xmm10 + movd -20(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -20(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -20(%r10),%xmm8 + por %xmm7,%xmm12 + movd -20(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,160-128(%rax) + paddd %xmm0,%xmm14 + movd -16(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -16(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -16(%r10),%xmm8 + por %xmm7,%xmm11 + movd -16(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,176-128(%rax) + paddd %xmm1,%xmm13 + movd -12(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -12(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -12(%r10),%xmm8 + por %xmm7,%xmm10 + movd -12(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa 
%xmm2,192-128(%rax) + paddd %xmm2,%xmm12 + movd -8(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -8(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -8(%r10),%xmm8 + por %xmm7,%xmm14 + movd -8(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,208-128(%rax) + paddd %xmm3,%xmm11 + movd -4(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -4(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -4(%r10),%xmm8 + por %xmm7,%xmm13 + movdqa 0-128(%rax),%xmm1 + movd -4(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + prefetcht0 63(%r8) + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,224-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + prefetcht0 63(%r9) + + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + prefetcht0 63(%r10) + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + prefetcht0 63(%r11) + por %xmm7,%xmm12 + movdqa 16-128(%rax),%xmm2 + pxor %xmm3,%xmm1 + movdqa 32-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + pxor 128-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + movdqa %xmm11,%xmm7 + pslld $5,%xmm8 + pxor %xmm3,%xmm1 + movdqa %xmm11,%xmm6 + pandn %xmm13,%xmm7 + movdqa %xmm1,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm10,%xmm9 + psrld $31,%xmm5 + paddd %xmm1,%xmm1 + + movdqa %xmm0,240-128(%rax) + paddd %xmm0,%xmm14 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm11,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 48-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + pxor 144-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + movdqa %xmm10,%xmm7 + pslld $5,%xmm8 + pxor %xmm4,%xmm2 + movdqa %xmm10,%xmm6 + pandn %xmm12,%xmm7 + movdqa %xmm2,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm14,%xmm9 + psrld $31,%xmm5 + paddd %xmm2,%xmm2 + + movdqa %xmm1,0-128(%rax) + paddd %xmm1,%xmm13 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm10,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 64-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + pxor 160-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + movdqa %xmm14,%xmm7 + pslld $5,%xmm8 + pxor %xmm0,%xmm3 + movdqa %xmm14,%xmm6 + pandn %xmm11,%xmm7 + movdqa %xmm3,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm13,%xmm9 + psrld $31,%xmm5 + paddd %xmm3,%xmm3 + + movdqa %xmm2,16-128(%rax) + paddd %xmm2,%xmm12 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm14,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 80-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + pxor 176-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + movdqa %xmm13,%xmm7 + pslld $5,%xmm8 + pxor %xmm1,%xmm4 + movdqa %xmm13,%xmm6 + pandn %xmm10,%xmm7 + movdqa %xmm4,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm12,%xmm9 + psrld $31,%xmm5 + 
paddd %xmm4,%xmm4 + + movdqa %xmm3,32-128(%rax) + paddd %xmm3,%xmm11 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm13,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 96-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + pxor 192-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + movdqa %xmm12,%xmm7 + pslld $5,%xmm8 + pxor %xmm2,%xmm0 + movdqa %xmm12,%xmm6 + pandn %xmm14,%xmm7 + movdqa %xmm0,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm11,%xmm9 + psrld $31,%xmm5 + paddd %xmm0,%xmm0 + + movdqa %xmm4,48-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm12,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 0(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 112-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 208-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,64-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 128-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 224-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,80-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 144-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 240-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,96-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 160-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 0-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,112-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 176-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 16-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,128-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 192-128(%rax),%xmm3 + + 
movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 32-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,144-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 208-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 48-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,160-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 224-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 64-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,176-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 240-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 80-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,192-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 0-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 96-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,208-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 16-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 112-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,224-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 32-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 128-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,240-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd 
%xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 48-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 144-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,0-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 64-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 160-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,16-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 80-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 176-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,32-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 96-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 192-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,48-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 112-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 208-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,64-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 128-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 224-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,80-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 144-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 240-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,96-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor 
%xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 160-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 0-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,112-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 32(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 176-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 16-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,128-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 192-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 32-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,144-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 208-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 48-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,160-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 224-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 64-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,176-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 240-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 80-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,192-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + 
psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 0-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 96-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,208-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 16-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 112-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,224-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 32-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 128-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,240-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 48-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 144-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,0-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 64-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 160-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,16-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 80-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 176-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,32-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + 
psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 96-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 192-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,48-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 112-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 208-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,64-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 128-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 224-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,80-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 144-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 240-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,96-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 160-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 0-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,112-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 176-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 16-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,128-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 
+ psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 192-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 32-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,144-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 208-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 48-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,160-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 224-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 64-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,176-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 64(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 240-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 80-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,192-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 0-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 96-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,208-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 16-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 112-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,224-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por 
%xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 32-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 128-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,240-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 48-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 144-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,0-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 64-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 160-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,16-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 80-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 176-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,32-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 96-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 192-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,48-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 112-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 208-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,64-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 128-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 224-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,80-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa 
%xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 144-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 240-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,96-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 160-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 0-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,112-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 176-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 16-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 192-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 32-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 208-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 48-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 224-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 64-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 240-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 80-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa 
%xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 0-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 96-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 16-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 112-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + movdqa %xmm12,%xmm7 + pxor %xmm13,%xmm6 + + pslld $30,%xmm7 + por %xmm9,%xmm8 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm7,%xmm12 + movdqa (%rbx),%xmm0 + movl $1,%ecx + cmpl 0(%rbx),%ecx + pxor %xmm8,%xmm8 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm0,%xmm1 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm8,%xmm1 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm1,%xmm0 + cmovgeq %rbp,%r11 + + movdqu 0(%rdi),%xmm6 + pand %xmm1,%xmm10 + movdqu 32(%rdi),%xmm7 + pand %xmm1,%xmm11 + paddd %xmm6,%xmm10 + movdqu 64(%rdi),%xmm8 + pand %xmm1,%xmm12 + paddd %xmm7,%xmm11 + movdqu 96(%rdi),%xmm9 + pand %xmm1,%xmm13 + paddd %xmm8,%xmm12 + movdqu 128(%rdi),%xmm5 + pand %xmm1,%xmm14 + movdqu %xmm10,0(%rdi) + paddd %xmm9,%xmm13 + movdqu %xmm11,32(%rdi) + paddd %xmm5,%xmm14 + movdqu %xmm12,64(%rdi) + movdqu %xmm13,96(%rdi) + movdqu %xmm14,128(%rdi) + + movdqa %xmm0,(%rbx) + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_multi_block,.-sha1_multi_block +.type sha1_multi_block_shaext,@function +.align 32 +sha1_multi_block_shaext: +.cfi_startproc +_shaext_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 64(%rdi),%rdi + movq %rax,272(%rsp) +.Lbody_shaext: + leaq 256(%rsp),%rbx + movdqa K_XX_XX+128(%rip),%xmm3 + +.Loop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + + movq 0(%rsi),%r8 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + + movq 16(%rsi),%r9 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz .Ldone_shaext + + movq 0-64(%rdi),%xmm0 + movq 32-64(%rdi),%xmm4 + movq 64-64(%rdi),%xmm5 + movq 96-64(%rdi),%xmm6 + 
movq 128-64(%rdi),%xmm7 + + punpckldq %xmm4,%xmm0 + punpckldq %xmm6,%xmm5 + + movdqa %xmm0,%xmm8 + punpcklqdq %xmm5,%xmm0 + punpckhqdq %xmm5,%xmm8 + + pshufd $63,%xmm7,%xmm1 + pshufd $127,%xmm7,%xmm9 + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm11 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm12 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm13 +.byte 102,68,15,56,0,219 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 +.byte 102,15,56,0,235 + movdqu 48(%r9),%xmm14 + leaq 64(%r9),%r9 +.byte 102,68,15,56,0,227 + + movdqa %xmm1,80(%rsp) + paddd %xmm4,%xmm1 + movdqa %xmm9,112(%rsp) + paddd %xmm11,%xmm9 + movdqa %xmm0,64(%rsp) + movdqa %xmm0,%xmm2 + movdqa %xmm8,96(%rsp) + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 102,15,56,0,243 + prefetcht0 127(%r8) +.byte 15,56,201,229 +.byte 102,68,15,56,0,235 + prefetcht0 127(%r9) +.byte 69,15,56,201,220 + +.byte 102,15,56,0,251 + movdqa %xmm0,%xmm1 +.byte 102,68,15,56,0,243 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,206 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,205 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,215 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,204 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,215 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,204 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,213 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 
+.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,204 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,213 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,206 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,213 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + + movl $1,%ecx + pxor %xmm4,%xmm4 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,206 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + movq (%rbx),%xmm6 + + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,215 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,214 + + pshufd $0x00,%xmm6,%xmm11 + pshufd $0x55,%xmm6,%xmm12 + movdqa %xmm6,%xmm7 + pcmpgtd %xmm4,%xmm11 + pcmpgtd %xmm4,%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,204 + + pcmpgtd %xmm4,%xmm7 + pand %xmm11,%xmm0 + pand %xmm11,%xmm1 + pand %xmm12,%xmm8 + pand %xmm12,%xmm9 + paddd %xmm7,%xmm6 + + paddd 64(%rsp),%xmm0 + paddd 80(%rsp),%xmm1 + paddd 96(%rsp),%xmm8 + paddd 112(%rsp),%xmm9 + + movq %xmm6,(%rbx) + decl %edx + jnz .Loop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + + movdqa %xmm0,%xmm6 + punpckldq %xmm8,%xmm0 + punpckhdq %xmm8,%xmm6 + punpckhdq %xmm9,%xmm1 + movq %xmm0,0-64(%rdi) + psrldq $8,%xmm0 + movq %xmm6,64-64(%rdi) + psrldq $8,%xmm6 + movq %xmm0,32-64(%rdi) + psrldq $8,%xmm1 + movq %xmm6,96-64(%rdi) + movq %xmm1,128-64(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz .Loop_grande_shaext + +.Ldone_shaext: + + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_shaext: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_multi_block_shaext,.-sha1_multi_block_shaext +.type sha1_multi_block_avx,@function +.align 32 +sha1_multi_block_avx: +.cfi_startproc +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb .Lavx + testl $32,%ecx + 
jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08 +.Lbody_avx: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + + vzeroupper +.Loop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + + movq 0(%rsi),%r8 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + + movq 16(%rsi),%r9 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + + movq 32(%rsi),%r10 + + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + + movq 48(%rsi),%r11 + + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone_avx + + vmovdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%xmm11 + vmovdqu 64(%rdi),%xmm12 + vmovdqu 96(%rdi),%xmm13 + vmovdqu 128(%rdi),%xmm14 + vmovdqu 96(%rbp),%xmm5 + jmp .Loop_avx + +.align 32 +.Loop_avx: + vmovdqa -32(%rbp),%xmm15 + vmovd (%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd (%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vmovd -60(%r8),%xmm1 + vpunpckldq %xmm2,%xmm0,%xmm0 + vmovd -60(%r9),%xmm9 + vpshufb %xmm5,%xmm0,%xmm0 + vpinsrd $1,-60(%r10),%xmm1,%xmm1 + vpinsrd $1,-60(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,0-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -56(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -56(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-56(%r10),%xmm2,%xmm2 + vpinsrd $1,-56(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,16-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -52(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -52(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-52(%r10),%xmm3,%xmm3 + vpinsrd $1,-52(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,32-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -48(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -48(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm4,%xmm4 + vpinsrd $1,-48(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,48-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + 
vmovd -44(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -44(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-44(%r10),%xmm0,%xmm0 + vpinsrd $1,-44(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,64-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -40(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -40(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-40(%r10),%xmm1,%xmm1 + vpinsrd $1,-40(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,80-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -36(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -36(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-36(%r10),%xmm2,%xmm2 + vpinsrd $1,-36(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,96-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -32(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -32(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-32(%r10),%xmm3,%xmm3 + vpinsrd $1,-32(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,112-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -28(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -28(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm4,%xmm4 + vpinsrd $1,-28(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,128-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -24(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -24(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-24(%r10),%xmm0,%xmm0 + vpinsrd $1,-24(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,144-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -20(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -20(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + 
vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-20(%r10),%xmm1,%xmm1 + vpinsrd $1,-20(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,160-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -16(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -16(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-16(%r10),%xmm2,%xmm2 + vpinsrd $1,-16(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,176-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -12(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -12(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-12(%r10),%xmm3,%xmm3 + vpinsrd $1,-12(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,192-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -8(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -8(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm4,%xmm4 + vpinsrd $1,-8(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,208-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -4(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -4(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vmovdqa 0-128(%rax),%xmm1 + vpinsrd $1,-4(%r10),%xmm0,%xmm0 + vpinsrd $1,-4(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + prefetcht0 63(%r8) + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,224-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + prefetcht0 63(%r9) + vpxor %xmm7,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + prefetcht0 63(%r10) + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + prefetcht0 63(%r11) + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 16-128(%rax),%xmm2 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 32-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,240-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 128-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + + vpaddd %xmm8,%xmm14,%xmm14 + vpor 
%xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 48-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,0-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 144-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 64-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,16-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 160-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 80-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,32-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 176-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 96-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,48-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 192-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 0(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 112-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,64-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 208-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 128-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,80-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 224-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor 
%xmm0,%xmm3,%xmm3 + vmovdqa 144-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,96-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 240-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 160-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,112-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 0-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 176-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,128-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 16-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 192-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,144-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 32-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 208-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,160-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 48-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 224-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,176-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 64-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 240-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,192-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 80-128(%rax),%xmm4,%xmm4 
+ vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 0-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,208-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 96-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 16-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,224-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 112-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 32-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,240-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 128-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 48-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,0-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 144-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 64-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,16-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 160-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 80-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,32-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 176-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld 
$2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 96-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,48-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 192-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 112-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,64-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 208-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 128-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,80-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 224-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 144-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,96-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 240-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 160-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,112-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 0-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 32(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 176-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 16-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,128-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 
192-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 32-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,144-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 208-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 48-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,160-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 224-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 64-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,176-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 240-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 80-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,192-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 0-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 96-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,208-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 16-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 112-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,224-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + 
vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 32-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 128-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,240-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 48-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 144-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,0-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 64-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 160-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,16-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 80-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 176-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,32-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 96-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 192-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,48-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 112-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 208-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,64-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + 
vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 128-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 224-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,80-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 144-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 240-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,96-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 160-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 0-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,112-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 176-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 16-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,128-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 192-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 32-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,144-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 208-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 48-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + 
vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,160-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 224-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 64-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,176-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 64(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 240-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,192-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 80-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 0-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,208-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 96-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 16-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,224-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 112-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 32-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,240-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 128-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 48-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,0-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 144-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + 
+ vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 64-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,16-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 160-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 80-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,32-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 176-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 96-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,48-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 192-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 112-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,64-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 208-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 128-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,80-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 224-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 144-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,96-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 240-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor 
%xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 160-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,112-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 0-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 176-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 16-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 192-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 32-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 208-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 48-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 224-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 64-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 240-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 80-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 0-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 96-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd 
%xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 16-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 112-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + + vpsrld $27,%xmm11,%xmm9 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor %xmm13,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm7,%xmm12,%xmm12 + movl $1,%ecx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%xmm6 + vpxor %xmm8,%xmm8,%xmm8 + vmovdqa %xmm6,%xmm7 + vpcmpgtd %xmm8,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + + vpand %xmm7,%xmm10,%xmm10 + vpand %xmm7,%xmm11,%xmm11 + vpaddd 0(%rdi),%xmm10,%xmm10 + vpand %xmm7,%xmm12,%xmm12 + vpaddd 32(%rdi),%xmm11,%xmm11 + vpand %xmm7,%xmm13,%xmm13 + vpaddd 64(%rdi),%xmm12,%xmm12 + vpand %xmm7,%xmm14,%xmm14 + vpaddd 96(%rdi),%xmm13,%xmm13 + vpaddd 128(%rdi),%xmm14,%xmm14 + vmovdqu %xmm10,0(%rdi) + vmovdqu %xmm11,32(%rdi) + vmovdqu %xmm12,64(%rdi) + vmovdqu %xmm13,96(%rdi) + vmovdqu %xmm14,128(%rdi) + + vmovdqu %xmm6,(%rbx) + vmovdqu 96(%rbp),%xmm5 + decl %edx + jnz .Loop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande_avx + +.Ldone_avx: + movq 272(%rsp),%rax +.cfi_def_cfa %rax,8 + vzeroupper + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_multi_block_avx,.-sha1_multi_block_avx +.type sha1_multi_block_avx2,@function +.align 32 +sha1_multi_block_avx2: +.cfi_startproc +_avx2_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08 +.Lbody_avx2: + leaq K_XX_XX(%rip),%rbp + shrl $1,%edx + + vzeroupper +.Loop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + + movq 0(%rsi),%r12 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + + movq 16(%rsi),%r13 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + + movq 32(%rsi),%r14 + + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + + movq 48(%rsi),%r15 + + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + + movq 64(%rsi),%r8 + + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + 
cmovleq %rbp,%r8 + + movq 80(%rsi),%r9 + + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + + movq 96(%rsi),%r10 + + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + + movq 112(%rsi),%r11 + + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0(%rdi),%ymm0 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%ymm1 + leaq 256+128(%rsp),%rbx + vmovdqu 64(%rdi),%ymm2 + vmovdqu 96(%rdi),%ymm3 + vmovdqu 128(%rdi),%ymm4 + vmovdqu 96(%rbp),%ymm9 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vmovdqa -32(%rbp),%ymm15 + vmovd (%r12),%xmm10 + leaq 64(%r12),%r12 + vmovd (%r8),%xmm12 + leaq 64(%r8),%r8 + vmovd (%r13),%xmm7 + leaq 64(%r13),%r13 + vmovd (%r9),%xmm6 + leaq 64(%r9),%r9 + vpinsrd $1,(%r14),%xmm10,%xmm10 + leaq 64(%r14),%r14 + vpinsrd $1,(%r10),%xmm12,%xmm12 + leaq 64(%r10),%r10 + vpinsrd $1,(%r15),%xmm7,%xmm7 + leaq 64(%r15),%r15 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,(%r11),%xmm6,%xmm6 + leaq 64(%r11),%r11 + vpunpckldq %ymm6,%ymm12,%ymm12 + vmovd -60(%r12),%xmm11 + vinserti128 $1,%xmm12,%ymm10,%ymm10 + vmovd -60(%r8),%xmm8 + vpshufb %ymm9,%ymm10,%ymm10 + vmovd -60(%r13),%xmm7 + vmovd -60(%r9),%xmm6 + vpinsrd $1,-60(%r14),%xmm11,%xmm11 + vpinsrd $1,-60(%r10),%xmm8,%xmm8 + vpinsrd $1,-60(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-60(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,0-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -56(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -56(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -56(%r13),%xmm7 + vmovd -56(%r9),%xmm6 + vpinsrd $1,-56(%r14),%xmm12,%xmm12 + vpinsrd $1,-56(%r10),%xmm8,%xmm8 + vpinsrd $1,-56(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-56(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,32-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -52(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -52(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -52(%r13),%xmm7 + vmovd -52(%r9),%xmm6 + vpinsrd $1,-52(%r14),%xmm13,%xmm13 + vpinsrd $1,-52(%r10),%xmm8,%xmm8 + vpinsrd $1,-52(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-52(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,64-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -48(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -48(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -48(%r13),%xmm7 + vmovd -48(%r9),%xmm6 + vpinsrd 
$1,-48(%r14),%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm8,%xmm8 + vpinsrd $1,-48(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-48(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,96-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -44(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -44(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -44(%r13),%xmm7 + vmovd -44(%r9),%xmm6 + vpinsrd $1,-44(%r14),%xmm10,%xmm10 + vpinsrd $1,-44(%r10),%xmm8,%xmm8 + vpinsrd $1,-44(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-44(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,128-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -40(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -40(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -40(%r13),%xmm7 + vmovd -40(%r9),%xmm6 + vpinsrd $1,-40(%r14),%xmm11,%xmm11 + vpinsrd $1,-40(%r10),%xmm8,%xmm8 + vpinsrd $1,-40(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-40(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,160-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -36(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -36(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -36(%r13),%xmm7 + vmovd -36(%r9),%xmm6 + vpinsrd $1,-36(%r14),%xmm12,%xmm12 + vpinsrd $1,-36(%r10),%xmm8,%xmm8 + vpinsrd $1,-36(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-36(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,192-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -32(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -32(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -32(%r13),%xmm7 + vmovd -32(%r9),%xmm6 + vpinsrd $1,-32(%r14),%xmm13,%xmm13 + vpinsrd $1,-32(%r10),%xmm8,%xmm8 + vpinsrd $1,-32(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-32(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,224-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -28(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -28(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld 
$2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -28(%r13),%xmm7 + vmovd -28(%r9),%xmm6 + vpinsrd $1,-28(%r14),%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm8,%xmm8 + vpinsrd $1,-28(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-28(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,256-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -24(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -24(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -24(%r13),%xmm7 + vmovd -24(%r9),%xmm6 + vpinsrd $1,-24(%r14),%xmm10,%xmm10 + vpinsrd $1,-24(%r10),%xmm8,%xmm8 + vpinsrd $1,-24(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-24(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,288-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -20(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -20(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -20(%r13),%xmm7 + vmovd -20(%r9),%xmm6 + vpinsrd $1,-20(%r14),%xmm11,%xmm11 + vpinsrd $1,-20(%r10),%xmm8,%xmm8 + vpinsrd $1,-20(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-20(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,320-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -16(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -16(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -16(%r13),%xmm7 + vmovd -16(%r9),%xmm6 + vpinsrd $1,-16(%r14),%xmm12,%xmm12 + vpinsrd $1,-16(%r10),%xmm8,%xmm8 + vpinsrd $1,-16(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-16(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,352-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -12(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -12(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -12(%r13),%xmm7 + vmovd -12(%r9),%xmm6 + vpinsrd $1,-12(%r14),%xmm13,%xmm13 + vpinsrd $1,-12(%r10),%xmm8,%xmm8 + vpinsrd $1,-12(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-12(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,384-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld 
$27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -8(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -8(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -8(%r13),%xmm7 + vmovd -8(%r9),%xmm6 + vpinsrd $1,-8(%r14),%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm8,%xmm8 + vpinsrd $1,-8(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-8(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,416-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -4(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -4(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovdqa 0-128(%rax),%ymm11 + vmovd -4(%r13),%xmm7 + vmovd -4(%r9),%xmm6 + vpinsrd $1,-4(%r14),%xmm10,%xmm10 + vpinsrd $1,-4(%r10),%xmm8,%xmm8 + vpinsrd $1,-4(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-4(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + prefetcht0 63(%r12) + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,448-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + prefetcht0 63(%r13) + vpxor %ymm6,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + prefetcht0 63(%r15) + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32-128(%rax),%ymm12 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 64-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + prefetcht0 63(%r8) + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,480-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 256-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + prefetcht0 63(%r9) + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + prefetcht0 63(%r10) + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + prefetcht0 63(%r11) + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 96-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,0-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 288-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 128-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,32-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 320-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + 
+ vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 160-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,64-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 352-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 192-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,96-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 384-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 0(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 224-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,128-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 416-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 256-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,160-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 448-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 288-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,192-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 480-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 320-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,224-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 0-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd 
%ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 352-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,256-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 32-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 384-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,288-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 64-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 416-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,320-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 96-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 448-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,352-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 128-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 480-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,384-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 160-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 0-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,416-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 192-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 32-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + 
vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,448-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 224-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 64-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,480-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 256-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 96-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,0-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 288-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 128-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,32-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 320-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 160-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,64-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 352-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 192-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,96-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 384-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 224-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,128-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 416-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 
+ + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 256-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,160-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 448-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 288-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,192-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 480-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 320-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,224-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 0-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 352-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 32-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,256-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 384-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 64-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,288-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 416-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 96-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,320-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor 
%ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 448-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 128-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,352-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 480-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 160-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,384-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 0-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 192-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,416-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 32-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 224-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,448-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 64-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 256-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,480-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 96-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 288-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 
+ vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,0-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 128-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 320-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,32-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 160-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 352-256-128(%rbx),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,64-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 192-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 384-256-128(%rbx),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,96-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 224-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 416-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,128-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 256-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 448-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,160-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 288-256-128(%rbx),%ymm12 + + 
vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 480-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,192-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 320-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 0-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,224-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 352-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 32-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,256-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 384-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 64-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,288-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 416-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 96-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,320-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 448-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 128-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,352-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld 
$2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 64(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 480-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,384-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 160-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 0-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,416-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 192-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 32-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,448-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 224-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 64-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,480-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 256-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 96-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,0-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 288-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 128-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,32-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 320-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 160-128(%rax),%ymm14 + + vpslld 
$5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,64-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 352-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 192-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,96-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 384-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 224-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,128-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 416-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 256-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,160-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 448-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 288-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,192-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 480-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 320-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,224-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 0-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 352-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 32-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor 
%ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 384-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 64-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 416-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 96-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 448-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 128-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 480-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 160-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 0-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 192-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 32-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 224-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + + vpsrld $27,%ymm1,%ymm8 + vpaddd %ymm14,%ymm0,%ymm0 + 
vpxor %ymm3,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm6,%ymm2,%ymm2 + movl $1,%ecx + leaq 512(%rsp),%rbx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%ymm5 + vpxor %ymm7,%ymm7,%ymm7 + vmovdqa %ymm5,%ymm6 + vpcmpgtd %ymm7,%ymm6,%ymm6 + vpaddd %ymm6,%ymm5,%ymm5 + + vpand %ymm6,%ymm0,%ymm0 + vpand %ymm6,%ymm1,%ymm1 + vpaddd 0(%rdi),%ymm0,%ymm0 + vpand %ymm6,%ymm2,%ymm2 + vpaddd 32(%rdi),%ymm1,%ymm1 + vpand %ymm6,%ymm3,%ymm3 + vpaddd 64(%rdi),%ymm2,%ymm2 + vpand %ymm6,%ymm4,%ymm4 + vpaddd 96(%rdi),%ymm3,%ymm3 + vpaddd 128(%rdi),%ymm4,%ymm4 + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + + vmovdqu %ymm5,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu 96(%rbp),%ymm9 + decl %edx + jnz .Loop_avx2 + + + + + + + +.Ldone_avx2: + movq 544(%rsp),%rax +.cfi_def_cfa %rax,8 + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_multi_block_avx2,.-sha1_multi_block_avx2 +.section .rodata +.align 256 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +K_XX_XX: +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha1-x86_64.s b/contrib/openssl-cmake/asm/crypto/sha/sha1-x86_64.s new file mode 100644 index 000000000000..dd4a58c1fecd --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha1-x86_64.s @@ -0,0 +1,5452 @@ +.text + + +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: +.cfi_startproc + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + movl OPENSSL_ia32cap_P+8(%rip),%r10d + testl $512,%r8d + jz .Lialu + testl $536870912,%r10d + jnz _shaext_shortcut + andl $296,%r10d + cmpl $296,%r10d + je _avx2_shortcut + andl $268435456,%r8d + andl $1073741824,%r9d + orl %r9d,%r8d + cmpl $1342177280,%r8d + je _avx_shortcut + jmp _ssse3_shortcut + +.align 16 +.Lialu: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset 
%r14,-48 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08 +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl 
%eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + 
roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl 
%ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx 
+ addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + 
roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + 
roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_shaext,@function +.align 32 +sha1_block_data_order_shaext: +_shaext_shortcut: +.cfi_startproc + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + decq %rdx + leaq 64(%rsi),%r8 + paddd %xmm4,%xmm1 + cmovneq %r8,%rsi + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor 
%xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz .Loop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: +.cfi_startproc + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + leaq -64(%rsp),%rsp + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl 
%ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r14),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r14),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r14),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl 
%esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r14),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r14),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd 
$238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r14),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + 
addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa 
%xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll 
$5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movq -40(%r11),%r14 +.cfi_restore %r14 + movq -32(%r11),%r13 +.cfi_restore %r13 + movq -24(%r11),%r12 +.cfi_restore %r12 + movq -16(%r11),%rbp +.cfi_restore %rbp + movq -8(%r11),%rbx +.cfi_restore %rbx + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.type sha1_block_data_order_avx,@function +.align 16 +sha1_block_data_order_avx: +_avx_shortcut: +.cfi_startproc + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + leaq -64(%rsp),%rsp + vzeroupper + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp .Loop_avx +.align 16 +.Loop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + 
shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r14),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl 
$7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r14),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa 
%xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r14),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa 
%xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 
44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_avx + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + 
shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_avx + +.align 16 +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movq -40(%r11),%r14 +.cfi_restore %r14 + movq -32(%r11),%r13 +.cfi_restore %r13 + movq -24(%r11),%r12 +.cfi_restore %r12 + movq -16(%r11),%rbp +.cfi_restore %rbp + movq -8(%r11),%rbx +.cfi_restore %rbx + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_block_data_order_avx,.-sha1_block_data_order_avx +.type sha1_block_data_order_avx2,@function +.align 16 +sha1_block_data_order_avx2: +_avx2_shortcut: +.cfi_startproc + movq %rsp,%r11 +.cfi_def_cfa_register %r11 + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + vzeroupper + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 
8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r14),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp .Loop_avx2 +.align 32 +.Loop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp .Lalign32_1 +.align 32 +.Lalign32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd %ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr 
$8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r14),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 
40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld $30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r14),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp .Lalign32_2 +.align 32 +.Lalign32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp 
+ xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + 
vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + 
rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je .Ldone_avx2 + vmovdqu 64(%r14),%ymm6 + cmpq %r10,%rdi + ja .Last_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp .Last_avx2 + +.align 32 +.Last_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + 
addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl 
$2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp .Lalign32_3 +.align 32 +.Lalign32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + 
rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + 
vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%rsp),%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + jbe .Loop_avx2 + +.Ldone_avx2: + vzeroupper + movq -40(%r11),%r14 +.cfi_restore %r14 + movq -32(%r11),%r13 +.cfi_restore %r13 + movq -24(%r11),%r12 +.cfi_restore %r12 + movq -16(%r11),%rbp +.cfi_restore %rbp + movq -8(%r11),%rbx +.cfi_restore %rbx + leaq (%r11),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2 +.section .rodata +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.previous +.byte 
83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha256-armv8.S b/contrib/openssl-cmake/asm/crypto/sha/sha256-armv8.S new file mode 100644 index 000000000000..385242dab979 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha256-armv8.S @@ -0,0 +1,2062 @@ +// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// ThunderX2 2.54 13.2 (+40%) 8.40 (+18%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. +// +// October 2016. +// +// Originally it was reckoned that it makes no sense to implement NEON +// version of SHA256 for 64-bit processors. This is because performance +// improvement on most wide-spread Cortex-A5x processors was observed +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was +// observed that 32-bit NEON SHA256 performs significantly better than +// 64-bit scalar version on *some* of the more recent processors. As +// result 64-bit NEON version of SHA256 was added to provide best +// all-round performance. For example it executes ~30% faster on X-Gene +// and Mongoose. [For reference, NEON version of SHA512 is bound to +// deliver much less improvement, likely *negative* on Cortex-A5x. +// Which is why NEON support is limited to SHA256.] 
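Reading aid, not part of the generated file or of the patch itself: the per-round comments in the scalar sha256_block_data_order code below ("h+=K[i]", "Ch(e,f,g)", "Sigma1(e)", "Maj(a,b,c)", "Sigma0(a)", "sigma0(X[i+1])", "sigma1(X[i+14])") refer to the standard FIPS 180-4 SHA-256 working functions. The following is a minimal plain-C sketch of those primitives and of one round in the order the comments spell it out; all names (rotr32, sha256_round, Ki, Xi, s[]) are illustrative only, not identifiers from this patch.

#include <stdint.h>

static inline uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)  { return (e & f) ^ (~e & g); }
static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }

static inline uint32_t Sigma0(uint32_t a) { return rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22); } /* "Sigma0(a)" */
static inline uint32_t Sigma1(uint32_t e) { return rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25); } /* "Sigma1(e)" */
static inline uint32_t sigma0(uint32_t x) { return rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3);  }     /* "sigma0(X[i+1])"  */
static inline uint32_t sigma1(uint32_t x) { return rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10); }    /* "sigma1(X[i+14])" */

/* One round, following the assembly comments: h accumulates K[i] + X[i] +
 * Ch(e,f,g) + Sigma1(e), is added into d, then Maj(a,b,c) + Sigma0(a) is
 * folded in; the state then rotates so that h becomes the next a.
 * In the .Loop_16_xx rounds the 16-word schedule is extended in place as
 * X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14])  (indices mod 16). */
static void sha256_round(uint32_t s[8], uint32_t Ki, uint32_t Xi)
{
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
    uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

    h += Ki + Xi + Ch(e, f, g) + Sigma1(e);   /* h+=K[i]; h+=X[i]; h+=Ch(e,f,g); h+=Sigma1(e) */
    d += h;                                   /* d+=h */
    h += Maj(a, b, c) + Sigma0(a);            /* h+=Maj(a,b,c); h+=Sigma0(a) */

    s[0] = h; s[1] = a; s[2] = b; s[3] = c;   /* rotate state: new a..d */
    s[4] = d; s[5] = e; s[6] = f; s[7] = g;   /* new e..h */
}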
+ +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" +#ifndef __KERNEL__ + +.hidden OPENSSL_armcap_P +#endif + +.text + +.globl sha256_block_data_order +.type sha256_block_data_order,%function +.align 6 +sha256_block_data_order: + AARCH64_VALID_CALL_TARGET +#ifndef __KERNEL__ + adrp x16,OPENSSL_armcap_P + ldr w16,[x16,#:lo12:OPENSSL_armcap_P] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry + tst w16,#ARMV7_NEON + b.ne .Lneon_entry +#endif + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adrp x30,.LK256 + add x30,x30,#:lo12:.LK256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add 
w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add 
w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add 
w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add 
w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic 
w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in 
next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // 
sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size sha256_block_data_order,.-sha256_block_data_order + +.section .rodata + +.align 6 +.type .LK256,%object +.LK256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0 //terminator +.size .LK256,.-.LK256 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 + +.text +#ifndef __KERNEL__ +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adrp x3,.LK256 + add x3,x3,#:lo12:.LK256 + +.Loop_hw: + ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s +.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s +.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s +.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s +.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s +.inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s +.inst 
0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b +.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s +.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b +.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s +.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +#endif +#ifdef __KERNEL__ +.globl sha256_block_neon +#endif +.type sha256_block_neon,%function +.align 4 +sha256_block_neon: + AARCH64_VALID_CALL_TARGET +.Lneon_entry: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later + stp x29, x30, [sp, #-16]! + mov x29, sp + sub sp,sp,#16*4 + + adrp x16,.LK256 + add x16,x16,#:lo12:.LK256 + add x2,x1,x2,lsl#6 // len to point at the end of inp + + ld1 {v0.16b},[x1], #16 + ld1 {v1.16b},[x1], #16 + ld1 {v2.16b},[x1], #16 + ld1 {v3.16b},[x1], #16 + ld1 {v4.4s},[x16], #16 + ld1 {v5.4s},[x16], #16 + ld1 {v6.4s},[x16], #16 + ld1 {v7.4s},[x16], #16 + rev32 v0.16b,v0.16b // yes, even on + rev32 v1.16b,v1.16b // big-endian + rev32 v2.16b,v2.16b + rev32 v3.16b,v3.16b + mov x17,sp + add v4.4s,v4.4s,v0.4s + add v5.4s,v5.4s,v1.4s + add v6.4s,v6.4s,v2.4s + st1 {v4.4s,v5.4s},[x17], #32 + add v7.4s,v7.4s,v3.4s + st1 {v6.4s,v7.4s},[x17] + sub x17,x17,#32 + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#8] + ldp w7,w8,[x0,#16] + ldp w9,w10,[x0,#24] + ldr w12,[sp,#0] + mov w13,wzr + eor w14,w4,w5 + mov w15,wzr + b .L_00_48 + +.align 4 +.L_00_48: + ext v4.16b,v0.16b,v1.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v2.16b,v3.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v3.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor 
v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + 
and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add 
v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne .L_00_48 + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + csel x17, x17, xzr, eq + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 
{v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + b.ne .L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret +.size sha256_block_neon,.-sha256_block_neon diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha256-mb-x86_64.s 
b/contrib/openssl-cmake/asm/crypto/sha/sha256-mb-x86_64.s new file mode 100644 index 000000000000..d4bf52940144 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha256-mb-x86_64.s @@ -0,0 +1,7986 @@ +.text + + + +.globl sha256_multi_block +.type sha256_multi_block,@function +.align 32 +sha256_multi_block: +.cfi_startproc + movq OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08 +.Lbody: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + + movq 0(%rsi),%r8 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + + movq 16(%rsi),%r9 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + + movq 32(%rsi),%r10 + + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + + movq 48(%rsi),%r11 + + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu .Lpbswap(%rip),%xmm6 + jmp .Loop + +.align 32 +.Loop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + 
psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 
+ pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa 
%xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd 
%xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + 
punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + prefetcht0 63(%r8) + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + prefetcht0 63(%r9) + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + prefetcht0 63(%r10) + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + prefetcht0 63(%r11) + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx +.align 32 +.Loop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa 
%xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + 
movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + 
movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + 
psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + 
movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor 
%xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand 
%xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz .Loop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax +.cfi_def_cfa %rax,8 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq 
(%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_multi_block,.-sha256_multi_block +.type sha256_multi_block_shaext,@function +.align 32 +sha256_multi_block_shaext: +.cfi_startproc +_shaext_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 128(%rdi),%rdi + movq %rax,272(%rsp) +.Lbody_shaext: + leaq 256(%rsp),%rbx + leaq K256_shaext+128(%rip),%rbp + +.Loop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + + movq 0(%rsi),%r8 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + + movq 16(%rsi),%r9 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz .Ldone_shaext + + movq 0-128(%rdi),%xmm12 + movq 32-128(%rdi),%xmm4 + movq 64-128(%rdi),%xmm13 + movq 96-128(%rdi),%xmm5 + movq 128-128(%rdi),%xmm8 + movq 160-128(%rdi),%xmm9 + movq 192-128(%rdi),%xmm10 + movq 224-128(%rdi),%xmm11 + + punpckldq %xmm4,%xmm12 + punpckldq %xmm5,%xmm13 + punpckldq %xmm9,%xmm8 + punpckldq %xmm11,%xmm10 + movdqa K256_shaext-16(%rip),%xmm3 + + movdqa %xmm12,%xmm14 + movdqa %xmm13,%xmm15 + punpcklqdq %xmm8,%xmm12 + punpcklqdq %xmm10,%xmm13 + punpckhqdq %xmm8,%xmm14 + punpckhqdq %xmm10,%xmm15 + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm8 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm9 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm10 +.byte 102,68,15,56,0,195 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 + movdqu 48(%r9),%xmm11 + leaq 64(%r9),%r9 + + movdqa 0-128(%rbp),%xmm0 +.byte 102,15,56,0,235 + paddd %xmm4,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm0,%xmm1 + movdqa 0-128(%rbp),%xmm2 +.byte 102,68,15,56,0,203 + paddd %xmm8,%xmm2 + movdqa %xmm13,80(%rsp) +.byte 69,15,56,203,236 + pxor %xmm14,%xmm8 + movdqa %xmm2,%xmm0 + movdqa %xmm15,112(%rsp) +.byte 69,15,56,203,254 + pshufd $0x0e,%xmm1,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm12,64(%rsp) +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + pxor %xmm14,%xmm8 + movdqa %xmm14,96(%rsp) + movdqa 16-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 102,15,56,0,243 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 16-128(%rbp),%xmm2 + paddd %xmm9,%xmm2 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + prefetcht0 127(%r8) +.byte 102,15,56,0,251 +.byte 102,68,15,56,0,211 + prefetcht0 127(%r9) +.byte 69,15,56,203,254 + pshufd $0x0e,%xmm1,%xmm0 +.byte 102,68,15,56,0,219 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 32-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 32-128(%rbp),%xmm2 + paddd %xmm10,%xmm2 +.byte 69,15,56,203,236 +.byte 69,15,56,204,193 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 69,15,56,203,254 + pshufd $0x0e,%xmm1,%xmm0 +.byte 102,15,58,15,222,4 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 48-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + + movdqa %xmm1,%xmm0 + movdqa 48-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 
69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 64-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 64-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 80-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 80-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 96-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 96-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 112-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 112-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 128-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 128-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 144-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 144-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 160-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 160-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + 
movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 176-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 176-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 192-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 192-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 208-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 208-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $0x0e,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 + nop +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 224-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 224-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 + nop +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movl $1,%ecx + pxor %xmm6,%xmm6 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $0x0e,%xmm1,%xmm0 + movdqa 240-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 + movq (%rbx),%xmm7 + nop +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + movdqa 240-128(%rbp),%xmm2 + paddd %xmm11,%xmm2 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + pshufd $0x00,%xmm7,%xmm9 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + pshufd $0x55,%xmm7,%xmm10 + movdqa %xmm7,%xmm11 +.byte 69,15,56,203,254 + pshufd $0x0e,%xmm1,%xmm0 + pcmpgtd %xmm6,%xmm9 + pcmpgtd %xmm6,%xmm10 +.byte 69,15,56,203,229 + pshufd $0x0e,%xmm2,%xmm0 + pcmpgtd %xmm6,%xmm11 + movdqa K256_shaext-16(%rip),%xmm3 +.byte 69,15,56,203,247 + + pand %xmm9,%xmm13 + pand %xmm10,%xmm15 + pand %xmm9,%xmm12 + pand %xmm10,%xmm14 + paddd %xmm7,%xmm11 + + paddd 80(%rsp),%xmm13 + paddd 112(%rsp),%xmm15 + paddd 64(%rsp),%xmm12 + paddd 96(%rsp),%xmm14 + + movq %xmm11,(%rbx) + decl %edx + jnz .Loop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + + movdqa %xmm12,%xmm5 + movdqa %xmm13,%xmm6 + punpckldq %xmm14,%xmm12 + punpckhdq %xmm14,%xmm5 + punpckldq %xmm15,%xmm13 + punpckhdq %xmm15,%xmm6 + + movq %xmm12,0-128(%rdi) + psrldq $8,%xmm12 + movq %xmm5,128-128(%rdi) + psrldq $8,%xmm5 + movq %xmm12,32-128(%rdi) + movq %xmm5,160-128(%rdi) + + movq %xmm13,64-128(%rdi) + psrldq 
$8,%xmm13 + movq %xmm6,192-128(%rdi) + psrldq $8,%xmm6 + movq %xmm13,96-128(%rdi) + movq %xmm6,224-128(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz .Loop_grande_shaext + +.Ldone_shaext: + + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_shaext: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_multi_block_shaext,.-sha256_multi_block_shaext +.type sha256_multi_block_avx,@function +.align 32 +sha256_multi_block_avx: +.cfi_startproc +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb .Lavx + testl $32,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x90,0x02,0x06,0x23,0x08 +.Lbody_avx: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + + movq 0(%rsi),%r8 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + + movq 16(%rsi),%r9 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + + movq 32(%rsi),%r10 + + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + + movq 48(%rsi),%r11 + + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone_avx + + vmovdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + vmovdqu 96-128(%rdi),%xmm11 + vmovdqu 128-128(%rdi),%xmm12 + vmovdqu 160-128(%rdi),%xmm13 + vmovdqu 192-128(%rdi),%xmm14 + vmovdqu 224-128(%rdi),%xmm15 + vmovdqu .Lpbswap(%rip),%xmm6 + jmp .Loop_avx + +.align 32 +.Loop_avx: + vpxor %xmm9,%xmm10,%xmm4 + vmovd 0(%r8),%xmm5 + vmovd 0(%r9),%xmm0 + vpinsrd $1,0(%r10),%xmm5,%xmm5 + vpinsrd $1,0(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 4(%r8),%xmm5 + vmovd 4(%r9),%xmm0 + vpinsrd $1,4(%r10),%xmm5,%xmm5 + vpinsrd $1,4(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,16-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor 
%xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 8(%r8),%xmm5 + vmovd 8(%r9),%xmm0 + vpinsrd $1,8(%r10),%xmm5,%xmm5 + vpinsrd $1,8(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 12(%r8),%xmm5 + vmovd 12(%r9),%xmm0 + vpinsrd $1,12(%r10),%xmm5,%xmm5 + vpinsrd $1,12(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,48-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 16(%r8),%xmm5 + vmovd 16(%r9),%xmm0 + vpinsrd $1,16(%r10),%xmm5,%xmm5 + vpinsrd $1,16(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + 
vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 20(%r8),%xmm5 + vmovd 20(%r9),%xmm0 + vpinsrd $1,20(%r10),%xmm5,%xmm5 + vpinsrd $1,20(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,80-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 24(%r8),%xmm5 + vmovd 24(%r9),%xmm0 + vpinsrd $1,24(%r10),%xmm5,%xmm5 + vpinsrd $1,24(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 28(%r8),%xmm5 + vmovd 28(%r9),%xmm0 + vpinsrd $1,28(%r10),%xmm5,%xmm5 + vpinsrd $1,28(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,112-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand 
%xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovd 32(%r8),%xmm5 + vmovd 32(%r9),%xmm0 + vpinsrd $1,32(%r10),%xmm5,%xmm5 + vpinsrd $1,32(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 36(%r8),%xmm5 + vmovd 36(%r9),%xmm0 + vpinsrd $1,36(%r10),%xmm5,%xmm5 + vpinsrd $1,36(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,144-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 40(%r8),%xmm5 + vmovd 40(%r9),%xmm0 + vpinsrd $1,40(%r10),%xmm5,%xmm5 + vpinsrd $1,40(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld 
$2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 44(%r8),%xmm5 + vmovd 44(%r9),%xmm0 + vpinsrd $1,44(%r10),%xmm5,%xmm5 + vpinsrd $1,44(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,176-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 48(%r8),%xmm5 + vmovd 48(%r9),%xmm0 + vpinsrd $1,48(%r10),%xmm5,%xmm5 + vpinsrd $1,48(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 52(%r8),%xmm5 + vmovd 52(%r9),%xmm0 + vpinsrd $1,52(%r10),%xmm5,%xmm5 + vpinsrd $1,52(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,208-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor 
%xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 56(%r8),%xmm5 + vmovd 56(%r9),%xmm0 + vpinsrd $1,56(%r10),%xmm5,%xmm5 + vpinsrd $1,56(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + vmovd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r10),%xmm5,%xmm5 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r11),%xmm0,%xmm0 + leaq 64(%r11),%r11 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,240-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r8) + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + prefetcht0 63(%r9) + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r10) + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + prefetcht0 63(%r11) + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx_avx +.align 32 +.Loop_16_xx_avx: + vmovdqu 16-128(%rax),%xmm6 + vpaddd 144-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 224-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor 
%xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 32-128(%rax),%xmm5 + vpaddd 160-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 240-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,16-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 48-128(%rax),%xmm6 + vpaddd 176-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 0-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor 
%xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 64-128(%rax),%xmm5 + vpaddd 192-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 16-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,48-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 80-128(%rax),%xmm6 + vpaddd 208-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 32-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd 
%xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 96-128(%rax),%xmm5 + vpaddd 224-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 48-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,80-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 112-128(%rax),%xmm6 + vpaddd 240-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 64-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd 
%xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 128-128(%rax),%xmm5 + vpaddd 0-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 80-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,112-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 144-128(%rax),%xmm6 + vpaddd 16-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 96-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 160-128(%rax),%xmm5 + vpaddd 32-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 112-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor 
%xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,144-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 176-128(%rax),%xmm6 + vpaddd 48-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 128-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 192-128(%rax),%xmm5 + vpaddd 64-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 144-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 
+ vmovdqu %xmm6,176-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 208-128(%rax),%xmm6 + vpaddd 80-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 160-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 224-128(%rax),%xmm5 + vpaddd 96-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 176-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,208-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + 
vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 240-128(%rax),%xmm6 + vpaddd 112-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 192-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 0-128(%rax),%xmm5 + vpaddd 128-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 208-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,240-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor 
%xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + decl %ecx + jnz .Loop_16_xx_avx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%xmm7 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa %xmm7,%xmm6 + vpcmpgtd %xmm0,%xmm6,%xmm6 + vpaddd %xmm6,%xmm7,%xmm7 + + vmovdqu 0-128(%rdi),%xmm0 + vpand %xmm6,%xmm8,%xmm8 + vmovdqu 32-128(%rdi),%xmm1 + vpand %xmm6,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm2 + vpand %xmm6,%xmm10,%xmm10 + vmovdqu 96-128(%rdi),%xmm5 + vpand %xmm6,%xmm11,%xmm11 + vpaddd %xmm0,%xmm8,%xmm8 + vmovdqu 128-128(%rdi),%xmm0 + vpand %xmm6,%xmm12,%xmm12 + vpaddd %xmm1,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm1 + vpand %xmm6,%xmm13,%xmm13 + vpaddd %xmm2,%xmm10,%xmm10 + vmovdqu 192-128(%rdi),%xmm2 + vpand %xmm6,%xmm14,%xmm14 + vpaddd %xmm5,%xmm11,%xmm11 + vmovdqu 224-128(%rdi),%xmm5 + vpand %xmm6,%xmm15,%xmm15 + vpaddd %xmm0,%xmm12,%xmm12 + vpaddd %xmm1,%xmm13,%xmm13 + vmovdqu %xmm8,0-128(%rdi) + vpaddd %xmm2,%xmm14,%xmm14 + vmovdqu %xmm9,32-128(%rdi) + vpaddd %xmm5,%xmm15,%xmm15 + vmovdqu %xmm10,64-128(%rdi) + vmovdqu %xmm11,96-128(%rdi) + vmovdqu %xmm12,128-128(%rdi) + vmovdqu %xmm13,160-128(%rdi) + vmovdqu %xmm14,192-128(%rdi) + vmovdqu %xmm15,224-128(%rdi) + + vmovdqu %xmm7,(%rbx) + vmovdqu .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande_avx + +.Ldone_avx: + movq 272(%rsp),%rax +.cfi_def_cfa %rax,8 + vzeroupper + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_multi_block_avx,.-sha256_multi_block_avx +.type sha256_multi_block_avx2,@function +.align 32 +sha256_multi_block_avx2: +.cfi_startproc +_avx2_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xa0,0x04,0x06,0x23,0x08 +.Lbody_avx2: + leaq K256+128(%rip),%rbp + leaq 128(%rdi),%rdi + +.Loop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + + movq 0(%rsi),%r12 + + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + + movq 16(%rsi),%r13 + + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + + movq 32(%rsi),%r14 + + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + + movq 48(%rsi),%r15 + + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + + movq 64(%rsi),%r8 + + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + + movq 80(%rsi),%r9 + + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + + movq 96(%rsi),%r10 + + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + 
testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + + movq 112(%rsi),%r11 + + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0-128(%rdi),%ymm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%ymm9 + leaq 256+128(%rsp),%rbx + vmovdqu 64-128(%rdi),%ymm10 + vmovdqu 96-128(%rdi),%ymm11 + vmovdqu 128-128(%rdi),%ymm12 + vmovdqu 160-128(%rdi),%ymm13 + vmovdqu 192-128(%rdi),%ymm14 + vmovdqu 224-128(%rdi),%ymm15 + vmovdqu .Lpbswap(%rip),%ymm6 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vpxor %ymm9,%ymm10,%ymm4 + vmovd 0(%r12),%xmm5 + vmovd 0(%r8),%xmm0 + vmovd 0(%r13),%xmm1 + vmovd 0(%r9),%xmm2 + vpinsrd $1,0(%r14),%xmm5,%xmm5 + vpinsrd $1,0(%r10),%xmm0,%xmm0 + vpinsrd $1,0(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,0(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 4(%r12),%xmm5 + vmovd 4(%r8),%xmm0 + vmovd 4(%r13),%xmm1 + vmovd 4(%r9),%xmm2 + vpinsrd $1,4(%r14),%xmm5,%xmm5 + vpinsrd $1,4(%r10),%xmm0,%xmm0 + vpinsrd $1,4(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,4(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,32-128(%rax) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 8(%r12),%xmm5 + vmovd 8(%r8),%xmm0 + vmovd 8(%r13),%xmm1 + vmovd 8(%r9),%xmm2 + vpinsrd $1,8(%r14),%xmm5,%xmm5 + vpinsrd $1,8(%r10),%xmm0,%xmm0 + vpinsrd $1,8(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,8(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 
$1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 12(%r12),%xmm5 + vmovd 12(%r8),%xmm0 + vmovd 12(%r13),%xmm1 + vmovd 12(%r9),%xmm2 + vpinsrd $1,12(%r14),%xmm5,%xmm5 + vpinsrd $1,12(%r10),%xmm0,%xmm0 + vpinsrd $1,12(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,12(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,96-128(%rax) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 16(%r12),%xmm5 + vmovd 16(%r8),%xmm0 + vmovd 16(%r13),%xmm1 + vmovd 16(%r9),%xmm2 + vpinsrd $1,16(%r14),%xmm5,%xmm5 + vpinsrd $1,16(%r10),%xmm0,%xmm0 + vpinsrd $1,16(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,16(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor 
%ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 20(%r12),%xmm5 + vmovd 20(%r8),%xmm0 + vmovd 20(%r13),%xmm1 + vmovd 20(%r9),%xmm2 + vpinsrd $1,20(%r14),%xmm5,%xmm5 + vpinsrd $1,20(%r10),%xmm0,%xmm0 + vpinsrd $1,20(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,20(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,160-128(%rax) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 24(%r12),%xmm5 + vmovd 24(%r8),%xmm0 + vmovd 24(%r13),%xmm1 + vmovd 24(%r9),%xmm2 + vpinsrd $1,24(%r14),%xmm5,%xmm5 + vpinsrd $1,24(%r10),%xmm0,%xmm0 + vpinsrd $1,24(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,24(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 28(%r12),%xmm5 + vmovd 28(%r8),%xmm0 + vmovd 28(%r13),%xmm1 + vmovd 28(%r9),%xmm2 + vpinsrd $1,28(%r14),%xmm5,%xmm5 + vpinsrd $1,28(%r10),%xmm0,%xmm0 + vpinsrd $1,28(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,28(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,224-128(%rax) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand 
%ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovd 32(%r12),%xmm5 + vmovd 32(%r8),%xmm0 + vmovd 32(%r13),%xmm1 + vmovd 32(%r9),%xmm2 + vpinsrd $1,32(%r14),%xmm5,%xmm5 + vpinsrd $1,32(%r10),%xmm0,%xmm0 + vpinsrd $1,32(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,32(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 36(%r12),%xmm5 + vmovd 36(%r8),%xmm0 + vmovd 36(%r13),%xmm1 + vmovd 36(%r9),%xmm2 + vpinsrd $1,36(%r14),%xmm5,%xmm5 + vpinsrd $1,36(%r10),%xmm0,%xmm0 + vpinsrd $1,36(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,36(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,288-256-128(%rbx) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 40(%r12),%xmm5 + vmovd 40(%r8),%xmm0 + vmovd 40(%r13),%xmm1 + vmovd 40(%r9),%xmm2 + vpinsrd $1,40(%r14),%xmm5,%xmm5 + vpinsrd $1,40(%r10),%xmm0,%xmm0 + vpinsrd $1,40(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + 
vpinsrd $1,40(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 44(%r12),%xmm5 + vmovd 44(%r8),%xmm0 + vmovd 44(%r13),%xmm1 + vmovd 44(%r9),%xmm2 + vpinsrd $1,44(%r14),%xmm5,%xmm5 + vpinsrd $1,44(%r10),%xmm0,%xmm0 + vpinsrd $1,44(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,44(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,352-256-128(%rbx) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 48(%r12),%xmm5 + vmovd 48(%r8),%xmm0 + vmovd 48(%r13),%xmm1 + vmovd 48(%r9),%xmm2 + vpinsrd $1,48(%r14),%xmm5,%xmm5 + vpinsrd $1,48(%r10),%xmm0,%xmm0 + vpinsrd $1,48(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,48(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + 
+ vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 52(%r12),%xmm5 + vmovd 52(%r8),%xmm0 + vmovd 52(%r13),%xmm1 + vmovd 52(%r9),%xmm2 + vpinsrd $1,52(%r14),%xmm5,%xmm5 + vpinsrd $1,52(%r10),%xmm0,%xmm0 + vpinsrd $1,52(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,52(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,416-256-128(%rbx) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 56(%r12),%xmm5 + vmovd 56(%r8),%xmm0 + vmovd 56(%r13),%xmm1 + vmovd 56(%r9),%xmm2 + vpinsrd $1,56(%r14),%xmm5,%xmm5 + vpinsrd $1,56(%r10),%xmm0,%xmm0 + vpinsrd $1,56(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,56(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 60(%r12),%xmm5 + leaq 64(%r12),%r12 + vmovd 60(%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd 60(%r13),%xmm1 + leaq 64(%r13),%r13 + vmovd 60(%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r14),%xmm5,%xmm5 + leaq 64(%r14),%r14 + vpinsrd $1,60(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r15),%xmm1,%xmm1 + leaq 64(%r15),%r15 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,60(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,480-256-128(%rbx) + vpaddd 
%ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r12) + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + prefetcht0 63(%r13) + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + prefetcht0 63(%r15) + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + prefetcht0 63(%r8) + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + prefetcht0 63(%r9) + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r10) + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + prefetcht0 63(%r11) + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%ymm5 + movl $3,%ecx + jmp .Loop_16_xx_avx2 +.align 32 +.Loop_16_xx_avx2: + vmovdqu 32-128(%rax),%ymm6 + vpaddd 288-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 448-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 64-128(%rax),%ymm5 + vpaddd 320-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 480-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,32-128(%rax) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld 
$21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 96-128(%rax),%ymm6 + vpaddd 352-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 0-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 128-128(%rax),%ymm5 + vpaddd 384-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 32-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,96-128(%rax) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + 
vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 160-128(%rax),%ymm6 + vpaddd 416-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 64-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 192-128(%rax),%ymm5 + vpaddd 448-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 96-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,160-128(%rax) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd 
%ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 224-128(%rax),%ymm6 + vpaddd 480-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 128-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 256-256-128(%rbx),%ymm5 + vpaddd 0-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 160-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,224-128(%rax) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 288-256-128(%rbx),%ymm6 + vpaddd 32-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor 
%ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 192-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 320-256-128(%rbx),%ymm5 + vpaddd 64-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 224-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,288-256-128(%rbx) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 352-256-128(%rbx),%ymm6 + vpaddd 96-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 256-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + 
vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 384-256-128(%rbx),%ymm5 + vpaddd 128-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 288-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,352-256-128(%rbx) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 416-256-128(%rbx),%ymm6 + vpaddd 160-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 320-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld 
$25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 448-256-128(%rbx),%ymm5 + vpaddd 192-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 352-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,416-256-128(%rbx) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 480-256-128(%rbx),%ymm6 + vpaddd 224-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 384-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd 
%ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 0-128(%rax),%ymm5 + vpaddd 256-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 416-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,480-256-128(%rbx) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + decl %ecx + jnz .Loop_16_xx_avx2 + + movl $1,%ecx + leaq 512(%rsp),%rbx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%ymm7 + vpxor %ymm0,%ymm0,%ymm0 + vmovdqa %ymm7,%ymm6 + vpcmpgtd %ymm0,%ymm6,%ymm6 + vpaddd %ymm6,%ymm7,%ymm7 + + vmovdqu 0-128(%rdi),%ymm0 + vpand %ymm6,%ymm8,%ymm8 + vmovdqu 32-128(%rdi),%ymm1 + vpand %ymm6,%ymm9,%ymm9 + vmovdqu 64-128(%rdi),%ymm2 + vpand %ymm6,%ymm10,%ymm10 + vmovdqu 96-128(%rdi),%ymm5 + vpand %ymm6,%ymm11,%ymm11 + vpaddd %ymm0,%ymm8,%ymm8 + vmovdqu 128-128(%rdi),%ymm0 + vpand %ymm6,%ymm12,%ymm12 + vpaddd %ymm1,%ymm9,%ymm9 + vmovdqu 160-128(%rdi),%ymm1 + vpand %ymm6,%ymm13,%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vmovdqu 192-128(%rdi),%ymm2 + vpand %ymm6,%ymm14,%ymm14 + vpaddd %ymm5,%ymm11,%ymm11 + vmovdqu 224-128(%rdi),%ymm5 + vpand %ymm6,%ymm15,%ymm15 + vpaddd %ymm0,%ymm12,%ymm12 + vpaddd %ymm1,%ymm13,%ymm13 + vmovdqu %ymm8,0-128(%rdi) + vpaddd %ymm2,%ymm14,%ymm14 + vmovdqu %ymm9,32-128(%rdi) + vpaddd %ymm5,%ymm15,%ymm15 + vmovdqu %ymm10,64-128(%rdi) + vmovdqu %ymm11,96-128(%rdi) + vmovdqu %ymm12,128-128(%rdi) + vmovdqu %ymm13,160-128(%rdi) + vmovdqu %ymm14,192-128(%rdi) + vmovdqu %ymm15,224-128(%rdi) + + vmovdqu %ymm7,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu 
.Lpbswap(%rip),%ymm6 + decl %edx + jnz .Loop_avx2 + + + + + + + +.Ldone_avx2: + movq 544(%rsp),%rax +.cfi_def_cfa %rax,8 + vzeroupper + movq -48(%rax),%r15 +.cfi_restore %r15 + movq -40(%rax),%r14 +.cfi_restore %r14 + movq -32(%rax),%r13 +.cfi_restore %r13 + movq -24(%rax),%r12 +.cfi_restore %r12 + movq -16(%rax),%rbp +.cfi_restore %rbp + movq -8(%rax),%rbx +.cfi_restore %rbx + leaq (%rax),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_multi_block_avx2,.-sha256_multi_block_avx2 +.section .rodata +.align 256 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 
3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3600352804,3600352804,3600352804,3600352804 +.long 3600352804,3600352804,3600352804,3600352804 +.long 4094571909,4094571909,4094571909,4094571909 +.long 4094571909,4094571909,4094571909,4094571909 +.long 275423344,275423344,275423344,275423344 +.long 275423344,275423344,275423344,275423344 +.long 430227734,430227734,430227734,430227734 +.long 430227734,430227734,430227734,430227734 +.long 506948616,506948616,506948616,506948616 +.long 506948616,506948616,506948616,506948616 +.long 659060556,659060556,659060556,659060556 +.long 659060556,659060556,659060556,659060556 +.long 883997877,883997877,883997877,883997877 +.long 883997877,883997877,883997877,883997877 +.long 958139571,958139571,958139571,958139571 +.long 958139571,958139571,958139571,958139571 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1955562222,1955562222,1955562222,1955562222 +.long 1955562222,1955562222,1955562222,1955562222 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2756734187,2756734187,2756734187,2756734187 +.long 2756734187,2756734187,2756734187,2756734187 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3329325298,3329325298,3329325298,3329325298 +.long 3329325298,3329325298,3329325298,3329325298 +.Lpbswap: +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f 
+K256_shaext: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha256-x86_64.s b/contrib/openssl-cmake/asm/crypto/sha/sha256-x86_64.s new file mode 100644 index 000000000000..f0de76776278 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha256-x86_64.s @@ -0,0 +1,5458 @@ +.text + + +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: +.cfi_startproc + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $536870912,%r11d + jnz _shaext_shortcut + andl $296,%r11d + cmpl $296,%r11d + je .Lavx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je .Lavx_shortcut + testl $512,%r10d + jnz .Lssse3_shortcut + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl 
%r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl 
%edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl 
%r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl 
%r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d 
+ + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 
40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl 
%r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_block_data_order,.-sha256_block_data_order +.section .rodata +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 
0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous +.type sha256_block_data_order_shaext,@function +.align 64 +sha256_block_data_order_shaext: +_shaext_shortcut: +.cfi_startproc + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 
+ nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shaext + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext +.type sha256_block_data_order_ssse3,@function +.align 64 +sha256_block_data_order_ssse3: +.cfi_startproc +.Lssse3_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + 
andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor 
%xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl 
%r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 
0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + 
movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl 
%r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 +.type sha256_block_data_order_avx,@function +.align 64 +sha256_block_data_order_avx: +.cfi_startproc +.Lavx_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl 
%edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl 
%r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl 
%ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lavx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d 
+ addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + 
shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl 
$6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_avx + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_block_data_order_avx,.-sha256_block_data_order_avx +.type sha256_block_data_order_avx2,@function +.align 64 +sha256_block_data_order_avx2: +.cfi_startproc +.Lavx2_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $544,%rsp + shlq $4,%rdx + andq $-1024,%rsp + leaq (%rsi,%rdx,4),%rdx + addq $448,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_avx2: + + vzeroupper + subq $-64,%rsi + movl 0(%rdi),%eax + movq %rsi,%r12 + movl 4(%rdi),%ebx + cmpq %rdx,%rsi + movl 8(%rdi),%ecx + cmoveq %rsp,%r12 + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%ymm8 + vmovdqa K256+512+64(%rip),%ymm9 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi),%xmm0 + vmovdqu -64+16(%rsi),%xmm1 + vmovdqu -64+32(%rsi),%xmm2 + vmovdqu -64+48(%rsi),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + + movq 88(%rsp),%rdi +.cfi_def_cfa %rdi,8 + leaq -64(%rsp),%rsp + + + + movq %rdi,-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + movl %ebx,%edi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%edi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + leaq -64(%rsp),%rsp +.cfi_escape 
0x0f,0x05,0x77,0x38,0x06,0x23,0x08 + + pushq 64-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm0,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 0(%rbp),%ymm0,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl 
%r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm1,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 32(%rbp),%ymm1,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08 + + pushq 64-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal 
(%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm2,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 64(%rbp),%ymm2,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl 
%r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm3,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 96(%rbp),%ymm3,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 
32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl 
%r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rbp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + cmpq 80(%rbp),%rsi + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%edi + xorl %ecx,%edi + movl %r9d,%r12d + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl 
$22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl 
%r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rsp + +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + leaq 128(%rsi),%rsi + addl 24(%rdi),%r10d + movq %rsi,%r12 + addl 28(%rdi),%r11d + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + cmoveq %rsp,%r12 + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + + +.cfi_escape 0x0f,0x06,0x76,0xd8,0x00,0x06,0x23,0x08 + +.Ldone_avx2: + movq 88(%rbp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2 diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha512-armv8.S b/contrib/openssl-cmake/asm/crypto/sha/sha512-armv8.S new file mode 100644 index 000000000000..29c38b0bb075 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha512-armv8.S @@ -0,0 +1,1614 @@ +// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// Kryo 1.92 17.4 (+30%) 11.2 (+8%) +// ThunderX2 2.54 13.2 (+40%) 8.40 (+18%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). 
+// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significantly faster +// and the gap is only 40-90%. +// +// October 2016. +// +// Originally it was reckoned that it makes no sense to implement NEON +// version of SHA256 for 64-bit processors. This is because performance +// improvement on most wide-spread Cortex-A5x processors was observed +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was +// observed that 32-bit NEON SHA256 performs significantly better than +// 64-bit scalar version on *some* of the more recent processors. As +// result 64-bit NEON version of SHA256 was added to provide best +// all-round performance. For example it executes ~30% faster on X-Gene +// and Mongoose. [For reference, NEON version of SHA512 is bound to +// deliver much less improvement, likely *negative* on Cortex-A5x. +// Which is why NEON support is limited to SHA256.] + +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" +#ifndef __KERNEL__ + +.hidden OPENSSL_armcap_P +#endif + +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,%function +.align 6 +sha512_block_data_order: + AARCH64_VALID_CALL_TARGET +#ifndef __KERNEL__ + adrp x16,OPENSSL_armcap_P + ldr w16,[x16,#:lo12:OPENSSL_armcap_P] + tst w16,#ARMV8_SHA512 + b.ne .Lv8_entry +#endif + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adrp x30,.LK512 + add x30,x30,#:lo12:.LK512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor 
x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic 
x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add 
x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add 
x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // 
h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 
+ bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next 
round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size sha512_block_data_order,.-sha512_block_data_order + +.section .rodata + +.align 6 +.type .LK512,%object +.LK512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 
0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0 // terminator +.size .LK512,.-.LK512 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 + +.text +#ifndef __KERNEL__ +.type sha512_block_armv8,%function +.align 6 +sha512_block_armv8: +.Lv8_entry: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 + + ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context + adrp x3,.LK512 + add x3,x3,#:lo12:.LK512 + + rev64 v16.16b,v16.16b + rev64 v17.16b,v17.16b + rev64 v18.16b,v18.16b + rev64 v19.16b,v19.16b + rev64 v20.16b,v20.16b + rev64 v21.16b,v21.16b + rev64 v22.16b,v22.16b + rev64 v23.16b,v23.16b + b .Loop_hw + +.align 4 +.Loop_hw: + ld1 {v24.2d},[x3],#16 + subs x2,x2,#1 + sub x4,x1,#128 + orr v26.16b,v0.16b,v0.16b // offload + orr v27.16b,v1.16b,v1.16b + orr v28.16b,v2.16b,v2.16b + orr v29.16b,v3.16b,v3.16b + csel x1,x1,x4,ne // conditional rewind + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + 
K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext 
v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" 
+.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 +.inst 0xce6680a3 //sha512h 
v3.16b,v5.16b,v6.16b +.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b +.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b +.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b +.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b +.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b +.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v16.2d + ld1 {v16.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v16.16b,v16.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v17.2d + ld1 {v17.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a1 
//sha512h v1.16b,v5.16b,v6.16b + rev64 v17.16b,v17.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v18.2d + ld1 {v18.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v18.16b,v18.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v19.2d + ld1 {v19.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + rev64 v19.16b,v19.16b + add v4.2d,v1.2d,v3.2d // "D + T1" +.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v20.2d + ld1 {v20.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + rev64 v20.16b,v20.16b + add v1.2d,v0.2d,v2.2d // "D + T1" +.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v21.2d + ld1 {v21.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v21.16b,v21.16b + add v0.2d,v3.2d,v4.2d // "D + T1" +.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v22.2d + ld1 {v22.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" +.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v22.16b,v22.16b + add v3.2d,v2.2d,v1.2d // "D + T1" +.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + sub x3,x3,#80*8 // rewind + add v25.2d,v25.2d,v23.2d + ld1 {v23.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" +.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v23.16b,v23.16b + add v2.2d,v4.2d,v0.2d // "D + T1" +.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v0.2d,v0.2d,v26.2d // accumulate + add v1.2d,v1.2d,v27.2d + add v2.2d,v2.2d,v28.2d + add v3.2d,v3.2d,v29.2d + + cbnz x2,.Loop_hw + + st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context + + ldr x29,[sp],#16 + ret +.size sha512_block_armv8,.-sha512_block_armv8 +#endif diff --git a/contrib/openssl-cmake/asm/crypto/sha/sha512-x86_64.s b/contrib/openssl-cmake/asm/crypto/sha/sha512-x86_64.s new file mode 100644 index 000000000000..3a3809ea2f93 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sha/sha512-x86_64.s @@ -0,0 +1,5463 @@ +.text + + +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: +.cfi_startproc + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $2048,%r10d + jnz .Lxop_shortcut + andl $296,%r11d + cmpl $296,%r11d + je .Lavx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je .Lavx_shortcut + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx 
+.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq 
$23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq 
$6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + 
rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq 
$5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + 
xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq 
$7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha512_block_data_order,.-sha512_block_data_order +.section .rodata +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.previous +.type sha512_block_data_order_xop,@function +.align 64 +sha512_block_data_order_xop: +.cfi_startproc +.Lxop_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 
+.Lprologue_xop: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm0,%xmm0 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,223,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm7,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm0,%xmm0 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm1,%xmm1 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,216,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm0,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + 
vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm1,%xmm1 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm2,%xmm2 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,217,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm1,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm2,%xmm2 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm3,%xmm3 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,218,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm2,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm3,%xmm3 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm4,%xmm4 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor 
%xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,219,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm3,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm4,%xmm4 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm5,%xmm5 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,220,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm4,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm5,%xmm5 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm6,%xmm6 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,221,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm5,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm6,%xmm6 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + 
movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm7,%xmm7 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,222,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm6,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm7,%xmm7 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lxop_00_47 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq 
%rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + 
rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_xop + + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha512_block_data_order_xop,.-sha512_block_data_order_xop +.type sha512_block_data_order_avx,@function +.align 64 +sha512_block_data_order_avx: +.cfi_startproc +.Lavx_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq 
-64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 
+ addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 
+ vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq 
$7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lavx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi 
+ addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq 
%rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) 
+ movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_avx + + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha512_block_data_order_avx,.-sha512_block_data_order_avx +.type sha512_block_data_order_avx2,@function +.align 64 +sha512_block_data_order_avx2: +.cfi_startproc +.Lavx2_shortcut: + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $1312,%rsp + shlq $4,%rdx + andq $-2048,%rsp + leaq (%rsi,%rdx,8),%rdx + addq $1152,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue_avx2: + + vzeroupper + subq $-128,%rsi + movq 0(%rdi),%rax + movq %rsi,%r12 + movq 8(%rdi),%rbx + cmpq %rdx,%rsi + movq 16(%rdi),%rcx + cmoveq %rsp,%r12 + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqu -128(%rsi),%xmm0 + vmovdqu -128+16(%rsi),%xmm1 + vmovdqu -128+32(%rsi),%xmm2 + leaq K512+128(%rip),%rbp + vmovdqu -128+48(%rsi),%xmm3 + vmovdqu -128+64(%rsi),%xmm4 + vmovdqu -128+80(%rsi),%xmm5 + vmovdqu -128+96(%rsi),%xmm6 + vmovdqu -128+112(%rsi),%xmm7 + + vmovdqa 1152(%rbp),%ymm10 + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm10,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm10,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + vpshufb %ymm10,%ymm2,%ymm2 + vinserti128 $1,64(%r12),%ymm4,%ymm4 + vpshufb %ymm10,%ymm3,%ymm3 + vinserti128 $1,80(%r12),%ymm5,%ymm5 + vpshufb %ymm10,%ymm4,%ymm4 + vinserti128 $1,96(%r12),%ymm6,%ymm6 + vpshufb %ymm10,%ymm5,%ymm5 + vinserti128 $1,112(%r12),%ymm7,%ymm7 + + vpaddq -128(%rbp),%ymm0,%ymm8 + vpshufb %ymm10,%ymm6,%ymm6 + vpaddq -96(%rbp),%ymm1,%ymm9 + vpshufb %ymm10,%ymm7,%ymm7 + vpaddq -64(%rbp),%ymm2,%ymm10 + vpaddq -32(%rbp),%ymm3,%ymm11 + vmovdqa %ymm8,0(%rsp) + vpaddq 0(%rbp),%ymm4,%ymm8 + vmovdqa %ymm9,32(%rsp) + vpaddq 32(%rbp),%ymm5,%ymm9 + vmovdqa %ymm10,64(%rsp) + vpaddq 64(%rbp),%ymm6,%ymm10 + vmovdqa %ymm11,96(%rsp) + + movq 152(%rsp),%rdi +.cfi_def_cfa %rdi,8 + leaq -128(%rsp),%rsp + + + + movq %rdi,-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpaddq 96(%rbp),%ymm7,%ymm11 + vmovdqa %ymm8,0(%rsp) + xorq %r14,%r14 + vmovdqa %ymm9,32(%rsp) + movq %rbx,%rdi + vmovdqa %ymm10,64(%rsp) + xorq %rcx,%rdi + vmovdqa %ymm11,96(%rsp) + movq %r9,%r12 + addq $32*8,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + leaq -128(%rsp),%rsp +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 + + pushq 128-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $8,%ymm0,%ymm1,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm4,%ymm5,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm0,%ymm0 
+ vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm7,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm7,%ymm10 + vpaddq %ymm8,%ymm0,%ymm0 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm7,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm0,%ymm0 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq -128(%rbp),%ymm0,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm5,%ymm6,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm1,%ymm1 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm0,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm0,%ymm10 + vpaddq %ymm8,%ymm1,%ymm1 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm0,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm1,%ymm1 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq -96(%rbp),%ymm1,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm6,%ymm7,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm2,%ymm2 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + 
vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm1,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm1,%ymm10 + vpaddq %ymm8,%ymm2,%ymm2 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm1,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm2,%ymm2 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq -64(%rbp),%ymm2,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm7,%ymm0,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm3,%ymm3 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm2,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm2,%ymm10 + vpaddq %ymm8,%ymm3,%ymm3 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm2,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm3,%ymm3 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq -32(%rbp),%ymm3,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq -128(%rsp),%rsp +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 + + pushq 128-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $8,%ymm4,%ymm5,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm0,%ymm1,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm4,%ymm4 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq 
(%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm3,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm3,%ymm10 + vpaddq %ymm8,%ymm4,%ymm4 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm3,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm4,%ymm4 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq 0(%rbp),%ymm4,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm5,%ymm6,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm1,%ymm2,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm5,%ymm5 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm4,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm4,%ymm10 + vpaddq %ymm8,%ymm5,%ymm5 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm4,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm5,%ymm5 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq 32(%rbp),%ymm5,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm6,%ymm7,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm2,%ymm3,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm6,%ymm6 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + 
andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm5,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm5,%ymm10 + vpaddq %ymm8,%ymm6,%ymm6 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm5,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm6,%ymm6 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq 64(%rbp),%ymm6,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm7,%ymm0,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm3,%ymm4,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm7,%ymm7 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm6,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm6,%ymm10 + vpaddq %ymm8,%ymm7,%ymm7 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm6,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm7,%ymm7 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq 96(%rbp),%ymm7,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq 256(%rbp),%rbp + cmpb $0,-121(%rbp) + jne .Lavx2_00_47 + addq 0+128(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+128(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq 
%rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+128(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+128(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+128(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+128(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+128(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+128(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + addq 0(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq 
(%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + 
leaq (%rax,%r15,1),%rax + movq %r9,%r12 + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rbp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + cmpq 144(%rbp),%rsi + je .Ldone_avx2 + + xorq %r14,%r14 + movq %rbx,%rdi + xorq %rcx,%rdi + movq %r9,%r12 + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + addq 0+16(%rbp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+16(%rbp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+16(%rbp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+16(%rbp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+16(%rbp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+16(%rbp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq 
(%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+16(%rbp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+16(%rbp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + leaq -128(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rsp + +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + leaq 256(%rsi),%rsi + addq 48(%rdi),%r10 + movq %rsi,%r12 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + cmoveq %rsp,%r12 + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + + +.cfi_escape 0x0f,0x06,0x76,0x98,0x01,0x06,0x23,0x08 + +.Ldone_avx2: + movq 152(%rbp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.cfi_endproc +.size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2 diff --git a/contrib/openssl-cmake/asm/crypto/sm3/asm/sm3-armv8.S b/contrib/openssl-cmake/asm/crypto/sm3/asm/sm3-armv8.S new file mode 100644 index 000000000000..5c6167e6f250 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sm3/asm/sm3-armv8.S @@ -0,0 +1,508 @@ +// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License. 
You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html +// +// This module implements support for Armv8 SM3 instructions + +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" +.text +.globl ossl_hwsm3_block_data_order +.type ossl_hwsm3_block_data_order,%function +.align 5 +ossl_hwsm3_block_data_order: + AARCH64_VALID_CALL_TARGET + // load state + ld1 {v5.4s,v6.4s}, [x0] + rev64 v5.4s, v5.4s + rev64 v6.4s, v6.4s + ext v5.16b, v5.16b, v5.16b, #8 + ext v6.16b, v6.16b, v6.16b, #8 + adrp x8, .Tj + add x8, x8, #:lo12:.Tj + ldp s16, s17, [x8] + +.Loop: + // load input + ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64 + sub w2, w2, #1 + + mov v18.16b, v5.16b + mov v19.16b, v6.16b + +#ifndef __AARCH64EB__ + rev32 v0.16b, v0.16b + rev32 v1.16b, v1.16b + rev32 v2.16b, v2.16b + rev32 v3.16b, v3.16b +#endif + + ext v20.16b, v16.16b, v16.16b, #4 + // s4 = w7 | w8 | w9 | w10 + ext v4.16b, v1.16b, v2.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v0.16b, v1.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v2.16b, v3.16b, #8 +.inst 0xce63c004 //sm3partw1 v4.4s, v0.4s, v3.4s +.inst 0xce76c6e4 //sm3partw2 v4.4s, v23.4s, v22.4s + eor v22.16b, v0.16b, v1.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5682e5 //sm3tt1a v5.4s, v23.4s, v22.4s[0] +.inst 0xce408ae6 //sm3tt2a v6.4s, v23.4s, v0.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5692e5 //sm3tt1a v5.4s, v23.4s, v22.4s[1] +.inst 0xce409ae6 //sm3tt2a v6.4s, v23.4s, v0.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[2] +.inst 0xce40aae6 //sm3tt2a v6.4s, v23.4s, v0.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[3] +.inst 0xce40bae6 //sm3tt2a v6.4s, v23.4s, v0.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v0.16b, v2.16b, v3.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v1.16b, v2.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v3.16b, v4.16b, #8 +.inst 0xce64c020 //sm3partw1 v0.4s, v1.4s, v4.4s +.inst 0xce76c6e0 //sm3partw2 v0.4s, v23.4s, v22.4s + eor v22.16b, v1.16b, v2.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5682e5 //sm3tt1a v5.4s, v23.4s, v22.4s[0] +.inst 0xce418ae6 //sm3tt2a v6.4s, v23.4s, v1.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5692e5 //sm3tt1a v5.4s, v23.4s, v22.4s[1] +.inst 0xce419ae6 //sm3tt2a v6.4s, v23.4s, v1.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[2] +.inst 0xce41aae6 //sm3tt2a v6.4s, v23.4s, v1.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[3] +.inst 0xce41bae6 //sm3tt2a v6.4s, v23.4s, v1.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v1.16b, v3.16b, v4.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v2.16b, v3.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext 
v23.16b, v4.16b, v0.16b, #8 +.inst 0xce60c041 //sm3partw1 v1.4s, v2.4s, v0.4s +.inst 0xce76c6e1 //sm3partw2 v1.4s, v23.4s, v22.4s + eor v22.16b, v2.16b, v3.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5682e5 //sm3tt1a v5.4s, v23.4s, v22.4s[0] +.inst 0xce428ae6 //sm3tt2a v6.4s, v23.4s, v2.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5692e5 //sm3tt1a v5.4s, v23.4s, v22.4s[1] +.inst 0xce429ae6 //sm3tt2a v6.4s, v23.4s, v2.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[2] +.inst 0xce42aae6 //sm3tt2a v6.4s, v23.4s, v2.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[3] +.inst 0xce42bae6 //sm3tt2a v6.4s, v23.4s, v2.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v2.16b, v4.16b, v0.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v3.16b, v4.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v0.16b, v1.16b, #8 +.inst 0xce61c062 //sm3partw1 v2.4s, v3.4s, v1.4s +.inst 0xce76c6e2 //sm3partw2 v2.4s, v23.4s, v22.4s + eor v22.16b, v3.16b, v4.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5682e5 //sm3tt1a v5.4s, v23.4s, v22.4s[0] +.inst 0xce438ae6 //sm3tt2a v6.4s, v23.4s, v3.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5692e5 //sm3tt1a v5.4s, v23.4s, v22.4s[1] +.inst 0xce439ae6 //sm3tt2a v6.4s, v23.4s, v3.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[2] +.inst 0xce43aae6 //sm3tt2a v6.4s, v23.4s, v3.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b2e5 //sm3tt1a v5.4s, v23.4s, v22.4s[3] +.inst 0xce43bae6 //sm3tt2a v6.4s, v23.4s, v3.4s[3] + ext v20.16b, v17.16b, v17.16b, #4 + // s4 = w7 | w8 | w9 | w10 + ext v3.16b, v0.16b, v1.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v4.16b, v0.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v1.16b, v2.16b, #8 +.inst 0xce62c083 //sm3partw1 v3.4s, v4.4s, v2.4s +.inst 0xce76c6e3 //sm3partw2 v3.4s, v23.4s, v22.4s + eor v22.16b, v4.16b, v0.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce448ee6 //sm3tt2b v6.4s, v23.4s, v4.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce449ee6 //sm3tt2b v6.4s, v23.4s, v4.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce44aee6 //sm3tt2b v6.4s, v23.4s, v4.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce44bee6 //sm3tt2b v6.4s, v23.4s, v4.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v4.16b, v1.16b, v2.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v0.16b, v1.16b, #12 + // vtmp2 = w10 | w11 | 
w12 | w13 + ext v23.16b, v2.16b, v3.16b, #8 +.inst 0xce63c004 //sm3partw1 v4.4s, v0.4s, v3.4s +.inst 0xce76c6e4 //sm3partw2 v4.4s, v23.4s, v22.4s + eor v22.16b, v0.16b, v1.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce408ee6 //sm3tt2b v6.4s, v23.4s, v0.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce409ee6 //sm3tt2b v6.4s, v23.4s, v0.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce40aee6 //sm3tt2b v6.4s, v23.4s, v0.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce40bee6 //sm3tt2b v6.4s, v23.4s, v0.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v0.16b, v2.16b, v3.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v1.16b, v2.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v3.16b, v4.16b, #8 +.inst 0xce64c020 //sm3partw1 v0.4s, v1.4s, v4.4s +.inst 0xce76c6e0 //sm3partw2 v0.4s, v23.4s, v22.4s + eor v22.16b, v1.16b, v2.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce418ee6 //sm3tt2b v6.4s, v23.4s, v1.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce419ee6 //sm3tt2b v6.4s, v23.4s, v1.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce41aee6 //sm3tt2b v6.4s, v23.4s, v1.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce41bee6 //sm3tt2b v6.4s, v23.4s, v1.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v1.16b, v3.16b, v4.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v2.16b, v3.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v4.16b, v0.16b, #8 +.inst 0xce60c041 //sm3partw1 v1.4s, v2.4s, v0.4s +.inst 0xce76c6e1 //sm3partw2 v1.4s, v23.4s, v22.4s + eor v22.16b, v2.16b, v3.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce428ee6 //sm3tt2b v6.4s, v23.4s, v2.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce429ee6 //sm3tt2b v6.4s, v23.4s, v2.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce42aee6 //sm3tt2b v6.4s, v23.4s, v2.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce42bee6 //sm3tt2b v6.4s, v23.4s, v2.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v2.16b, v4.16b, v0.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v3.16b, v4.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext 
v23.16b, v0.16b, v1.16b, #8 +.inst 0xce61c062 //sm3partw1 v2.4s, v3.4s, v1.4s +.inst 0xce76c6e2 //sm3partw2 v2.4s, v23.4s, v22.4s + eor v22.16b, v3.16b, v4.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce438ee6 //sm3tt2b v6.4s, v23.4s, v3.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce439ee6 //sm3tt2b v6.4s, v23.4s, v3.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce43aee6 //sm3tt2b v6.4s, v23.4s, v3.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce43bee6 //sm3tt2b v6.4s, v23.4s, v3.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v3.16b, v0.16b, v1.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v4.16b, v0.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v1.16b, v2.16b, #8 +.inst 0xce62c083 //sm3partw1 v3.4s, v4.4s, v2.4s +.inst 0xce76c6e3 //sm3partw2 v3.4s, v23.4s, v22.4s + eor v22.16b, v4.16b, v0.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce448ee6 //sm3tt2b v6.4s, v23.4s, v4.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce449ee6 //sm3tt2b v6.4s, v23.4s, v4.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce44aee6 //sm3tt2b v6.4s, v23.4s, v4.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce44bee6 //sm3tt2b v6.4s, v23.4s, v4.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v4.16b, v1.16b, v2.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v0.16b, v1.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v2.16b, v3.16b, #8 +.inst 0xce63c004 //sm3partw1 v4.4s, v0.4s, v3.4s +.inst 0xce76c6e4 //sm3partw2 v4.4s, v23.4s, v22.4s + eor v22.16b, v0.16b, v1.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce408ee6 //sm3tt2b v6.4s, v23.4s, v0.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce409ee6 //sm3tt2b v6.4s, v23.4s, v0.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce40aee6 //sm3tt2b v6.4s, v23.4s, v0.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce40bee6 //sm3tt2b v6.4s, v23.4s, v0.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v0.16b, v2.16b, v3.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v1.16b, v2.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v3.16b, 
v4.16b, #8 +.inst 0xce64c020 //sm3partw1 v0.4s, v1.4s, v4.4s +.inst 0xce76c6e0 //sm3partw2 v0.4s, v23.4s, v22.4s + eor v22.16b, v1.16b, v2.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce418ee6 //sm3tt2b v6.4s, v23.4s, v1.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce419ee6 //sm3tt2b v6.4s, v23.4s, v1.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce41aee6 //sm3tt2b v6.4s, v23.4s, v1.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce41bee6 //sm3tt2b v6.4s, v23.4s, v1.4s[3] + // s4 = w7 | w8 | w9 | w10 + ext v1.16b, v3.16b, v4.16b, #12 + // vtmp1 = w3 | w4 | w5 | w6 + ext v22.16b, v2.16b, v3.16b, #12 + // vtmp2 = w10 | w11 | w12 | w13 + ext v23.16b, v4.16b, v0.16b, #8 +.inst 0xce60c041 //sm3partw1 v1.4s, v2.4s, v0.4s +.inst 0xce76c6e1 //sm3partw2 v1.4s, v23.4s, v22.4s + eor v22.16b, v2.16b, v3.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce428ee6 //sm3tt2b v6.4s, v23.4s, v2.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce429ee6 //sm3tt2b v6.4s, v23.4s, v2.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce42aee6 //sm3tt2b v6.4s, v23.4s, v2.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce42bee6 //sm3tt2b v6.4s, v23.4s, v2.4s[3] + eor v22.16b, v3.16b, v4.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce438ee6 //sm3tt2b v6.4s, v23.4s, v3.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce439ee6 //sm3tt2b v6.4s, v23.4s, v3.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce43aee6 //sm3tt2b v6.4s, v23.4s, v3.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce43bee6 //sm3tt2b v6.4s, v23.4s, v3.4s[3] + eor v22.16b, v4.16b, v0.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce448ee6 //sm3tt2b v6.4s, v23.4s, v4.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce449ee6 //sm3tt2b v6.4s, v23.4s, v4.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, 
v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce44aee6 //sm3tt2b v6.4s, v23.4s, v4.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce44bee6 //sm3tt2b v6.4s, v23.4s, v4.4s[3] + eor v22.16b, v0.16b, v1.16b +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce5686e5 //sm3tt1b v5.4s, v23.4s, v22.4s[0] +.inst 0xce408ee6 //sm3tt2b v6.4s, v23.4s, v0.4s[0] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce5696e5 //sm3tt1b v5.4s, v23.4s, v22.4s[1] +.inst 0xce409ee6 //sm3tt2b v6.4s, v23.4s, v0.4s[1] +.inst 0xce5418b7 //sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s + shl v21.4s, v20.4s, #1 + sri v21.4s, v20.4s, #31 +.inst 0xce56a6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[2] +.inst 0xce40aee6 //sm3tt2b v6.4s, v23.4s, v0.4s[2] +.inst 0xce5518b7 //sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s + shl v20.4s, v21.4s, #1 + sri v20.4s, v21.4s, #31 +.inst 0xce56b6e5 //sm3tt1b v5.4s, v23.4s, v22.4s[3] +.inst 0xce40bee6 //sm3tt2b v6.4s, v23.4s, v0.4s[3] + eor v5.16b, v5.16b, v18.16b + eor v6.16b, v6.16b, v19.16b + + // any remained blocks? + cbnz w2, .Loop + + // save state + rev64 v5.4s, v5.4s + rev64 v6.4s, v6.4s + ext v5.16b, v5.16b, v5.16b, #8 + ext v6.16b, v6.16b, v6.16b, #8 + st1 {v5.4s,v6.4s}, [x0] + ret +.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order +.section .rodata + +.type _sm3_consts,%object +.align 3 +_sm3_consts: +.Tj: +.word 0x79cc4519, 0x9d8a7a87 +.size _sm3_consts,.-_sm3_consts +.previous diff --git a/contrib/openssl-cmake/asm/crypto/sm4/asm/sm4-armv8.S b/contrib/openssl-cmake/asm/crypto/sm4/asm/sm4-armv8.S new file mode 100644 index 000000000000..0135313075e4 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sm4/asm/sm4-armv8.S @@ -0,0 +1,1092 @@ +// Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License. 
You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// +// This module implements support for SM4 hw support on aarch64 +// Oct 2021 +// + +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" +.arch armv8-a+crypto +.text + +.section .rodata +.type _sm4_v8_consts,%object +.align 6 +_sm4_v8_consts: +.Lck: +.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 +.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 +.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 +.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 +.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 +.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 +.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 +.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 +.Lfk: +.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc +.size _sm4_v8_consts,.-_sm4_v8_consts +.previous + +.globl sm4_v8_set_encrypt_key +.type sm4_v8_set_encrypt_key,%function +.align 5 +sm4_v8_set_encrypt_key: + AARCH64_VALID_CALL_TARGET + ld1 {v0.4s},[x0] + adrp x2,.Lfk + add x2,x2,#:lo12:.Lfk + ld1 {v24.4s},[x2] + adrp x2,.Lck + add x2,x2,#:lo12:.Lck + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x2],64 +#ifndef __AARCH64EB__ + rev32 v0.16b,v0.16b +#endif + ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x2] + eor v0.16b,v0.16b,v24.16b; +.inst 0xce70c800 //sm4ekey v0.4S,v0.4S,v16.4S +.inst 0xce71c801 //sm4ekey v1.4S,v0.4S,v17.4S +.inst 0xce72c822 //sm4ekey v2.4S,v1.4S,v18.4S +.inst 0xce73c843 //sm4ekey v3.4S,v2.4S,v19.4S +.inst 0xce74c864 //sm4ekey v4.4S,v3.4S,v20.4S + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],64 +.inst 0xce75c885 //sm4ekey v5.4S,v4.4S,v21.4S +.inst 0xce76c8a6 //sm4ekey v6.4S,v5.4S,v22.4S +.inst 0xce77c8c7 //sm4ekey v7.4S,v6.4S,v23.4S + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1] + ret +.size sm4_v8_set_encrypt_key,.-sm4_v8_set_encrypt_key +.globl sm4_v8_set_decrypt_key +.type sm4_v8_set_decrypt_key,%function +.align 5 +sm4_v8_set_decrypt_key: + AARCH64_VALID_CALL_TARGET + ld1 {v7.4s},[x0] + adrp x2,.Lfk + add x2,x2,#:lo12:.Lfk + ld1 {v24.4s},[x2] + adrp x2,.Lck + add x2,x2,#:lo12:.Lck + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x2],64 +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x2] + eor v7.16b, v7.16b,v24.16b; +.inst 0xce70c8e7 //sm4ekey v7.4S,v7.4S,v16.4S +.inst 0xce71c8e6 //sm4ekey v6.4S,v7.4S,v17.4S +.inst 0xce72c8c5 //sm4ekey v5.4S,v6.4S,v18.4S + rev64 v7.4s,v7.4s + rev64 v6.4s,v6.4s + ext v7.16b,v7.16b,v7.16b,#8 + ext v6.16b,v6.16b,v6.16b,#8 +.inst 0xce73c8a4 //sm4ekey v4.4S,v5.4S,v19.4S +.inst 0xce74c883 //sm4ekey v3.4S,v4.4S,v20.4S + rev64 v5.4s,v5.4s + rev64 v4.4s,v4.4s + ext v5.16b,v5.16b,v5.16b,#8 + ext v4.16b,v4.16b,v4.16b,#8 +.inst 0xce75c862 //sm4ekey v2.4S,v3.4S,v21.4S +.inst 0xce76c841 //sm4ekey v1.4S,v2.4S,v22.4S + rev64 v3.4s,v3.4s + rev64 v2.4s,v2.4s + ext v3.16b,v3.16b,v3.16b,#8 + ext v2.16b,v2.16b,v2.16b,#8 +.inst 0xce77c820 //sm4ekey v0.4S,v1.4S,v23.4S + rev64 v1.4s, v1.4s + rev64 v0.4s, v0.4s + ext v1.16b,v1.16b,v1.16b,#8 + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1] + ret +.size sm4_v8_set_decrypt_key,.-sm4_v8_set_decrypt_key +.globl sm4_v8_encrypt +.type sm4_v8_encrypt,%function +.align 5 +sm4_v8_encrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v16.4s},[x0] + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x2],64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x2] +#ifndef __AARCH64EB__ + rev32 
v16.16b,v16.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S + ext v16.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + st1 {v16.4s},[x1] + ret +.size sm4_v8_encrypt,.-sm4_v8_encrypt +.globl sm4_v8_decrypt +.type sm4_v8_decrypt,%function +.align 5 +sm4_v8_decrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v16.4s},[x0] + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x2],64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x2] +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S + ext v16.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + st1 {v16.4s},[x1] + ret +.size sm4_v8_decrypt,.-sm4_v8_decrypt +.globl sm4_v8_ecb_encrypt +.type sm4_v8_ecb_encrypt,%function +.align 5 +sm4_v8_ecb_encrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],#64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3] +1: + cmp x2,#64 + b.lt 1f + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0],#64 + cmp x2,#128 + b.lt 2f + ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x0],#64 + // 8 blocks +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +#ifndef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif +#ifndef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif +#ifndef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif +#ifndef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08413 //sm4e v19.4s,v0.4s + +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s + +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s + +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s + +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s + +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s + +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s + +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + ext v16.16b,v16.16b,v16.16b,#8 + rev64 v17.4S,v17.4S +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + ext v17.16b,v17.16b,v17.16b,#8 + rev64 v18.4S,v18.4S +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + ext v18.16b,v18.16b,v18.16b,#8 + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +.inst 0xcec08414 //sm4e v20.4s,v0.4s +.inst 0xcec08415 //sm4e 
v21.4s,v0.4s +.inst 0xcec08416 //sm4e v22.4s,v0.4s +.inst 0xcec08417 //sm4e v23.4s,v0.4s + +.inst 0xcec08434 //sm4e v20.4s,v1.4s +.inst 0xcec08435 //sm4e v21.4s,v1.4s +.inst 0xcec08436 //sm4e v22.4s,v1.4s +.inst 0xcec08437 //sm4e v23.4s,v1.4s + +.inst 0xcec08454 //sm4e v20.4s,v2.4s +.inst 0xcec08455 //sm4e v21.4s,v2.4s +.inst 0xcec08456 //sm4e v22.4s,v2.4s +.inst 0xcec08457 //sm4e v23.4s,v2.4s + +.inst 0xcec08474 //sm4e v20.4s,v3.4s +.inst 0xcec08475 //sm4e v21.4s,v3.4s +.inst 0xcec08476 //sm4e v22.4s,v3.4s +.inst 0xcec08477 //sm4e v23.4s,v3.4s + +.inst 0xcec08494 //sm4e v20.4s,v4.4s +.inst 0xcec08495 //sm4e v21.4s,v4.4s +.inst 0xcec08496 //sm4e v22.4s,v4.4s +.inst 0xcec08497 //sm4e v23.4s,v4.4s + +.inst 0xcec084b4 //sm4e v20.4s,v5.4s +.inst 0xcec084b5 //sm4e v21.4s,v5.4s +.inst 0xcec084b6 //sm4e v22.4s,v5.4s +.inst 0xcec084b7 //sm4e v23.4s,v5.4s + +.inst 0xcec084d4 //sm4e v20.4s,v6.4s +.inst 0xcec084d5 //sm4e v21.4s,v6.4s +.inst 0xcec084d6 //sm4e v22.4s,v6.4s +.inst 0xcec084d7 //sm4e v23.4s,v6.4s + +.inst 0xcec084f4 //sm4e v20.4s,v7.4s + rev64 v20.4S,v20.4S +.inst 0xcec084f5 //sm4e v21.4s,v7.4s + ext v20.16b,v20.16b,v20.16b,#8 + rev64 v21.4S,v21.4S +.inst 0xcec084f6 //sm4e v22.4s,v7.4s + ext v21.16b,v21.16b,v21.16b,#8 + rev64 v22.4S,v22.4S +.inst 0xcec084f7 //sm4e v23.4s,v7.4s + ext v22.16b,v22.16b,v22.16b,#8 + rev64 v23.4S,v23.4S + ext v23.16b,v23.16b,v23.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +#ifndef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif +#ifndef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 +#ifndef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif +#ifndef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif + st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64 + subs x2,x2,#128 + b.gt 1b + ret + // 4 blocks +2: +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08413 //sm4e v19.4s,v0.4s + +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s + +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s + +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s + +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s + +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s + +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s + +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + ext v16.16b,v16.16b,v16.16b,#8 + rev64 v17.4S,v17.4S +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + ext v17.16b,v17.16b,v17.16b,#8 + rev64 v18.4S,v18.4S +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + 
ext v18.16b,v18.16b,v18.16b,#8 + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 + subs x2,x2,#64 + b.gt 1b +1: + subs x2,x2,#16 + b.lt 1f + ld1 {v16.4s},[x0],#16 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S + ext v16.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + st1 {v16.4s},[x1],#16 + b.ne 1b +1: + ret +.size sm4_v8_ecb_encrypt,.-sm4_v8_ecb_encrypt +.globl sm4_v8_cbc_encrypt +.type sm4_v8_cbc_encrypt,%function +.align 5 +sm4_v8_cbc_encrypt: + AARCH64_VALID_CALL_TARGET + stp d8,d9,[sp, #-16]! + + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],#64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3] + ld1 {v8.4s},[x4] + cmp w5,#0 + b.eq .Ldec +1: + cmp x2, #64 + b.lt 1f + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0],#64 + eor v16.16b,v16.16b,v8.16b +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S + ext v16.16b,v16.16b,v16.16b,#8 + eor v17.16b,v17.16b,v16.16b +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + rev64 v17.4S,v17.4S + ext v17.16b,v17.16b,v17.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + eor v18.16b,v18.16b,v17.16b +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + rev64 v18.4S,v18.4S + ext v18.16b,v18.16b,v18.16b,#8 +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif + eor v19.16b,v19.16b,v18.16b +.inst 0xcec08413 //sm4e v19.4s,v0.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif + mov v8.16b,v19.16b + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 + subs x2,x2,#64 + b.ne 1b +1: + subs x2,x2,#16 + b.lt 3f + ld1 {v16.4s},[x0],#16 + eor v8.16b,v8.16b,v16.16b +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b 
+#endif +.inst 0xcec08408 //sm4e v8.4s,v0.4s +.inst 0xcec08428 //sm4e v8.4s,v1.4s +.inst 0xcec08448 //sm4e v8.4s,v2.4s +.inst 0xcec08468 //sm4e v8.4s,v3.4s +.inst 0xcec08488 //sm4e v8.4s,v4.4s +.inst 0xcec084a8 //sm4e v8.4s,v5.4s +.inst 0xcec084c8 //sm4e v8.4s,v6.4s +.inst 0xcec084e8 //sm4e v8.4s,v7.4s + rev64 v8.4S,v8.4S + ext v8.16b,v8.16b,v8.16b,#8 +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + st1 {v8.4s},[x1],#16 + b.ne 1b + b 3f +.Ldec: +1: + cmp x2, #64 + b.lt 1f + ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0] + ld1 {v24.4s,v25.4s,v26.4s,v27.4s},[x0],#64 + cmp x2,#128 + b.lt 2f + // 8 blocks mode + ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x0] + ld1 {v28.4s,v29.4s,v30.4s,v31.4s},[x0],#64 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +#ifndef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif +#ifndef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif +#ifndef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif +#ifndef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08413 //sm4e v19.4s,v0.4s + +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s + +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s + +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s + +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s + +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s + +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s + +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + ext v16.16b,v16.16b,v16.16b,#8 + rev64 v17.4S,v17.4S +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + ext v17.16b,v17.16b,v17.16b,#8 + rev64 v18.4S,v18.4S +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + ext v18.16b,v18.16b,v18.16b,#8 + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +.inst 0xcec08414 //sm4e v20.4s,v0.4s +.inst 0xcec08415 //sm4e v21.4s,v0.4s +.inst 0xcec08416 //sm4e v22.4s,v0.4s +.inst 0xcec08417 //sm4e v23.4s,v0.4s + +.inst 0xcec08434 //sm4e v20.4s,v1.4s +.inst 0xcec08435 //sm4e v21.4s,v1.4s +.inst 0xcec08436 //sm4e v22.4s,v1.4s +.inst 0xcec08437 //sm4e v23.4s,v1.4s + +.inst 0xcec08454 //sm4e v20.4s,v2.4s +.inst 0xcec08455 //sm4e v21.4s,v2.4s +.inst 0xcec08456 //sm4e v22.4s,v2.4s +.inst 0xcec08457 //sm4e v23.4s,v2.4s + +.inst 0xcec08474 //sm4e v20.4s,v3.4s +.inst 0xcec08475 //sm4e v21.4s,v3.4s +.inst 0xcec08476 //sm4e v22.4s,v3.4s +.inst 0xcec08477 //sm4e v23.4s,v3.4s + +.inst 0xcec08494 //sm4e v20.4s,v4.4s +.inst 0xcec08495 //sm4e v21.4s,v4.4s +.inst 0xcec08496 //sm4e v22.4s,v4.4s +.inst 0xcec08497 //sm4e v23.4s,v4.4s + +.inst 0xcec084b4 //sm4e v20.4s,v5.4s +.inst 0xcec084b5 //sm4e v21.4s,v5.4s +.inst 0xcec084b6 //sm4e v22.4s,v5.4s +.inst 0xcec084b7 //sm4e v23.4s,v5.4s + +.inst 0xcec084d4 //sm4e v20.4s,v6.4s 
+.inst 0xcec084d5 //sm4e v21.4s,v6.4s +.inst 0xcec084d6 //sm4e v22.4s,v6.4s +.inst 0xcec084d7 //sm4e v23.4s,v6.4s + +.inst 0xcec084f4 //sm4e v20.4s,v7.4s + rev64 v20.4S,v20.4S +.inst 0xcec084f5 //sm4e v21.4s,v7.4s + ext v20.16b,v20.16b,v20.16b,#8 + rev64 v21.4S,v21.4S +.inst 0xcec084f6 //sm4e v22.4s,v7.4s + ext v21.16b,v21.16b,v21.16b,#8 + rev64 v22.4S,v22.4S +.inst 0xcec084f7 //sm4e v23.4s,v7.4s + ext v22.16b,v22.16b,v22.16b,#8 + rev64 v23.4S,v23.4S + ext v23.16b,v23.16b,v23.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +#ifndef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif +#ifndef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif +#ifndef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif +#ifndef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif + eor v16.16b,v16.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + eor v18.16b,v18.16b,v25.16b + mov v8.16b,v31.16b + eor v19.16b,v19.16b,v26.16b + eor v20.16b,v20.16b,v27.16b + eor v21.16b,v21.16b,v28.16b + eor v22.16b,v22.16b,v29.16b + eor v23.16b,v23.16b,v30.16b + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 + st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64 + subs x2,x2,128 + b.gt 1b + b 3f + // 4 blocks mode +2: +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08413 //sm4e v19.4s,v0.4s + +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s + +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s + +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s + +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s + +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s + +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s + +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + ext v16.16b,v16.16b,v16.16b,#8 + rev64 v17.4S,v17.4S +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + ext v17.16b,v17.16b,v17.16b,#8 + rev64 v18.4S,v18.4S +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + ext v18.16b,v18.16b,v18.16b,#8 + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif + eor v16.16b,v16.16b,v8.16b + eor v17.16b,v17.16b,v24.16b + mov v8.16b,v27.16b + eor v18.16b,v18.16b,v25.16b + eor v19.16b,v19.16b,v26.16b + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 + subs x2,x2,#64 + b.gt 1b +1: + subs x2,x2,#16 + b.lt 3f + ld1 {v16.4s},[x0],#16 + mov v24.16b,v16.16b +#ifndef __AARCH64EB__ + rev32 
v16.16b,v16.16b +#endif +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S + ext v16.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + eor v16.16b,v16.16b,v8.16b + mov v8.16b,v24.16b + st1 {v16.4s},[x1],#16 + b.ne 1b +3: + // save back IV + st1 {v8.4s},[x4] + ldp d8,d9,[sp],#16 + ret +.size sm4_v8_cbc_encrypt,.-sm4_v8_cbc_encrypt +.globl sm4_v8_ctr32_encrypt_blocks +.type sm4_v8_ctr32_encrypt_blocks,%function +.align 5 +sm4_v8_ctr32_encrypt_blocks: + AARCH64_VALID_CALL_TARGET + stp d8,d9,[sp, #-16]! + + ld1 {v8.4s},[x4] + ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],64 + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3] +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov w5,v8.s[3] +1: + cmp x2,#4 + b.lt 1f + ld1 {v24.4s,v25.4s,v26.4s,v27.4s},[x0],#64 + mov v16.16b,v8.16b + mov v17.16b,v8.16b + mov v18.16b,v8.16b + mov v19.16b,v8.16b + add w5,w5,#1 + mov v17.s[3],w5 + add w5,w5,#1 + mov v18.s[3],w5 + add w5,w5,#1 + mov v19.s[3],w5 + cmp x2,#8 + b.lt 2f + ld1 {v28.4s,v29.4s,v30.4s,v31.4s},[x0],#64 + mov v20.16b,v8.16b + mov v21.16b,v8.16b + mov v22.16b,v8.16b + mov v23.16b,v8.16b + add w5,w5,#1 + mov v20.s[3],w5 + add w5,w5,#1 + mov v21.s[3],w5 + add w5,w5,#1 + mov v22.s[3],w5 + add w5,w5,#1 + mov v23.s[3],w5 +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08413 //sm4e v19.4s,v0.4s + +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s + +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s + +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s + +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s + +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s + +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s + +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + ext v16.16b,v16.16b,v16.16b,#8 + rev64 v17.4S,v17.4S +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + ext v17.16b,v17.16b,v17.16b,#8 + rev64 v18.4S,v18.4S +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + ext v18.16b,v18.16b,v18.16b,#8 + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +.inst 0xcec08414 //sm4e v20.4s,v0.4s +.inst 0xcec08415 //sm4e v21.4s,v0.4s +.inst 0xcec08416 //sm4e v22.4s,v0.4s +.inst 0xcec08417 //sm4e v23.4s,v0.4s + +.inst 0xcec08434 //sm4e v20.4s,v1.4s +.inst 0xcec08435 //sm4e v21.4s,v1.4s +.inst 0xcec08436 //sm4e v22.4s,v1.4s +.inst 0xcec08437 //sm4e v23.4s,v1.4s + +.inst 0xcec08454 //sm4e v20.4s,v2.4s +.inst 0xcec08455 //sm4e v21.4s,v2.4s +.inst 0xcec08456 //sm4e v22.4s,v2.4s +.inst 0xcec08457 //sm4e v23.4s,v2.4s + +.inst 0xcec08474 //sm4e v20.4s,v3.4s +.inst 0xcec08475 //sm4e v21.4s,v3.4s +.inst 0xcec08476 //sm4e v22.4s,v3.4s +.inst 
0xcec08477 //sm4e v23.4s,v3.4s + +.inst 0xcec08494 //sm4e v20.4s,v4.4s +.inst 0xcec08495 //sm4e v21.4s,v4.4s +.inst 0xcec08496 //sm4e v22.4s,v4.4s +.inst 0xcec08497 //sm4e v23.4s,v4.4s + +.inst 0xcec084b4 //sm4e v20.4s,v5.4s +.inst 0xcec084b5 //sm4e v21.4s,v5.4s +.inst 0xcec084b6 //sm4e v22.4s,v5.4s +.inst 0xcec084b7 //sm4e v23.4s,v5.4s + +.inst 0xcec084d4 //sm4e v20.4s,v6.4s +.inst 0xcec084d5 //sm4e v21.4s,v6.4s +.inst 0xcec084d6 //sm4e v22.4s,v6.4s +.inst 0xcec084d7 //sm4e v23.4s,v6.4s + +.inst 0xcec084f4 //sm4e v20.4s,v7.4s + rev64 v20.4S,v20.4S +.inst 0xcec084f5 //sm4e v21.4s,v7.4s + ext v20.16b,v20.16b,v20.16b,#8 + rev64 v21.4S,v21.4S +.inst 0xcec084f6 //sm4e v22.4s,v7.4s + ext v21.16b,v21.16b,v21.16b,#8 + rev64 v22.4S,v22.4S +.inst 0xcec084f7 //sm4e v23.4s,v7.4s + ext v22.16b,v22.16b,v22.16b,#8 + rev64 v23.4S,v23.4S + ext v23.16b,v23.16b,v23.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif +#ifndef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif +#ifndef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif +#ifndef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif +#ifndef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif + eor v16.16b,v16.16b,v24.16b + eor v17.16b,v17.16b,v25.16b + eor v18.16b,v18.16b,v26.16b + eor v19.16b,v19.16b,v27.16b + eor v20.16b,v20.16b,v28.16b + eor v21.16b,v21.16b,v29.16b + eor v22.16b,v22.16b,v30.16b + eor v23.16b,v23.16b,v31.16b + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 + st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64 + subs x2,x2,#8 + b.eq 3f + add w5,w5,#1 + mov v8.s[3],w5 + b 1b +2: +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08411 //sm4e v17.4s,v0.4s +.inst 0xcec08412 //sm4e v18.4s,v0.4s +.inst 0xcec08413 //sm4e v19.4s,v0.4s + +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08431 //sm4e v17.4s,v1.4s +.inst 0xcec08432 //sm4e v18.4s,v1.4s +.inst 0xcec08433 //sm4e v19.4s,v1.4s + +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08451 //sm4e v17.4s,v2.4s +.inst 0xcec08452 //sm4e v18.4s,v2.4s +.inst 0xcec08453 //sm4e v19.4s,v2.4s + +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08471 //sm4e v17.4s,v3.4s +.inst 0xcec08472 //sm4e v18.4s,v3.4s +.inst 0xcec08473 //sm4e v19.4s,v3.4s + +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec08491 //sm4e v17.4s,v4.4s +.inst 0xcec08492 //sm4e v18.4s,v4.4s +.inst 0xcec08493 //sm4e v19.4s,v4.4s + +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084b1 //sm4e v17.4s,v5.4s +.inst 0xcec084b2 //sm4e v18.4s,v5.4s +.inst 0xcec084b3 //sm4e v19.4s,v5.4s + +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084d1 //sm4e v17.4s,v6.4s +.inst 0xcec084d2 //sm4e v18.4s,v6.4s +.inst 0xcec084d3 //sm4e v19.4s,v6.4s + +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S +.inst 0xcec084f1 //sm4e v17.4s,v7.4s + ext v16.16b,v16.16b,v16.16b,#8 + rev64 v17.4S,v17.4S +.inst 0xcec084f2 //sm4e v18.4s,v7.4s + ext v17.16b,v17.16b,v17.16b,#8 + rev64 v18.4S,v18.4S +.inst 0xcec084f3 //sm4e v19.4s,v7.4s + ext v18.16b,v18.16b,v18.16b,#8 + rev64 v19.4S,v19.4S + ext v19.16b,v19.16b,v19.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif +#ifndef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifndef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif +#ifndef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif + eor v16.16b,v16.16b,v24.16b + eor v17.16b,v17.16b,v25.16b + eor v18.16b,v18.16b,v26.16b + eor v19.16b,v19.16b,v27.16b + st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 + subs 
x2,x2,#4 + b.eq 3f + add w5,w5,#1 + mov v8.s[3],w5 + b 1b +1: + subs x2,x2,#1 + b.lt 3f + mov v16.16b,v8.16b + ld1 {v24.4s},[x0],#16 +.inst 0xcec08410 //sm4e v16.4s,v0.4s +.inst 0xcec08430 //sm4e v16.4s,v1.4s +.inst 0xcec08450 //sm4e v16.4s,v2.4s +.inst 0xcec08470 //sm4e v16.4s,v3.4s +.inst 0xcec08490 //sm4e v16.4s,v4.4s +.inst 0xcec084b0 //sm4e v16.4s,v5.4s +.inst 0xcec084d0 //sm4e v16.4s,v6.4s +.inst 0xcec084f0 //sm4e v16.4s,v7.4s + rev64 v16.4S,v16.4S + ext v16.16b,v16.16b,v16.16b,#8 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + eor v16.16b,v16.16b,v24.16b + st1 {v16.4s},[x1],#16 + b.eq 3f + add w5,w5,#1 + mov v8.s[3],w5 + b 1b +3: + ldp d8,d9,[sp],#16 + ret +.size sm4_v8_ctr32_encrypt_blocks,.-sm4_v8_ctr32_encrypt_blocks diff --git a/contrib/openssl-cmake/asm/crypto/sm4/asm/sm4-riscv64-zvksed.S b/contrib/openssl-cmake/asm/crypto/sm4/asm/sm4-riscv64-zvksed.S new file mode 100644 index 000000000000..c353c27e1c06 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sm4/asm/sm4-riscv64-zvksed.S @@ -0,0 +1,188 @@ +.text +.p2align 3 +.globl rv64i_zvksed_sm4_set_encrypt_key +.type rv64i_zvksed_sm4_set_encrypt_key,@function +rv64i_zvksed_sm4_set_encrypt_key: + .word 0xc1027057 + + # Load the user key + .word 33906823 + .word 1242865879 + + # Load the FK. + la t0, FK + .word 33743111 + + # Generate round keys. + .word 772866263 + .word 2249204215 # rk[0:3] + .word 2251334263 # rk[4:7] + .word 2252415735 # rk[8:11] + .word 2253497207 # rk[12:15] + .word 2254578679 # rk[16:19] + .word 2255660151 # rk[20:23] + .word 2256741623 # rk[24:27] + .word 2257823095 # rk[28:31] + + # Store round keys + .word 33939879 # rk[0:3] + addi a1, a1, 16 + .word 33940007 # rk[4:7] + addi a1, a1, 16 + .word 33940135 # rk[8:11] + addi a1, a1, 16 + .word 33940263 # rk[12:15] + addi a1, a1, 16 + .word 33940391 # rk[16:19] + addi a1, a1, 16 + .word 33940519 # rk[20:23] + addi a1, a1, 16 + .word 33940647 # rk[24:27] + addi a1, a1, 16 + .word 33940775 # rk[28:31] + + li a0, 1 + ret +.size rv64i_zvksed_sm4_set_encrypt_key,.-rv64i_zvksed_sm4_set_encrypt_key +.p2align 3 +.globl rv64i_zvksed_sm4_set_decrypt_key +.type rv64i_zvksed_sm4_set_decrypt_key,@function +rv64i_zvksed_sm4_set_decrypt_key: + .word 0xc1027057 + + # Load the user key + .word 33906823 + .word 1242865879 + + # Load the FK. + la t0, FK + .word 33743111 + + # Generate round keys. 
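+ # (The .word directives in this file are raw instruction encodings,
+ # presumably emitted this way so the file assembles even when the
+ # toolchain lacks the Zvksed vector-SM4 mnemonics; the rk[...] comments
+ # note which group of four round keys each encoded step produces.)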
+ .word 772866263 + .word 2249204215 # rk[0:3] + .word 2251334263 # rk[4:7] + .word 2252415735 # rk[8:11] + .word 2253497207 # rk[12:15] + .word 2254578679 # rk[16:19] + .word 2255660151 # rk[20:23] + .word 2256741623 # rk[24:27] + .word 2257823095 # rk[28:31] + + # Store round keys in reverse order + addi a1, a1, 12 + li t1, -4 + .word 174449959 # rk[31:28] + addi a1, a1, 16 + .word 174449831 # rk[27:24] + addi a1, a1, 16 + .word 174449703 # rk[23:20] + addi a1, a1, 16 + .word 174449575 # rk[19:16] + addi a1, a1, 16 + .word 174449447 # rk[15:12] + addi a1, a1, 16 + .word 174449319 # rk[11:8] + addi a1, a1, 16 + .word 174449191 # rk[7:4] + addi a1, a1, 16 + .word 174449063 # rk[3:0] + + li a0, 1 + ret +.size rv64i_zvksed_sm4_set_decrypt_key,.-rv64i_zvksed_sm4_set_decrypt_key +.p2align 3 +.globl rv64i_zvksed_sm4_encrypt +.type rv64i_zvksed_sm4_encrypt,@function +rv64i_zvksed_sm4_encrypt: + .word 0xc1027057 + + # Order of elements was adjusted in set_encrypt_key() + .word 33972487 # rk[0:3] + addi a2, a2, 16 + .word 33972615 # rk[4:7] + addi a2, a2, 16 + .word 33972743 # rk[8:11] + addi a2, a2, 16 + .word 33972871 # rk[12:15] + addi a2, a2, 16 + .word 33972999 # rk[16:19] + addi a2, a2, 16 + .word 33973127 # rk[20:23] + addi a2, a2, 16 + .word 33973255 # rk[24:27] + addi a2, a2, 16 + .word 33973383 # rk[28:31] + + # Load input data + .word 33906823 + .word 1242865879 + + # Encrypt with all keys + .word 2787647735 + .word 2788696311 + .word 2789744887 + .word 2790793463 + .word 2791842039 + .word 2792890615 + .word 2793939191 + .word 2794987767 + + # Save the ciphertext (in reverse element order) + .word 1242865879 + li t0, -4 + addi a1, a1, 12 + .word 173400231 + + ret +.size rv64i_zvksed_sm4_encrypt,.-rv64i_zvksed_sm4_encrypt +.p2align 3 +.globl rv64i_zvksed_sm4_decrypt +.type rv64i_zvksed_sm4_decrypt,@function +rv64i_zvksed_sm4_decrypt: + .word 0xc1027057 + + # Order of elements was adjusted in set_decrypt_key() + .word 33973383 # rk[31:28] + addi a2, a2, 16 + .word 33973255 # rk[27:24] + addi a2, a2, 16 + .word 33973127 # rk[23:20] + addi a2, a2, 16 + .word 33972999 # rk[19:16] + addi a2, a2, 16 + .word 33972871 # rk[15:12] + addi a2, a2, 16 + .word 33972743 # rk[11:8] + addi a2, a2, 16 + .word 33972615 # rk[7:4] + addi a2, a2, 16 + .word 33972487 # rk[3:0] + + # Load input data + .word 33906823 + .word 1242865879 + + # Decrypt with all keys + .word 2794987767 + .word 2793939191 + .word 2792890615 + .word 2791842039 + .word 2790793463 + .word 2789744887 + .word 2788696311 + .word 2787647735 + + # Save the plaintext (in reverse element order) + .word 1242865879 + li t0, -4 + addi a1, a1, 12 + .word 173400231 + + ret +.size rv64i_zvksed_sm4_decrypt,.-rv64i_zvksed_sm4_decrypt +# Family Key (little-endian 32-bit chunks) +.p2align 3 +FK: + .word 0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC +.size FK,.-FK diff --git a/contrib/openssl-cmake/asm/crypto/sm4/asm/vpsm4-armv8.S b/contrib/openssl-cmake/asm/crypto/sm4/asm/vpsm4-armv8.S new file mode 100644 index 000000000000..b2aad3252e11 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sm4/asm/vpsm4-armv8.S @@ -0,0 +1,5020 @@ +// Copyright 2020-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License.
You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// +// This module implements SM4 with ASIMD on aarch64 +// +// Feb 2022 +// + +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" +.arch armv8-a +.text + +.section .rodata +.type _vpsm4_consts,%object +.align 7 +_vpsm4_consts: +.Lsbox: +.byte 0xD6,0x90,0xE9,0xFE,0xCC,0xE1,0x3D,0xB7,0x16,0xB6,0x14,0xC2,0x28,0xFB,0x2C,0x05 +.byte 0x2B,0x67,0x9A,0x76,0x2A,0xBE,0x04,0xC3,0xAA,0x44,0x13,0x26,0x49,0x86,0x06,0x99 +.byte 0x9C,0x42,0x50,0xF4,0x91,0xEF,0x98,0x7A,0x33,0x54,0x0B,0x43,0xED,0xCF,0xAC,0x62 +.byte 0xE4,0xB3,0x1C,0xA9,0xC9,0x08,0xE8,0x95,0x80,0xDF,0x94,0xFA,0x75,0x8F,0x3F,0xA6 +.byte 0x47,0x07,0xA7,0xFC,0xF3,0x73,0x17,0xBA,0x83,0x59,0x3C,0x19,0xE6,0x85,0x4F,0xA8 +.byte 0x68,0x6B,0x81,0xB2,0x71,0x64,0xDA,0x8B,0xF8,0xEB,0x0F,0x4B,0x70,0x56,0x9D,0x35 +.byte 0x1E,0x24,0x0E,0x5E,0x63,0x58,0xD1,0xA2,0x25,0x22,0x7C,0x3B,0x01,0x21,0x78,0x87 +.byte 0xD4,0x00,0x46,0x57,0x9F,0xD3,0x27,0x52,0x4C,0x36,0x02,0xE7,0xA0,0xC4,0xC8,0x9E +.byte 0xEA,0xBF,0x8A,0xD2,0x40,0xC7,0x38,0xB5,0xA3,0xF7,0xF2,0xCE,0xF9,0x61,0x15,0xA1 +.byte 0xE0,0xAE,0x5D,0xA4,0x9B,0x34,0x1A,0x55,0xAD,0x93,0x32,0x30,0xF5,0x8C,0xB1,0xE3 +.byte 0x1D,0xF6,0xE2,0x2E,0x82,0x66,0xCA,0x60,0xC0,0x29,0x23,0xAB,0x0D,0x53,0x4E,0x6F +.byte 0xD5,0xDB,0x37,0x45,0xDE,0xFD,0x8E,0x2F,0x03,0xFF,0x6A,0x72,0x6D,0x6C,0x5B,0x51 +.byte 0x8D,0x1B,0xAF,0x92,0xBB,0xDD,0xBC,0x7F,0x11,0xD9,0x5C,0x41,0x1F,0x10,0x5A,0xD8 +.byte 0x0A,0xC1,0x31,0x88,0xA5,0xCD,0x7B,0xBD,0x2D,0x74,0xD0,0x12,0xB8,0xE5,0xB4,0xB0 +.byte 0x89,0x69,0x97,0x4A,0x0C,0x96,0x77,0x7E,0x65,0xB9,0xF1,0x09,0xC5,0x6E,0xC6,0x84 +.byte 0x18,0xF0,0x7D,0xEC,0x3A,0xDC,0x4D,0x20,0x79,0xEE,0x5F,0x3E,0xD7,0xCB,0x39,0x48 +.Lck: +.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 +.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 +.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 +.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 +.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 +.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 +.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 +.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 +.Lfk: +.quad 0x56aa3350a3b1bac6,0xb27022dc677d9197 +.Lshuffles: +.quad 0x0B0A090807060504,0x030201000F0E0D0C +.Lxts_magic: +.quad 0x0101010101010187,0x0101010101010101 + +.size _vpsm4_consts,.-_vpsm4_consts + +.previous + +.type _vpsm4_set_key,%function +.align 4 +_vpsm4_set_key: + AARCH64_VALID_CALL_TARGET + ld1 {v5.4s},[x0] + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + adrp x5,.Lshuffles + add x5,x5,#:lo12:.Lshuffles + ld1 {v7.2d},[x5] + adrp x5,.Lfk + add x5,x5,#:lo12:.Lfk + ld1 {v6.2d},[x5] + eor v5.16b,v5.16b,v6.16b + mov x6,#32 + adrp x5,.Lck + add x5,x5,#:lo12:.Lck + movi v0.16b,#64 + cbnz w2,1f + add x1,x1,124 +1: + mov w7,v5.s[1] + ldr w8,[x5],#4 + eor w8,w8,w7 + mov w7,v5.s[2] + eor w8,w8,w7 + mov w7,v5.s[3] + eor w8,w8,w7 + // sbox lookup + mov v4.s[0],w8 + tbl v1.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v4.16b + sub v4.16b,v4.16b,v0.16b + tbx v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v4.16b + sub v4.16b,v4.16b,v0.16b + tbx 
v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v4.16b + sub v4.16b,v4.16b,v0.16b + tbx v1.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v4.16b + mov w7,v1.s[0] + eor w8,w7,w7,ror #19 + eor w8,w8,w7,ror #9 + mov w7,v5.s[0] + eor w8,w8,w7 + mov v5.s[0],w8 + cbz w2,2f + str w8,[x1],#4 + b 3f +2: + str w8,[x1],#-4 +3: + tbl v5.16b,{v5.16b},v7.16b + subs x6,x6,#1 + b.ne 1b + ret +.size _vpsm4_set_key,.-_vpsm4_set_key +.type _vpsm4_enc_4blks,%function +.align 4 +_vpsm4_enc_4blks: + AARCH64_VALID_CALL_TARGET + mov x10,x3 + mov w11,#8 +10: + ldp w7,w8,[x10],8 + dup v12.4s,w7 + dup v13.4s,w8 + + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor v14.16b,v6.16b,v7.16b + eor v12.16b,v5.16b,v12.16b + eor v12.16b,v14.16b,v12.16b + movi v0.16b,#64 + movi v1.16b,#128 + movi v2.16b,#192 + sub v0.16b,v12.16b,v0.16b + sub v1.16b,v12.16b,v1.16b + sub v2.16b,v12.16b,v2.16b + tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v0.2d,v0.2d,v1.2d + add v2.2d,v2.2d,v12.2d + add v12.2d,v0.2d,v2.2d + + ushr v0.4s,v12.4s,32-2 + sli v0.4s,v12.4s,2 + ushr v2.4s,v12.4s,32-10 + eor v1.16b,v0.16b,v12.16b + sli v2.4s,v12.4s,10 + eor v1.16b,v2.16b,v1.16b + ushr v0.4s,v12.4s,32-18 + sli v0.4s,v12.4s,18 + ushr v2.4s,v12.4s,32-24 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v12.4s,24 + eor v12.16b,v2.16b,v1.16b + eor v4.16b,v4.16b,v12.16b + + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor v14.16b,v14.16b,v4.16b + eor v13.16b,v14.16b,v13.16b + movi v0.16b,#64 + movi v1.16b,#128 + movi v2.16b,#192 + sub v0.16b,v13.16b,v0.16b + sub v1.16b,v13.16b,v1.16b + sub v2.16b,v13.16b,v2.16b + tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v0.2d,v0.2d,v1.2d + add v2.2d,v2.2d,v13.2d + add v13.2d,v0.2d,v2.2d + + ushr v0.4s,v13.4s,32-2 + sli v0.4s,v13.4s,2 + ushr v2.4s,v13.4s,32-10 + eor v1.16b,v0.16b,v13.16b + sli v2.4s,v13.4s,10 + eor v1.16b,v2.16b,v1.16b + ushr v0.4s,v13.4s,32-18 + sli v0.4s,v13.4s,18 + ushr v2.4s,v13.4s,32-24 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,24 + eor v13.16b,v2.16b,v1.16b + ldp w7,w8,[x10],8 + eor v5.16b,v5.16b,v13.16b + + dup v12.4s,w7 + dup v13.4s,w8 + + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor v14.16b,v4.16b,v5.16b + eor v12.16b,v7.16b,v12.16b + eor v12.16b,v14.16b,v12.16b + movi v0.16b,#64 + movi v1.16b,#128 + movi v2.16b,#192 + sub v0.16b,v12.16b,v0.16b + sub v1.16b,v12.16b,v1.16b + sub v2.16b,v12.16b,v2.16b + tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v0.2d,v0.2d,v1.2d + add v2.2d,v2.2d,v12.2d + add v12.2d,v0.2d,v2.2d + + ushr v0.4s,v12.4s,32-2 + sli v0.4s,v12.4s,2 + ushr v2.4s,v12.4s,32-10 + eor v1.16b,v0.16b,v12.16b + sli v2.4s,v12.4s,10 + eor v1.16b,v2.16b,v1.16b + ushr v0.4s,v12.4s,32-18 + sli v0.4s,v12.4s,18 + ushr v2.4s,v12.4s,32-24 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v12.4s,24 + eor v12.16b,v2.16b,v1.16b + eor v6.16b,v6.16b,v12.16b + + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor v14.16b,v14.16b,v6.16b + eor v13.16b,v14.16b,v13.16b + movi v0.16b,#64 + movi v1.16b,#128 + movi v2.16b,#192 + sub v0.16b,v13.16b,v0.16b + sub v1.16b,v13.16b,v1.16b + sub v2.16b,v13.16b,v2.16b + tbl 
v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v0.2d,v0.2d,v1.2d + add v2.2d,v2.2d,v13.2d + add v13.2d,v0.2d,v2.2d + + ushr v0.4s,v13.4s,32-2 + sli v0.4s,v13.4s,2 + ushr v2.4s,v13.4s,32-10 + eor v1.16b,v0.16b,v13.16b + sli v2.4s,v13.4s,10 + eor v1.16b,v2.16b,v1.16b + ushr v0.4s,v13.4s,32-18 + sli v0.4s,v13.4s,18 + ushr v2.4s,v13.4s,32-24 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,24 + eor v13.16b,v2.16b,v1.16b + eor v7.16b,v7.16b,v13.16b + subs w11,w11,#1 + b.ne 10b +#ifndef __AARCH64EB__ + rev32 v3.16b,v4.16b +#else + mov v3.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v2.16b,v5.16b +#else + mov v2.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v1.16b,v6.16b +#else + mov v1.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v0.16b,v7.16b +#else + mov v0.16b,v7.16b +#endif + ret +.size _vpsm4_enc_4blks,.-_vpsm4_enc_4blks +.type _vpsm4_enc_8blks,%function +.align 4 +_vpsm4_enc_8blks: + AARCH64_VALID_CALL_TARGET + mov x10,x3 + mov w11,#8 +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + dup v12.4s,w7 + eor v14.16b,v6.16b,v7.16b + eor v15.16b,v10.16b,v11.16b + eor v0.16b,v5.16b,v12.16b + eor v1.16b,v9.16b,v12.16b + eor v12.16b,v14.16b,v0.16b + eor v13.16b,v15.16b,v1.16b + movi v3.16b,#64 + sub v0.16b,v12.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v12.2d,v2.2d,v12.2d + add v12.2d,v1.2d,v12.2d + + sub v0.16b,v13.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v13.2d,v2.2d,v13.2d + add v13.2d,v1.2d,v13.2d + + ushr v0.4s,v12.4s,32-2 + sli v0.4s,v12.4s,2 + ushr v2.4s,v13.4s,32-2 + eor v1.16b,v0.16b,v12.16b + sli v2.4s,v13.4s,2 + + ushr v0.4s,v12.4s,32-10 + eor v3.16b,v2.16b,v13.16b + sli v0.4s,v12.4s,10 + ushr v2.4s,v13.4s,32-10 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,10 + + ushr v0.4s,v12.4s,32-18 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,18 + ushr v2.4s,v13.4s,32-18 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,18 + + ushr v0.4s,v12.4s,32-24 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,24 + ushr v2.4s,v13.4s,32-24 + eor v12.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,24 + eor v13.16b,v2.16b,v3.16b + eor v4.16b,v4.16b,v12.16b + eor v8.16b,v8.16b,v13.16b + + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + dup v13.4s,w8 + eor v14.16b,v14.16b,v4.16b + eor v15.16b,v15.16b,v8.16b + eor v12.16b,v14.16b,v13.16b + eor v13.16b,v15.16b,v13.16b + movi v3.16b,#64 + sub v0.16b,v12.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v12.2d,v2.2d,v12.2d + add v12.2d,v1.2d,v12.2d + + sub v0.16b,v13.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b + tbl 
v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v13.2d,v2.2d,v13.2d + add v13.2d,v1.2d,v13.2d + + ushr v0.4s,v12.4s,32-2 + sli v0.4s,v12.4s,2 + ushr v2.4s,v13.4s,32-2 + eor v1.16b,v0.16b,v12.16b + sli v2.4s,v13.4s,2 + + ushr v0.4s,v12.4s,32-10 + eor v3.16b,v2.16b,v13.16b + sli v0.4s,v12.4s,10 + ushr v2.4s,v13.4s,32-10 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,10 + + ushr v0.4s,v12.4s,32-18 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,18 + ushr v2.4s,v13.4s,32-18 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,18 + + ushr v0.4s,v12.4s,32-24 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,24 + ushr v2.4s,v13.4s,32-24 + eor v12.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,24 + eor v13.16b,v2.16b,v3.16b + ldp w7,w8,[x10],8 + eor v5.16b,v5.16b,v12.16b + eor v9.16b,v9.16b,v13.16b + + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + dup v12.4s,w7 + eor v14.16b,v4.16b,v5.16b + eor v15.16b,v8.16b,v9.16b + eor v0.16b,v7.16b,v12.16b + eor v1.16b,v11.16b,v12.16b + eor v12.16b,v14.16b,v0.16b + eor v13.16b,v15.16b,v1.16b + movi v3.16b,#64 + sub v0.16b,v12.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v12.2d,v2.2d,v12.2d + add v12.2d,v1.2d,v12.2d + + sub v0.16b,v13.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v13.2d,v2.2d,v13.2d + add v13.2d,v1.2d,v13.2d + + ushr v0.4s,v12.4s,32-2 + sli v0.4s,v12.4s,2 + ushr v2.4s,v13.4s,32-2 + eor v1.16b,v0.16b,v12.16b + sli v2.4s,v13.4s,2 + + ushr v0.4s,v12.4s,32-10 + eor v3.16b,v2.16b,v13.16b + sli v0.4s,v12.4s,10 + ushr v2.4s,v13.4s,32-10 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,10 + + ushr v0.4s,v12.4s,32-18 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,18 + ushr v2.4s,v13.4s,32-18 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,18 + + ushr v0.4s,v12.4s,32-24 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,24 + ushr v2.4s,v13.4s,32-24 + eor v12.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,24 + eor v13.16b,v2.16b,v3.16b + eor v6.16b,v6.16b,v12.16b + eor v10.16b,v10.16b,v13.16b + + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + dup v13.4s,w8 + eor v14.16b,v14.16b,v6.16b + eor v15.16b,v15.16b,v10.16b + eor v12.16b,v14.16b,v13.16b + eor v13.16b,v15.16b,v13.16b + movi v3.16b,#64 + sub v0.16b,v12.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v12.2d,v2.2d,v12.2d + add v12.2d,v1.2d,v12.2d + + sub v0.16b,v13.16b,v3.16b + sub v1.16b,v0.16b,v3.16b + sub v2.16b,v1.16b,v3.16b + tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b + tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b + tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b + tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b + add v1.2d,v0.2d,v1.2d + add v13.2d,v2.2d,v13.2d + add v13.2d,v1.2d,v13.2d + + ushr v0.4s,v12.4s,32-2 + 
sli v0.4s,v12.4s,2 + ushr v2.4s,v13.4s,32-2 + eor v1.16b,v0.16b,v12.16b + sli v2.4s,v13.4s,2 + + ushr v0.4s,v12.4s,32-10 + eor v3.16b,v2.16b,v13.16b + sli v0.4s,v12.4s,10 + ushr v2.4s,v13.4s,32-10 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,10 + + ushr v0.4s,v12.4s,32-18 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,18 + ushr v2.4s,v13.4s,32-18 + eor v1.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,18 + + ushr v0.4s,v12.4s,32-24 + eor v3.16b,v2.16b,v3.16b + sli v0.4s,v12.4s,24 + ushr v2.4s,v13.4s,32-24 + eor v12.16b,v0.16b,v1.16b + sli v2.4s,v13.4s,24 + eor v13.16b,v2.16b,v3.16b + eor v7.16b,v7.16b,v12.16b + eor v11.16b,v11.16b,v13.16b + subs w11,w11,#1 + b.ne 10b +#ifndef __AARCH64EB__ + rev32 v3.16b,v4.16b +#else + mov v3.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v2.16b,v5.16b +#else + mov v2.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v1.16b,v6.16b +#else + mov v1.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v0.16b,v7.16b +#else + mov v0.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v8.16b +#else + mov v7.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v9.16b +#else + mov v6.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v10.16b +#else + mov v5.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v4.16b,v11.16b +#else + mov v4.16b,v11.16b +#endif + ret +.size _vpsm4_enc_8blks,.-_vpsm4_enc_8blks +.globl vpsm4_set_encrypt_key +.type vpsm4_set_encrypt_key,%function +.align 5 +vpsm4_set_encrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + mov w2,1 + bl _vpsm4_set_key + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_set_encrypt_key,.-vpsm4_set_encrypt_key +.globl vpsm4_set_decrypt_key +.type vpsm4_set_decrypt_key,%function +.align 5 +vpsm4_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
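+ // w2 selects the key order in _vpsm4_set_key: 0 makes it store the
+ // round keys in reverse, which is what decryption needs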
+ mov w2,0 + bl _vpsm4_set_key + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_set_decrypt_key,.-vpsm4_set_decrypt_key +.globl vpsm4_encrypt +.type vpsm4_encrypt,%function +.align 5 +vpsm4_encrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v4.4s},[x0] + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x3,x2 + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov 
v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + st1 {v4.4s},[x1] + ret +.size vpsm4_encrypt,.-vpsm4_encrypt +.globl vpsm4_decrypt +.type vpsm4_decrypt,%function +.align 5 +vpsm4_decrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v4.4s},[x0] + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x3,x2 + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov 
v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + st1 {v4.4s},[x1] + ret +.size vpsm4_decrypt,.-vpsm4_decrypt +.globl vpsm4_ecb_encrypt +.type vpsm4_ecb_encrypt,%function +.align 5 +vpsm4_ecb_encrypt: + AARCH64_SIGN_LINK_REGISTER + // convert length into blocks + lsr x2,x2,4 + stp d8,d9,[sp,#-80]! + stp d10,d11,[sp,#16] + stp d12,d13,[sp,#32] + stp d14,d15,[sp,#48] + stp x29,x30,[sp,#64] + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] +.Lecb_8_blocks_process: + cmp w2,#8 + b.lt .Lecb_4_blocks_process + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + bl _vpsm4_enc_8blks + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st4 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#8 + b.gt .Lecb_8_blocks_process + b 100f +.Lecb_4_blocks_process: + cmp w2,#4 + b.lt 1f + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_enc_4blks + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + sub w2,w2,#4 +1: + // process last block + cmp w2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor 
w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + st1 {v4.4s},[x1] + b 100f +1: // process last 2 blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[0],[x0],#16 + ld4 {v4.s,v5.s,v6.s,v7.s}[1],[x0],#16 + cmp w2,#2 + b.gt 1f +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_enc_4blks + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1] + b 100f +1: // process last 3 blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[2],[x0],#16 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_enc_4blks + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[2],[x1] +100: + ldp d10,d11,[sp,#16] + ldp d12,d13,[sp,#32] + ldp d14,d15,[sp,#48] + ldp x29,x30,[sp,#64] + ldp d8,d9,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ecb_encrypt,.-vpsm4_ecb_encrypt +.globl vpsm4_cbc_encrypt +.type vpsm4_cbc_encrypt,%function +.align 5 +vpsm4_cbc_encrypt: + AARCH64_VALID_CALL_TARGET + lsr x2,x2,4 + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] + cbz w5,.Ldec + ld1 {v3.4s},[x4] +.Lcbc_4_blocks_enc: + cmp w2,#4 + b.lt 1f + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + eor v4.16b,v4.16b,v3.16b +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi 
v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 + eor v5.16b,v5.16b,v4.16b + mov x10,x3 + mov w11,#8 + mov w12,v5.s[0] + mov w13,v5.s[1] + mov w14,v5.s[2] + mov w15,v5.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor 
w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v5.s[0],w15 + mov v5.s[1],w14 + mov v5.s[2],w13 + mov v5.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v6.16b,v6.16b,v5.16b + mov x10,x3 + mov w11,#8 + mov w12,v6.s[0] + mov w13,v6.s[1] + mov w14,v6.s[2] + mov w15,v6.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl 
v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v6.s[0],w15 + mov v6.s[1],w14 + mov v6.s[2],w13 + mov v6.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + eor v7.16b,v7.16b,v6.16b + mov x10,x3 + mov w11,#8 + mov w12,v7.s[0] + mov w13,v7.s[1] + mov w14,v7.s[2] + mov w15,v7.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi 
v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v7.s[0],w15 + mov v7.s[1],w14 + mov v7.s[2],w13 + mov v7.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + orr v3.16b,v7.16b,v7.16b + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#4 + b.ne .Lcbc_4_blocks_enc + b 2f +1: + subs w2,w2,#1 + b.lt 2f + ld1 {v4.4s},[x0],#16 + eor v3.16b,v3.16b,v4.16b +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w15,v3.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl 
v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v3.s[0],w15 + mov v3.s[1],w14 + mov v3.s[2],w13 + mov v3.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + st1 {v3.4s},[x1],#16 + b 1b +2: + // save back IV + st1 {v3.4s},[x4] + ret + +.Ldec: + // decryption mode starts + AARCH64_SIGN_LINK_REGISTER + stp d8,d9,[sp,#-80]! + stp d10,d11,[sp,#16] + stp d12,d13,[sp,#32] + stp d14,d15,[sp,#48] + stp x29,x30,[sp,#64] +.Lcbc_8_blocks_dec: + cmp w2,#8 + b.lt 1f + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0] + add x10,x0,#64 + ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x10] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + bl _vpsm4_enc_8blks + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + zip1 v8.4s,v4.4s,v5.4s + zip2 v9.4s,v4.4s,v5.4s + zip1 v10.4s,v6.4s,v7.4s + zip2 v11.4s,v6.4s,v7.4s + zip1 v4.2d,v8.2d,v10.2d + zip2 v5.2d,v8.2d,v10.2d + zip1 v6.2d,v9.2d,v11.2d + zip2 v7.2d,v9.2d,v11.2d + ld1 {v15.4s},[x4] + ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + // note ivec1 and vtmpx[3] are reusing the same register + // care needs to be taken to avoid conflict + eor v0.16b,v0.16b,v15.16b + ld1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + eor v1.16b,v1.16b,v8.16b + eor v2.16b,v2.16b,v9.16b + eor v3.16b,v3.16b,v10.16b + // save back IV + st1 {v15.4s}, [x4] + eor v4.16b,v4.16b,v11.16b + eor v5.16b,v5.16b,v12.16b + eor v6.16b,v6.16b,v13.16b + eor v7.16b,v7.16b,v14.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#8 + b.gt .Lcbc_8_blocks_dec + b.eq 100f +1: + ld1 {v15.4s},[x4] +.Lcbc_4_blocks_dec: + cmp w2,#4 + b.lt 1f + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_enc_4blks + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + zip1 
v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + eor v0.16b,v0.16b,v15.16b + eor v1.16b,v1.16b,v4.16b + orr v15.16b,v7.16b,v7.16b + eor v2.16b,v2.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + subs w2,w2,#4 + b.gt .Lcbc_4_blocks_dec + // save back IV + st1 {v7.4s}, [x4] + b 100f +1: // last block + subs w2,w2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0],#16 + // save back IV + st1 {v4.4s}, [x4] +#ifndef __AARCH64EB__ + rev32 v8.16b,v4.16b +#else + mov v8.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v8.s[0] + mov w13,v8.s[1] + mov w14,v8.s[2] + mov w15,v8.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor 
w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v8.s[0],w15 + mov v8.s[1],w14 + mov v8.s[2],w13 + mov v8.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + eor v8.16b,v8.16b,v15.16b + st1 {v8.4s},[x1],#16 + b 100f +1: // last two blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[0],[x0] + add x10,x0,#16 + ld4 {v4.s,v5.s,v6.s,v7.s}[1],[x10],#16 + subs w2,w2,1 + b.gt 1f +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_enc_4blks + ld1 {v4.4s,v5.4s},[x0],#32 + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + eor v0.16b,v0.16b,v15.16b + eor v1.16b,v1.16b,v4.16b + st1 {v0.4s,v1.4s},[x1],#32 + // save back IV + st1 {v5.4s}, [x4] + b 100f +1: // last 3 blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[2],[x10] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_enc_4blks + ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + eor v0.16b,v0.16b,v15.16b + eor v1.16b,v1.16b,v4.16b + eor v2.16b,v2.16b,v5.16b + st1 {v0.4s,v1.4s,v2.4s},[x1],#48 + // save back IV + st1 {v6.4s}, [x4] +100: + ldp d10,d11,[sp,#16] + ldp d12,d13,[sp,#32] + ldp d14,d15,[sp,#48] + ldp x29,x30,[sp,#64] + ldp d8,d9,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_cbc_encrypt,.-vpsm4_cbc_encrypt +.globl vpsm4_ctr32_encrypt_blocks +.type vpsm4_ctr32_encrypt_blocks,%function +.align 5 +vpsm4_ctr32_encrypt_blocks: + AARCH64_VALID_CALL_TARGET + ld1 {v3.4s},[x4] +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] + cmp w2,#1 + b.ne 1f + // fast processing for one single block without + // context saving overhead + mov x10,x3 + mov w11,#8 + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w15,v3.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl 
v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v3.s[0],w15 + mov v3.s[1],w14 + mov v3.s[2],w13 + mov v3.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + ld1 {v4.4s},[x0] + eor v4.16b,v4.16b,v3.16b + st1 {v4.4s},[x1] + ret +1: + AARCH64_SIGN_LINK_REGISTER + stp d8,d9,[sp,#-80]! 
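Every unrolled block in the generated code above and below follows the same pattern: the comment (for example "// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)") names an SM4 round, the movi #64/#128/#192 offsets plus the four tbl lookups implement the 256-byte S-box spread across v16-v31, and the "eor ...,ror #32-2/10/18/24" chain is the SM4 linear transform; eight passes of the "10:" loop, with two round keys per ldp, give the 32 rounds. The C fragment below is only a reference sketch of one such round, assuming an external SM4_SBOX[256] table; it is not part of the generated file, and the assembly rotates the roles of the four state words between rounds instead of moving data.

    #include <stdint.h>

    extern const uint8_t SM4_SBOX[256];      /* assumed 256-entry S-box table */

    static inline uint32_t rotl32(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32u - n));
    }

    /* tau: apply the S-box to each byte of the word (the tbl lookups) */
    static inline uint32_t sm4_tau(uint32_t x)
    {
        return ((uint32_t)SM4_SBOX[(x >> 24) & 0xff] << 24) |
               ((uint32_t)SM4_SBOX[(x >> 16) & 0xff] << 16) |
               ((uint32_t)SM4_SBOX[(x >>  8) & 0xff] <<  8) |
                (uint32_t)SM4_SBOX[ x        & 0xff];
    }

    /* L: the rotate-and-xor chain written as "eor ...,ror #32-2/10/18/24" */
    static inline uint32_t sm4_L(uint32_t x)
    {
        return x ^ rotl32(x, 2) ^ rotl32(x, 10) ^ rotl32(x, 18) ^ rotl32(x, 24);
    }

    /* One of the 32 rounds: B0 ^= L(tau(B1 ^ B2 ^ B3 ^ rk)) */
    static inline void sm4_round(uint32_t b[4], uint32_t rk)
    {
        b[0] ^= sm4_L(sm4_tau(b[1] ^ b[2] ^ b[3] ^ rk));
    }
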
+ stp d10,d11,[sp,#16] + stp d12,d13,[sp,#32] + stp d14,d15,[sp,#48] + stp x29,x30,[sp,#64] + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w5,v3.s[3] +.Lctr32_4_blocks_process: + cmp w2,#4 + b.lt 1f + dup v4.4s,w12 + dup v5.4s,w13 + dup v6.4s,w14 + mov v7.s[0],w5 + add w5,w5,#1 + mov v7.s[1],w5 + add w5,w5,#1 + mov v7.s[2],w5 + add w5,w5,#1 + mov v7.s[3],w5 + add w5,w5,#1 + cmp w2,#8 + b.ge .Lctr32_8_blocks_process + bl _vpsm4_enc_4blks + ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + subs w2,w2,#4 + b.ne .Lctr32_4_blocks_process + b 100f +.Lctr32_8_blocks_process: + dup v8.4s,w12 + dup v9.4s,w13 + dup v10.4s,w14 + mov v11.s[0],w5 + add w5,w5,#1 + mov v11.s[1],w5 + add w5,w5,#1 + mov v11.s[2],w5 + add w5,w5,#1 + mov v11.s[3],w5 + add w5,w5,#1 + bl _vpsm4_enc_8blks + ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + eor v4.16b,v4.16b,v8.16b + eor v5.16b,v5.16b,v9.16b + eor v6.16b,v6.16b,v10.16b + eor v7.16b,v7.16b,v11.16b + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st4 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#8 + b.ne .Lctr32_4_blocks_process + b 100f +1: // last block processing + subs w2,w2,#1 + b.lt 100f + b.gt 1f + mov v3.s[0],w12 + mov v3.s[1],w13 + mov v3.s[2],w14 + mov v3.s[3],w5 + mov x10,x3 + mov w11,#8 + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w15,v3.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + 
mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v3.s[0],w15 + mov v3.s[1],w14 + mov v3.s[2],w13 + mov v3.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + ld1 {v4.4s},[x0] + eor v4.16b,v4.16b,v3.16b + st1 {v4.4s},[x1] + b 100f +1: // last 2 blocks processing + dup v4.4s,w12 + dup v5.4s,w13 + dup v6.4s,w14 + mov v7.s[0],w5 + add w5,w5,#1 + mov v7.s[1],w5 + subs w2,w2,#1 + b.ne 1f + bl _vpsm4_enc_4blks + ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#16 + ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#16 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 + b 100f +1: // last 3 blocks processing + add w5,w5,#1 + mov v7.s[2],w5 + bl _vpsm4_enc_4blks + ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#16 + ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#16 + ld4 {v12.s,v13.s,v14.s,v15.s}[2],[x0],#16 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[2],[x1],#16 +100: + ldp d10,d11,[sp,#16] + ldp d12,d13,[sp,#32] + ldp d14,d15,[sp,#48] + ldp x29,x30,[sp,#64] + ldp d8,d9,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ctr32_encrypt_blocks,.-vpsm4_ctr32_encrypt_blocks +.globl vpsm4_xts_encrypt_gb +.type vpsm4_xts_encrypt_gb,%function +.align 5 +vpsm4_xts_encrypt_gb: + AARCH64_SIGN_LINK_REGISTER + stp x15, x16, [sp, #-0x10]! + stp x17, x18, [sp, #-0x10]! + stp x19, x20, [sp, #-0x10]! + stp x21, x22, [sp, #-0x10]! + stp x23, x24, [sp, #-0x10]! + stp x25, x26, [sp, #-0x10]! + stp x27, x28, [sp, #-0x10]! + stp x29, x30, [sp, #-0x10]! + stp d8, d9, [sp, #-0x10]! + stp d10, d11, [sp, #-0x10]! + stp d12, d13, [sp, #-0x10]! + stp d14, d15, [sp, #-0x10]! 
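The stores above spill the general-purpose and SIMD registers that the XTS code clobbers, since it keeps eight in-flight tweaks in x12-x27 across calls to _vpsm4_enc_8blks. Judging from the register usage that follows (x0 input, x1 output, x2 byte count, x3 data-key schedule, x4 tweak-key schedule, x5 IV buffer, w6 direction flag), the C-level prototype behind this entry point is presumably close to the sketch below; the parameter names and pointer types are inferred from the calling convention rather than taken from a header, and the plain vpsm4_xts_encrypt entry point later in the file appears to use the same layout.

    #include <stddef.h>

    /* Assumed prototype, inferred from the register usage (x0..x5, w6). */
    void vpsm4_xts_encrypt_gb(const unsigned char *in,  /* x0                               */
                              unsigned char *out,       /* x1                               */
                              size_t length,            /* x2: bytes; a tail that is not a
                                                           multiple of 16 is handled by
                                                           ciphertext stealing              */
                              const void *rk_data,      /* x3: SM4 round keys for the data  */
                              const void *rk_tweak,     /* x4: SM4 round keys for the tweak */
                              unsigned char ivec[16],   /* x5: initial tweak; the last tweak
                                                           is stored back here              */
                              int enc);                 /* w6: 1 = encrypt, 0 = decrypt     */
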
+ mov x26,x3 + mov x27,x4 + mov w28,w6 + ld1 {v8.4s}, [x5] + mov x3,x27 + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v8.s[0] + mov w13,v8.s[1] + mov w14,v8.s[2] + mov w15,v8.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v8.s[0],w15 + mov v8.s[1],w14 + mov v8.s[2],w13 + mov v8.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov x3,x26 + and x29,x2,#0x0F + // convert length into blocks + lsr x2,x2,4 + cmp x2,#1 + b.lt .return_gb + 
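The first tweak is produced by the round loop just above, which encrypts the IV under the tweak-key schedule. The repeated "mov w7,0x87 / extr / and / eor" sequences that follow derive the next seven tweaks, one GF(2^128) doubling per sequence, using the standard XTS reduction constant 0x87; in this _gb variant the tweak is bit-reversed with rbit before the doubling chain and reversed back before it is XORed with the data, which is what distinguishes it from the plain vpsm4_xts_encrypt path later in the file. The minimal sketch below mirrors the arithmetic of one extr/and/eor sequence, assuming the tweak is held as a low/high pair of 64-bit words like the x12/x13 register pair:

    #include <stdint.h>

    /* Multiply a 128-bit XTS tweak by x in GF(2^128), reduction constant 0x87.
       lo/hi mirror the x12 (low) / x13 (high) registers in the assembly. */
    static void xts_double_tweak(uint64_t *lo, uint64_t *hi)
    {
        uint64_t carry = (*hi >> 63) ? 0x87u : 0u;   /* the "and w8,w7,w9,asr#31" mask */
        *hi = (*hi << 1) | (*lo >> 63);              /* "extr xN,hi,lo,#63"            */
        *lo = (*lo << 1) ^ carry;                    /* "eor xN,carry,lo,lsl#1"        */
    }
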
+ cmp x29,0 + // If the encryption/decryption Length is N times of 16, + // the all blocks are encrypted/decrypted in .xts_encrypt_blocks_gb + b.eq .xts_encrypt_blocks_gb + + // If the encryption/decryption length is not N times of 16, + // the last two blocks are encrypted/decrypted in .last_2blks_tweak_gb or .only_2blks_tweak_gb + // the other blocks are encrypted/decrypted in .xts_encrypt_blocks_gb + subs x2,x2,#1 + b.eq .only_2blks_tweak_gb +.xts_encrypt_blocks_gb: + rbit v8.16b,v8.16b +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov x12,v8.d[0] + mov x13,v8.d[1] + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 +.Lxts_8_blocks_process_gb: + cmp x2,#8 + b.lt .Lxts_4_blocks_process_gb + mov v0.d[0],x12 + mov v0.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v0.16b,v0.16b +#endif + mov v1.d[0],x14 + mov v1.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v1.16b,v1.16b +#endif + mov v2.d[0],x16 + mov v2.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v2.16b,v2.16b +#endif + mov v3.d[0],x18 + mov v3.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + mov v12.d[0],x20 + mov v12.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v12.16b,v12.16b +#endif + mov v13.d[0],x22 + mov v13.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v13.16b,v13.16b +#endif + mov v14.d[0],x24 + mov v14.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v14.16b,v14.16b +#endif + mov v15.d[0],x26 + mov v15.d[1],x27 +#ifdef __AARCH64EB__ + rev32 v15.16b,v15.16b +#endif + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + rbit v0.16b,v0.16b + rbit v1.16b,v1.16b + rbit v2.16b,v2.16b + rbit v3.16b,v3.16b + eor v4.16b, v4.16b, v0.16b + eor v5.16b, v5.16b, v1.16b + eor v6.16b, v6.16b, v2.16b + eor v7.16b, v7.16b, v3.16b + ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + rbit v12.16b,v12.16b + rbit v13.16b,v13.16b + rbit v14.16b,v14.16b + rbit v15.16b,v15.16b + eor v8.16b, v8.16b, v12.16b + eor v9.16b, v9.16b, v13.16b + eor v10.16b, v10.16b, v14.16b + eor v11.16b, v11.16b, v15.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + zip1 v0.4s,v8.4s,v9.4s + zip2 v1.4s,v8.4s,v9.4s + zip1 v2.4s,v10.4s,v11.4s + zip2 v3.4s,v10.4s,v11.4s + zip1 v8.2d,v0.2d,v2.2d + zip2 v9.2d,v0.2d,v2.2d + zip1 v10.2d,v1.2d,v3.2d + zip2 v11.2d,v1.2d,v3.2d + bl _vpsm4_enc_8blks + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 
v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + zip1 v8.4s,v4.4s,v5.4s + zip2 v9.4s,v4.4s,v5.4s + zip1 v10.4s,v6.4s,v7.4s + zip2 v11.4s,v6.4s,v7.4s + zip1 v4.2d,v8.2d,v10.2d + zip2 v5.2d,v8.2d,v10.2d + zip1 v6.2d,v9.2d,v11.2d + zip2 v7.2d,v9.2d,v11.2d + mov v12.d[0],x12 + mov v12.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v12.16b,v12.16b +#endif + mov w7,0x87 + extr x9,x27,x27,#32 + extr x13,x27,x26,#63 + and w8,w7,w9,asr#31 + eor x12,x8,x26,lsl#1 + mov v13.d[0],x14 + mov v13.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v13.16b,v13.16b +#endif + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov v14.d[0],x16 + mov v14.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v14.16b,v14.16b +#endif + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov v15.d[0],x18 + mov v15.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v15.16b,v15.16b +#endif + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov v8.d[0],x20 + mov v8.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov v9.d[0],x22 + mov v9.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov v10.d[0],x24 + mov v10.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov v11.d[0],x26 + mov v11.d[1],x27 +#ifdef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v13.16b + eor v2.16b, v2.16b, v14.16b + eor v3.16b, v3.16b, v15.16b + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b + eor v6.16b, v6.16b, v10.16b + eor v7.16b, v7.16b, v11.16b + + // save the last tweak + st1 {v11.4s},[x5] + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs x2,x2,#8 + b.gt .Lxts_8_blocks_process_gb + b 100f +.Lxts_4_blocks_process_gb: + mov v8.d[0],x12 + mov v8.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov v9.d[0],x14 + mov v9.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov v10.d[0],x16 + mov v10.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + mov v11.d[0],x18 + mov v11.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + cmp x2,#4 + b.lt 1f + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + rbit v8.16b,v8.16b + rbit v9.16b,v9.16b + rbit v10.16b,v10.16b + rbit v11.16b,v11.16b + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b + eor v6.16b, v6.16b, v10.16b + eor v7.16b, v7.16b, v11.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d 
+ zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + sub x2,x2,#4 + mov v8.d[0],x20 + mov v8.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov v9.d[0],x22 + mov v9.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov v10.d[0],x24 + mov v10.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + // save the last tweak + st1 {v11.4s},[x5] +1: + // process last block + cmp x2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0],#16 + rbit v8.16b,v8.16b + eor v4.16b, v4.16b, v8.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + 
eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v8.16b + st1 {v4.4s},[x1],#16 + // save the last tweak + st1 {v8.4s},[x5] + b 100f +1: // process last 2 blocks + cmp x2,#2 + b.gt 1f + ld1 {v4.4s,v5.4s},[x0],#32 + rbit v8.16b,v8.16b + rbit v9.16b,v9.16b + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + st1 {v0.4s,v1.4s},[x1],#32 + // save the last tweak + st1 {v9.4s},[x5] + b 100f +1: // process last 3 blocks + ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 + rbit v8.16b,v8.16b + rbit v9.16b,v9.16b + rbit v10.16b,v10.16b + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b + eor v6.16b, v6.16b, v10.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + st1 {v0.4s,v1.4s,v2.4s},[x1],#48 + // save the last tweak + st1 {v10.4s},[x5] +100: + cmp x29,0 + b.eq .return_gb + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is larger than 32 +.last_2blks_tweak_gb: + ld1 {v8.4s},[x5] +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + rbit v2.16b,v8.16b + adrp x10,.Lxts_magic + ldr q0, [x10, #:lo12:.Lxts_magic] + shl v9.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v9.16b, v9.16b, v1.16b + rbit v9.16b,v9.16b + rbit v2.16b,v9.16b + adrp x10,.Lxts_magic + ldr q0, [x10, #:lo12:.Lxts_magic] + shl v10.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + rbit v10.16b,v10.16b + b .check_dec_gb + + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is equal to 32, who only need two tweaks +.only_2blks_tweak_gb: + mov v9.16b,v8.16b +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + rbit v2.16b,v9.16b + adrp x10,.Lxts_magic + ldr q0, [x10, #:lo12:.Lxts_magic] + shl v10.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + rbit v10.16b,v10.16b + b .check_dec_gb + + +// Determine whether encryption or decryption is required. +// The last two tweaks need to be swapped for decryption. 
+.check_dec_gb: + // encryption:1 decryption:0 + cmp w28,1 + b.eq .process_last_2blks_gb + mov v0.16B,v9.16b + mov v9.16B,v10.16b + mov v10.16B,v0.16b + +.process_last_2blks_gb: +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + ld1 {v4.4s},[x0],#16 + eor v4.16b, v4.16b, v9.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v9.16b + st1 {v4.4s},[x1],#16 + + sub x26,x1,16 +.loop_gb: + subs 
x29,x29,1 + ldrb w7,[x26,x29] + ldrb w8,[x0,x29] + strb w8,[x26,x29] + strb w7,[x1,x29] + b.gt .loop_gb + ld1 {v4.4s}, [x26] + eor v4.16b, v4.16b, v10.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v10.16b + st1 {v4.4s}, [x26] +.return_gb: + ldp d14, d15, [sp], #0x10 + ldp d12, d13, [sp], #0x10 + ldp d10, d11, [sp], #0x10 + ldp d8, d9, [sp], #0x10 + ldp x29, x30, [sp], #0x10 + ldp x27, x28, [sp], #0x10 + ldp x25, x26, [sp], #0x10 + ldp x23, 
x24, [sp], #0x10 + ldp x21, x22, [sp], #0x10 + ldp x19, x20, [sp], #0x10 + ldp x17, x18, [sp], #0x10 + ldp x15, x16, [sp], #0x10 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_xts_encrypt_gb,.-vpsm4_xts_encrypt_gb +.globl vpsm4_xts_encrypt +.type vpsm4_xts_encrypt,%function +.align 5 +vpsm4_xts_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x15, x16, [sp, #-0x10]! + stp x17, x18, [sp, #-0x10]! + stp x19, x20, [sp, #-0x10]! + stp x21, x22, [sp, #-0x10]! + stp x23, x24, [sp, #-0x10]! + stp x25, x26, [sp, #-0x10]! + stp x27, x28, [sp, #-0x10]! + stp x29, x30, [sp, #-0x10]! + stp d8, d9, [sp, #-0x10]! + stp d10, d11, [sp, #-0x10]! + stp d12, d13, [sp, #-0x10]! + stp d14, d15, [sp, #-0x10]! + mov x26,x3 + mov x27,x4 + mov w28,w6 + ld1 {v8.4s}, [x5] + mov x3,x27 + adrp x10,.Lsbox + add x10,x10,#:lo12:.Lsbox + ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 + ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 + ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 + ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v8.s[0] + mov w13,v8.s[1] + mov w14,v8.s[2] + mov w15,v8.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl 
v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v8.s[0],w15 + mov v8.s[1],w14 + mov v8.s[2],w13 + mov v8.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov x3,x26 + and x29,x2,#0x0F + // convert length into blocks + lsr x2,x2,4 + cmp x2,#1 + b.lt .return + + cmp x29,0 + // If the encryption/decryption Length is N times of 16, + // the all blocks are encrypted/decrypted in .xts_encrypt_blocks + b.eq .xts_encrypt_blocks + + // If the encryption/decryption length is not N times of 16, + // the last two blocks are encrypted/decrypted in .last_2blks_tweak or .only_2blks_tweak + // the other blocks are encrypted/decrypted in .xts_encrypt_blocks + subs x2,x2,#1 + b.eq .only_2blks_tweak +.xts_encrypt_blocks: +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov x12,v8.d[0] + mov x13,v8.d[1] + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 +.Lxts_8_blocks_process: + cmp x2,#8 + b.lt .Lxts_4_blocks_process + mov v0.d[0],x12 + mov v0.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v0.16b,v0.16b +#endif + mov v1.d[0],x14 + mov v1.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v1.16b,v1.16b +#endif + mov v2.d[0],x16 + mov v2.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v2.16b,v2.16b +#endif + mov v3.d[0],x18 + mov v3.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + mov v12.d[0],x20 + mov v12.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v12.16b,v12.16b +#endif + mov v13.d[0],x22 + mov v13.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v13.16b,v13.16b +#endif + mov v14.d[0],x24 + mov v14.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v14.16b,v14.16b +#endif + mov v15.d[0],x26 + mov v15.d[1],x27 +#ifdef __AARCH64EB__ + rev32 v15.16b,v15.16b +#endif + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + eor v4.16b, v4.16b, v0.16b + eor v5.16b, v5.16b, v1.16b + eor v6.16b, v6.16b, v2.16b + eor v7.16b, v7.16b, v3.16b + ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + eor v8.16b, v8.16b, v12.16b + eor v9.16b, v9.16b, v13.16b + eor v10.16b, v10.16b, v14.16b + eor v11.16b, v11.16b, v15.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 
v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + zip1 v0.4s,v8.4s,v9.4s + zip2 v1.4s,v8.4s,v9.4s + zip1 v2.4s,v10.4s,v11.4s + zip2 v3.4s,v10.4s,v11.4s + zip1 v8.2d,v0.2d,v2.2d + zip2 v9.2d,v0.2d,v2.2d + zip1 v10.2d,v1.2d,v3.2d + zip2 v11.2d,v1.2d,v3.2d + bl _vpsm4_enc_8blks + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + zip1 v8.4s,v4.4s,v5.4s + zip2 v9.4s,v4.4s,v5.4s + zip1 v10.4s,v6.4s,v7.4s + zip2 v11.4s,v6.4s,v7.4s + zip1 v4.2d,v8.2d,v10.2d + zip2 v5.2d,v8.2d,v10.2d + zip1 v6.2d,v9.2d,v11.2d + zip2 v7.2d,v9.2d,v11.2d + mov v12.d[0],x12 + mov v12.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v12.16b,v12.16b +#endif + mov w7,0x87 + extr x9,x27,x27,#32 + extr x13,x27,x26,#63 + and w8,w7,w9,asr#31 + eor x12,x8,x26,lsl#1 + mov v13.d[0],x14 + mov v13.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v13.16b,v13.16b +#endif + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov v14.d[0],x16 + mov v14.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v14.16b,v14.16b +#endif + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov v15.d[0],x18 + mov v15.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v15.16b,v15.16b +#endif + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov v8.d[0],x20 + mov v8.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov v9.d[0],x22 + mov v9.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov v10.d[0],x24 + mov v10.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov v11.d[0],x26 + mov v11.d[1],x27 +#ifdef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 + eor v0.16b, v0.16b, v12.16b + eor v1.16b, v1.16b, v13.16b + eor v2.16b, v2.16b, v14.16b + eor v3.16b, v3.16b, v15.16b + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b + eor v6.16b, v6.16b, v10.16b + eor v7.16b, v7.16b, v11.16b + + // save the last tweak + st1 {v11.4s},[x5] + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs x2,x2,#8 + b.gt .Lxts_8_blocks_process + b 100f +.Lxts_4_blocks_process: + mov v8.d[0],x12 + mov v8.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov v9.d[0],x14 + mov v9.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov v10.d[0],x16 + mov v10.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + mov v11.d[0],x18 + mov v11.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + cmp x2,#4 + b.lt 1f + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b + eor v6.16b, v6.16b, v10.16b + eor v7.16b, v7.16b, v11.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 
v7.16b,v7.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + sub x2,x2,#4 + mov v8.d[0],x20 + mov v8.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov v9.d[0],x22 + mov v9.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov v10.d[0],x24 + mov v10.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + // save the last tweak + st1 {v11.4s},[x5] +1: + // process last block + cmp x2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0],#16 + eor v4.16b, v4.16b, v8.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub 
v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v8.16b + st1 {v4.4s},[x1],#16 + // save the last tweak + st1 {v8.4s},[x5] + b 100f +1: // process last 2 blocks + cmp x2,#2 + b.gt 1f + ld1 {v4.4s,v5.4s},[x0],#32 + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + st1 {v0.4s,v1.4s},[x1],#32 + // save the last tweak + st1 {v9.4s},[x5] + b 100f +1: // process last 3 blocks + ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 + eor v4.16b, v4.16b, v8.16b + eor v5.16b, v5.16b, v9.16b + eor v6.16b, v6.16b, v10.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + st1 {v0.4s,v1.4s,v2.4s},[x1],#48 + // save the last tweak + st1 {v10.4s},[x5] +100: + cmp x29,0 + b.eq .return + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is larger than 32 +.last_2blks_tweak: + ld1 {v8.4s},[x5] +#ifdef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + mov v2.16b,v8.16b + adrp x10,.Lxts_magic + ldr q0, [x10, #:lo12:.Lxts_magic] + shl v9.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v9.16b, v9.16b, v1.16b + mov v2.16b,v9.16b + adrp x10,.Lxts_magic + ldr q0, [x10, #:lo12:.Lxts_magic] + shl v10.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + b .check_dec + + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is equal to 32, who only need two tweaks +.only_2blks_tweak: + mov v9.16b,v8.16b +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif + mov v2.16b,v9.16b + adrp x10,.Lxts_magic + ldr q0, [x10, #:lo12:.Lxts_magic] + shl v10.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, 
v0.16b + eor v10.16b, v10.16b, v1.16b + b .check_dec + + +// Determine whether encryption or decryption is required. +// The last two tweaks need to be swapped for decryption. +.check_dec: + // encryption:1 decryption:0 + cmp w28,1 + b.eq .process_last_2blks + mov v0.16B,v9.16b + mov v9.16B,v10.16b + mov v10.16B,v0.16b + +.process_last_2blks: +#ifdef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifdef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif + ld1 {v4.4s},[x0],#16 + eor v4.16b, v4.16b, v9.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov 
v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v9.16b + st1 {v4.4s},[x1],#16 + + sub x26,x1,16 +.loop: + subs x29,x29,1 + ldrb w7,[x26,x29] + ldrb w8,[x0,x29] + strb w8,[x26,x29] + strb w7,[x1,x29] + b.gt .loop + ld1 {v4.4s}, [x26] + eor v4.16b, v4.16b, v10.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + movi v1.16b,#64 + movi v2.16b,#128 + movi v3.16b,#192 + mov v0.s[0],w6 + + sub v1.16b,v0.16b,v1.16b + sub v2.16b,v0.16b,v2.16b + sub v3.16b,v0.16b,v3.16b + + tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b + tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b + tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b + tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b + + mov w6,v0.s[0] + mov w7,v1.s[0] + mov w9,v2.s[0] + add w7,w6,w7 + mov w6,v3.s[0] + add w7,w7,w9 + add w7,w7,w6 + + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v10.16b + st1 {v4.4s}, [x26] +.return: + ldp d14, d15, [sp], #0x10 + ldp d12, d13, 
[sp], #0x10 + ldp d10, d11, [sp], #0x10 + ldp d8, d9, [sp], #0x10 + ldp x29, x30, [sp], #0x10 + ldp x27, x28, [sp], #0x10 + ldp x25, x26, [sp], #0x10 + ldp x23, x24, [sp], #0x10 + ldp x21, x22, [sp], #0x10 + ldp x19, x20, [sp], #0x10 + ldp x17, x18, [sp], #0x10 + ldp x15, x16, [sp], #0x10 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_xts_encrypt,.-vpsm4_xts_encrypt diff --git a/contrib/openssl-cmake/asm/crypto/sm4/asm/vpsm4_ex-armv8.S b/contrib/openssl-cmake/asm/crypto/sm4/asm/vpsm4_ex-armv8.S new file mode 100644 index 000000000000..2ff0246cc604 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sm4/asm/vpsm4_ex-armv8.S @@ -0,0 +1,4522 @@ +// Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the Apache License 2.0 (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// +// This module implements SM4 with ASIMD and AESE on AARCH64 +// +// Dec 2022 +// + +// $output is the last argument if it looks like a file (it has an extension) +// $flavour is the first argument if it doesn't look like a file +#include "arm_arch.h" +.arch armv8-a+crypto +.text + +.type _vpsm4_ex_consts,%object +.align 7 +_vpsm4_ex_consts: +.Lck: +.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 +.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 +.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 +.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 +.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 +.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 +.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 +.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 +.Lfk: +.quad 0x56aa3350a3b1bac6,0xb27022dc677d9197 +.Lshuffles: +.quad 0x0B0A090807060504,0x030201000F0E0D0C +.Lxts_magic: +.quad 0x0101010101010187,0x0101010101010101 +.Lsbox_magic: +.quad 0x0b0e0104070a0d00,0x0306090c0f020508 +.quad 0x62185a2042387a00,0x22581a6002783a40 +.quad 0x15df62a89e54e923,0xc10bb67c4a803df7 +.quad 0xb9aa6b78c1d21300,0x1407c6d56c7fbead +.quad 0x6404462679195b3b,0xe383c1a1fe9edcbc +.quad 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f + +.size _vpsm4_ex_consts,.-_vpsm4_ex_consts +.type _vpsm4_ex_set_key,%function +.align 4 +_vpsm4_ex_set_key: + AARCH64_VALID_CALL_TARGET + ld1 {v5.4s},[x0] + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + adrp x5,.Lshuffles + add x5,x5,#:lo12:.Lshuffles + ld1 {v7.2d},[x5] + adrp x5,.Lfk + add x5,x5,#:lo12:.Lfk + ld1 {v6.2d},[x5] + eor v5.16b,v5.16b,v6.16b + mov x6,#32 + adrp x5,.Lck + add x5,x5,#:lo12:.Lck + movi v0.16b,#64 + cbnz w2,1f + add x1,x1,124 +1: + mov w7,v5.s[1] + ldr w8,[x5],#4 + eor w8,w8,w7 + mov w7,v5.s[2] + eor w8,w8,w7 + mov w7,v5.s[3] + eor w8,w8,w7 + // optimize sbox using AESE instruction + mov v4.s[0],w8 + tbl v0.16b, {v4.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + mov w7,v0.s[0] + eor w8,w7,w7,ror #19 + 
eor w8,w8,w7,ror #9 + mov w7,v5.s[0] + eor w8,w8,w7 + mov v5.s[0],w8 + cbz w2,2f + str w8,[x1],#4 + b 3f +2: + str w8,[x1],#-4 +3: + tbl v5.16b,{v5.16b},v7.16b + subs x6,x6,#1 + b.ne 1b + ret +.size _vpsm4_ex_set_key,.-_vpsm4_ex_set_key +.type _vpsm4_ex_enc_4blks,%function +.align 4 +_vpsm4_ex_enc_4blks: + AARCH64_VALID_CALL_TARGET + mov x10,x3 + mov w11,#8 +10: + ldp w7,w8,[x10],8 + dup v12.4s,w7 + dup v13.4s,w8 + + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor v14.16b,v6.16b,v7.16b + eor v12.16b,v5.16b,v12.16b + eor v12.16b,v14.16b,v12.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v12.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + mov v12.16b,v0.16b + + // linear transformation + ushr v0.4s,v12.4s,32-2 + ushr v1.4s,v12.4s,32-10 + ushr v2.4s,v12.4s,32-18 + ushr v3.4s,v12.4s,32-24 + sli v0.4s,v12.4s,2 + sli v1.4s,v12.4s,10 + sli v2.4s,v12.4s,18 + sli v3.4s,v12.4s,24 + eor v24.16b,v0.16b,v12.16b + eor v24.16b,v24.16b,v1.16b + eor v12.16b,v2.16b,v3.16b + eor v12.16b,v12.16b,v24.16b + eor v4.16b,v4.16b,v12.16b + + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor v14.16b,v14.16b,v4.16b + eor v13.16b,v14.16b,v13.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v13.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + mov v13.16b,v0.16b + + // linear transformation + ushr v0.4s,v13.4s,32-2 + ushr v1.4s,v13.4s,32-10 + ushr v2.4s,v13.4s,32-18 + ushr v3.4s,v13.4s,32-24 + sli v0.4s,v13.4s,2 + sli v1.4s,v13.4s,10 + sli v2.4s,v13.4s,18 + sli v3.4s,v13.4s,24 + eor v24.16b,v0.16b,v13.16b + eor v24.16b,v24.16b,v1.16b + eor v13.16b,v2.16b,v3.16b + eor v13.16b,v13.16b,v24.16b + ldp w7,w8,[x10],8 + eor v5.16b,v5.16b,v13.16b + + dup v12.4s,w7 + dup v13.4s,w8 + + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor v14.16b,v4.16b,v5.16b + eor v12.16b,v7.16b,v12.16b + eor v12.16b,v14.16b,v12.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v12.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + mov v12.16b,v0.16b + + // linear transformation + ushr v0.4s,v12.4s,32-2 + ushr v1.4s,v12.4s,32-10 + ushr v2.4s,v12.4s,32-18 + ushr v3.4s,v12.4s,32-24 + sli v0.4s,v12.4s,2 + sli v1.4s,v12.4s,10 + sli v2.4s,v12.4s,18 + sli v3.4s,v12.4s,24 + eor v24.16b,v0.16b,v12.16b + eor v24.16b,v24.16b,v1.16b + eor v12.16b,v2.16b,v3.16b + eor v12.16b,v12.16b,v24.16b + eor v6.16b,v6.16b,v12.16b + + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor v14.16b,v14.16b,v6.16b + eor v13.16b,v14.16b,v13.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v13.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + 
eor v0.16b, v0.16b, v24.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + mov v13.16b,v0.16b + + // linear transformation + ushr v0.4s,v13.4s,32-2 + ushr v1.4s,v13.4s,32-10 + ushr v2.4s,v13.4s,32-18 + ushr v3.4s,v13.4s,32-24 + sli v0.4s,v13.4s,2 + sli v1.4s,v13.4s,10 + sli v2.4s,v13.4s,18 + sli v3.4s,v13.4s,24 + eor v24.16b,v0.16b,v13.16b + eor v24.16b,v24.16b,v1.16b + eor v13.16b,v2.16b,v3.16b + eor v13.16b,v13.16b,v24.16b + eor v7.16b,v7.16b,v13.16b + subs w11,w11,#1 + b.ne 10b +#ifndef __AARCH64EB__ + rev32 v3.16b,v4.16b +#else + mov v3.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v2.16b,v5.16b +#else + mov v2.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v1.16b,v6.16b +#else + mov v1.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v0.16b,v7.16b +#else + mov v0.16b,v7.16b +#endif + ret +.size _vpsm4_ex_enc_4blks,.-_vpsm4_ex_enc_4blks +.type _vpsm4_ex_enc_8blks,%function +.align 4 +_vpsm4_ex_enc_8blks: + AARCH64_VALID_CALL_TARGET + mov x10,x3 + mov w11,#8 +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + dup v12.4s,w7 + eor v14.16b,v6.16b,v7.16b + eor v15.16b,v10.16b,v11.16b + eor v0.16b,v5.16b,v12.16b + eor v1.16b,v9.16b,v12.16b + eor v12.16b,v14.16b,v0.16b + eor v13.16b,v15.16b,v1.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v12.16b}, v26.16b + tbl v1.16b, {v13.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v28.16b}, v1.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + eor v25.16b, v25.16b, v25.16b + aese v0.16b,v25.16b + aese v1.16b,v25.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v30.16b}, v1.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + mov v12.16b,v0.16b + mov v13.16b,v1.16b + + // linear transformation + ushr v0.4s,v12.4s,32-2 + ushr v25.4s,v13.4s,32-2 + ushr v1.4s,v12.4s,32-10 + ushr v2.4s,v12.4s,32-18 + ushr v3.4s,v12.4s,32-24 + sli v0.4s,v12.4s,2 + sli v25.4s,v13.4s,2 + sli v1.4s,v12.4s,10 + sli v2.4s,v12.4s,18 + sli v3.4s,v12.4s,24 + eor v24.16b,v0.16b,v12.16b + eor v24.16b,v24.16b,v1.16b + eor v12.16b,v2.16b,v3.16b + eor v12.16b,v12.16b,v24.16b + ushr v1.4s,v13.4s,32-10 + ushr v2.4s,v13.4s,32-18 + ushr v3.4s,v13.4s,32-24 + sli v1.4s,v13.4s,10 + sli v2.4s,v13.4s,18 + sli v3.4s,v13.4s,24 + eor v24.16b,v25.16b,v13.16b + eor v24.16b,v24.16b,v1.16b + eor v13.16b,v2.16b,v3.16b + eor v13.16b,v13.16b,v24.16b + eor v4.16b,v4.16b,v12.16b + eor v8.16b,v8.16b,v13.16b + + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + dup v13.4s,w8 + eor v14.16b,v14.16b,v4.16b + eor v15.16b,v15.16b,v8.16b + eor v12.16b,v14.16b,v13.16b + eor v13.16b,v15.16b,v13.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v12.16b}, v26.16b + tbl v1.16b, {v13.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v28.16b}, v1.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + eor v25.16b, v25.16b, v25.16b + aese 
v0.16b,v25.16b + aese v1.16b,v25.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v30.16b}, v1.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + mov v12.16b,v0.16b + mov v13.16b,v1.16b + + // linear transformation + ushr v0.4s,v12.4s,32-2 + ushr v25.4s,v13.4s,32-2 + ushr v1.4s,v12.4s,32-10 + ushr v2.4s,v12.4s,32-18 + ushr v3.4s,v12.4s,32-24 + sli v0.4s,v12.4s,2 + sli v25.4s,v13.4s,2 + sli v1.4s,v12.4s,10 + sli v2.4s,v12.4s,18 + sli v3.4s,v12.4s,24 + eor v24.16b,v0.16b,v12.16b + eor v24.16b,v24.16b,v1.16b + eor v12.16b,v2.16b,v3.16b + eor v12.16b,v12.16b,v24.16b + ushr v1.4s,v13.4s,32-10 + ushr v2.4s,v13.4s,32-18 + ushr v3.4s,v13.4s,32-24 + sli v1.4s,v13.4s,10 + sli v2.4s,v13.4s,18 + sli v3.4s,v13.4s,24 + eor v24.16b,v25.16b,v13.16b + eor v24.16b,v24.16b,v1.16b + eor v13.16b,v2.16b,v3.16b + eor v13.16b,v13.16b,v24.16b + ldp w7,w8,[x10],8 + eor v5.16b,v5.16b,v12.16b + eor v9.16b,v9.16b,v13.16b + + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + dup v12.4s,w7 + eor v14.16b,v4.16b,v5.16b + eor v15.16b,v8.16b,v9.16b + eor v0.16b,v7.16b,v12.16b + eor v1.16b,v11.16b,v12.16b + eor v12.16b,v14.16b,v0.16b + eor v13.16b,v15.16b,v1.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v12.16b}, v26.16b + tbl v1.16b, {v13.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v28.16b}, v1.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + eor v25.16b, v25.16b, v25.16b + aese v0.16b,v25.16b + aese v1.16b,v25.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v30.16b}, v1.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + mov v12.16b,v0.16b + mov v13.16b,v1.16b + + // linear transformation + ushr v0.4s,v12.4s,32-2 + ushr v25.4s,v13.4s,32-2 + ushr v1.4s,v12.4s,32-10 + ushr v2.4s,v12.4s,32-18 + ushr v3.4s,v12.4s,32-24 + sli v0.4s,v12.4s,2 + sli v25.4s,v13.4s,2 + sli v1.4s,v12.4s,10 + sli v2.4s,v12.4s,18 + sli v3.4s,v12.4s,24 + eor v24.16b,v0.16b,v12.16b + eor v24.16b,v24.16b,v1.16b + eor v12.16b,v2.16b,v3.16b + eor v12.16b,v12.16b,v24.16b + ushr v1.4s,v13.4s,32-10 + ushr v2.4s,v13.4s,32-18 + ushr v3.4s,v13.4s,32-24 + sli v1.4s,v13.4s,10 + sli v2.4s,v13.4s,18 + sli v3.4s,v13.4s,24 + eor v24.16b,v25.16b,v13.16b + eor v24.16b,v24.16b,v1.16b + eor v13.16b,v2.16b,v3.16b + eor v13.16b,v13.16b,v24.16b + eor v6.16b,v6.16b,v12.16b + eor v10.16b,v10.16b,v13.16b + + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + dup v13.4s,w8 + eor v14.16b,v14.16b,v6.16b + eor v15.16b,v15.16b,v10.16b + eor v12.16b,v14.16b,v13.16b + eor v13.16b,v15.16b,v13.16b + // optimize sbox using AESE instruction + tbl v0.16b, {v12.16b}, v26.16b + tbl v1.16b, {v13.16b}, v26.16b + ushr v24.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v28.16b}, v1.16b + tbl v24.16b, {v27.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + eor v25.16b, v25.16b, v25.16b + aese v0.16b,v25.16b + aese v1.16b,v25.16b + ushr v24.16b, v0.16b, 4 + 
and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v0.16b, v0.16b, v24.16b + ushr v24.16b, v1.16b, 4 + and v1.16b, v1.16b, v31.16b + tbl v1.16b, {v30.16b}, v1.16b + tbl v24.16b, {v29.16b}, v24.16b + eor v1.16b, v1.16b, v24.16b + mov v12.16b,v0.16b + mov v13.16b,v1.16b + + // linear transformation + ushr v0.4s,v12.4s,32-2 + ushr v25.4s,v13.4s,32-2 + ushr v1.4s,v12.4s,32-10 + ushr v2.4s,v12.4s,32-18 + ushr v3.4s,v12.4s,32-24 + sli v0.4s,v12.4s,2 + sli v25.4s,v13.4s,2 + sli v1.4s,v12.4s,10 + sli v2.4s,v12.4s,18 + sli v3.4s,v12.4s,24 + eor v24.16b,v0.16b,v12.16b + eor v24.16b,v24.16b,v1.16b + eor v12.16b,v2.16b,v3.16b + eor v12.16b,v12.16b,v24.16b + ushr v1.4s,v13.4s,32-10 + ushr v2.4s,v13.4s,32-18 + ushr v3.4s,v13.4s,32-24 + sli v1.4s,v13.4s,10 + sli v2.4s,v13.4s,18 + sli v3.4s,v13.4s,24 + eor v24.16b,v25.16b,v13.16b + eor v24.16b,v24.16b,v1.16b + eor v13.16b,v2.16b,v3.16b + eor v13.16b,v13.16b,v24.16b + eor v7.16b,v7.16b,v12.16b + eor v11.16b,v11.16b,v13.16b + subs w11,w11,#1 + b.ne 10b +#ifndef __AARCH64EB__ + rev32 v3.16b,v4.16b +#else + mov v3.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v2.16b,v5.16b +#else + mov v2.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v1.16b,v6.16b +#else + mov v1.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v0.16b,v7.16b +#else + mov v0.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v8.16b +#else + mov v7.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v9.16b +#else + mov v6.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v10.16b +#else + mov v5.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v4.16b,v11.16b +#else + mov v4.16b,v11.16b +#endif + ret +.size _vpsm4_ex_enc_8blks,.-_vpsm4_ex_enc_8blks +.globl vpsm4_ex_set_encrypt_key +.type vpsm4_ex_set_encrypt_key,%function +.align 5 +vpsm4_ex_set_encrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! + mov w2,1 + bl _vpsm4_ex_set_key + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_set_encrypt_key,.-vpsm4_ex_set_encrypt_key +.globl vpsm4_ex_set_decrypt_key +.type vpsm4_ex_set_decrypt_key,%function +.align 5 +vpsm4_ex_set_decrypt_key: + AARCH64_SIGN_LINK_REGISTER + stp x29,x30,[sp,#-16]! 
+ mov w2,0 + bl _vpsm4_ex_set_key + ldp x29,x30,[sp],#16 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_set_decrypt_key,.-vpsm4_ex_set_decrypt_key +.globl vpsm4_ex_encrypt +.type vpsm4_ex_encrypt,%function +.align 5 +vpsm4_ex_encrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v4.4s},[x0] + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x3,x2 + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + 
st1 {v4.4s},[x1] + ret +.size vpsm4_ex_encrypt,.-vpsm4_ex_encrypt +.globl vpsm4_ex_decrypt +.type vpsm4_ex_decrypt,%function +.align 5 +vpsm4_ex_decrypt: + AARCH64_VALID_CALL_TARGET + ld1 {v4.4s},[x0] + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x3,x2 + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + st1 {v4.4s},[x1] + ret +.size vpsm4_ex_decrypt,.-vpsm4_ex_decrypt +.globl 
vpsm4_ex_ecb_encrypt +.type vpsm4_ex_ecb_encrypt,%function +.align 5 +vpsm4_ex_ecb_encrypt: + AARCH64_SIGN_LINK_REGISTER + // convert length into blocks + lsr x2,x2,4 + stp d8,d9,[sp,#-80]! + stp d10,d11,[sp,#16] + stp d12,d13,[sp,#32] + stp d14,d15,[sp,#48] + stp x29,x30,[sp,#64] + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] +.Lecb_8_blocks_process: + cmp w2,#8 + b.lt .Lecb_4_blocks_process + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + bl _vpsm4_ex_enc_8blks + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st4 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#8 + b.gt .Lecb_8_blocks_process + b 100f +.Lecb_4_blocks_process: + cmp w2,#4 + b.lt 1f + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_ex_enc_4blks + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + sub w2,w2,#4 +1: + // process last block + cmp w2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, 
v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + st1 {v4.4s},[x1] + b 100f +1: // process last 2 blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[0],[x0],#16 + ld4 {v4.s,v5.s,v6.s,v7.s}[1],[x0],#16 + cmp w2,#2 + b.gt 1f +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_ex_enc_4blks + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1] + b 100f +1: // process last 3 blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[2],[x0],#16 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_ex_enc_4blks + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[2],[x1] +100: + ldp d10,d11,[sp,#16] + ldp d12,d13,[sp,#32] + ldp d14,d15,[sp,#48] + ldp x29,x30,[sp,#64] + ldp d8,d9,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_ecb_encrypt,.-vpsm4_ex_ecb_encrypt +.globl vpsm4_ex_cbc_encrypt +.type vpsm4_ex_cbc_encrypt,%function +.align 5 +vpsm4_ex_cbc_encrypt: + AARCH64_VALID_CALL_TARGET + lsr x2,x2,4 + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] + cbz w5,.Ldec + ld1 {v3.4s},[x4] +.Lcbc_4_blocks_enc: + cmp w2,#4 + b.lt 1f + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + eor v4.16b,v4.16b,v3.16b +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b 
+ ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 + eor v5.16b,v5.16b,v4.16b + mov x10,x3 + mov w11,#8 + mov w12,v5.s[0] + mov w13,v5.s[1] + mov w14,v5.s[2] + mov w15,v5.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + 
aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v5.s[0],w15 + mov v5.s[1],w14 + mov v5.s[2],w13 + mov v5.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v6.16b,v6.16b,v5.16b + mov x10,x3 + mov w11,#8 + mov w12,v6.s[0] + mov w13,v6.s[1] + mov w14,v6.s[2] + mov w15,v6.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + 
tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v6.s[0],w15 + mov v6.s[1],w14 + mov v6.s[2],w13 + mov v6.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + eor v7.16b,v7.16b,v6.16b + mov x10,x3 + mov w11,#8 + mov w12,v7.s[0] + mov w13,v7.s[1] + mov w14,v7.s[2] + mov w15,v7.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, 
v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v7.s[0],w15 + mov v7.s[1],w14 + mov v7.s[2],w13 + mov v7.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + orr v3.16b,v7.16b,v7.16b + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#4 + b.ne .Lcbc_4_blocks_enc + b 2f +1: + subs w2,w2,#1 + b.lt 2f + ld1 {v4.4s},[x0],#16 + eor v3.16b,v3.16b,v4.16b +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w15,v3.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor 
v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v3.s[0],w15 + mov v3.s[1],w14 + mov v3.s[2],w13 + mov v3.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + st1 {v3.4s},[x1],#16 + b 1b +2: + // save back IV + st1 {v3.4s},[x4] + ret + +.Ldec: + // decryption mode starts + AARCH64_SIGN_LINK_REGISTER + stp d8,d9,[sp,#-80]! + stp d10,d11,[sp,#16] + stp d12,d13,[sp,#32] + stp d14,d15,[sp,#48] + stp x29,x30,[sp,#64] +.Lcbc_8_blocks_dec: + cmp w2,#8 + b.lt 1f + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0] + add x10,x0,#64 + ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x10] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + bl _vpsm4_ex_enc_8blks + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + zip1 v8.4s,v4.4s,v5.4s + zip2 v9.4s,v4.4s,v5.4s + zip1 v10.4s,v6.4s,v7.4s + zip2 v11.4s,v6.4s,v7.4s + zip1 v4.2d,v8.2d,v10.2d + zip2 v5.2d,v8.2d,v10.2d + zip1 v6.2d,v9.2d,v11.2d + zip2 v7.2d,v9.2d,v11.2d + ld1 {v15.4s},[x4] + ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + // note ivec1 and vtmpx[3] are reusing the same register + // care needs to be taken to avoid conflict + eor v0.16b,v0.16b,v15.16b + ld1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + eor v1.16b,v1.16b,v8.16b + eor v2.16b,v2.16b,v9.16b + eor v3.16b,v3.16b,v10.16b + // save back IV + st1 {v15.4s}, [x4] + eor v4.16b,v4.16b,v11.16b + eor v5.16b,v5.16b,v12.16b + eor v6.16b,v6.16b,v13.16b + eor v7.16b,v7.16b,v14.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#8 + b.gt .Lcbc_8_blocks_dec + b.eq 100f +1: + ld1 {v15.4s},[x4] +.Lcbc_4_blocks_dec: + cmp w2,#4 + b.lt 1f + ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_ex_enc_4blks + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + eor v0.16b,v0.16b,v15.16b + eor v1.16b,v1.16b,v4.16b + orr v15.16b,v7.16b,v7.16b + eor v2.16b,v2.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + subs w2,w2,#4 + b.gt .Lcbc_4_blocks_dec + // save back IV + st1 {v7.4s}, [x4] + b 100f +1: // last block + subs w2,w2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0],#16 + // save back IV + st1 {v4.4s}, [x4] +#ifndef __AARCH64EB__ + rev32 v8.16b,v4.16b +#else + mov v8.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v8.s[0] + mov w13,v8.s[1] + mov w14,v8.s[2] + mov w15,v8.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, 
v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v8.s[0],w15 + mov v8.s[1],w14 + mov v8.s[2],w13 + mov v8.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif + eor v8.16b,v8.16b,v15.16b + st1 {v8.4s},[x1],#16 + b 100f +1: // last two blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[0],[x0] + add x10,x0,#16 + ld4 {v4.s,v5.s,v6.s,v7.s}[1],[x10],#16 + subs w2,w2,1 + b.gt 1f +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_ex_enc_4blks + ld1 {v4.4s,v5.4s},[x0],#32 + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + eor v0.16b,v0.16b,v15.16b + eor v1.16b,v1.16b,v4.16b + st1 {v0.4s,v1.4s},[x1],#32 + // save back IV + st1 {v5.4s}, [x4] + b 100f +1: // last 3 blocks + ld4 {v4.s,v5.s,v6.s,v7.s}[2],[x10] +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef 
__AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + bl _vpsm4_ex_enc_4blks + ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + eor v0.16b,v0.16b,v15.16b + eor v1.16b,v1.16b,v4.16b + eor v2.16b,v2.16b,v5.16b + st1 {v0.4s,v1.4s,v2.4s},[x1],#48 + // save back IV + st1 {v6.4s}, [x4] +100: + ldp d10,d11,[sp,#16] + ldp d12,d13,[sp,#32] + ldp d14,d15,[sp,#48] + ldp x29,x30,[sp,#64] + ldp d8,d9,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_cbc_encrypt,.-vpsm4_ex_cbc_encrypt +.globl vpsm4_ex_ctr32_encrypt_blocks +.type vpsm4_ex_ctr32_encrypt_blocks,%function +.align 5 +vpsm4_ex_ctr32_encrypt_blocks: + AARCH64_VALID_CALL_TARGET + ld1 {v3.4s},[x4] +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] + cmp w2,#1 + b.ne 1f + // fast processing for one single block without + // context saving overhead + mov x10,x3 + mov w11,#8 + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w15,v3.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor 
w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v3.s[0],w15 + mov v3.s[1],w14 + mov v3.s[2],w13 + mov v3.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + ld1 {v4.4s},[x0] + eor v4.16b,v4.16b,v3.16b + st1 {v4.4s},[x1] + ret +1: + AARCH64_SIGN_LINK_REGISTER + stp d8,d9,[sp,#-80]! + stp d10,d11,[sp,#16] + stp d12,d13,[sp,#32] + stp d14,d15,[sp,#48] + stp x29,x30,[sp,#64] + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w5,v3.s[3] +.Lctr32_4_blocks_process: + cmp w2,#4 + b.lt 1f + dup v4.4s,w12 + dup v5.4s,w13 + dup v6.4s,w14 + mov v7.s[0],w5 + add w5,w5,#1 + mov v7.s[1],w5 + add w5,w5,#1 + mov v7.s[2],w5 + add w5,w5,#1 + mov v7.s[3],w5 + add w5,w5,#1 + cmp w2,#8 + b.ge .Lctr32_8_blocks_process + bl _vpsm4_ex_enc_4blks + ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + subs w2,w2,#4 + b.ne .Lctr32_4_blocks_process + b 100f +.Lctr32_8_blocks_process: + dup v8.4s,w12 + dup v9.4s,w13 + dup v10.4s,w14 + mov v11.s[0],w5 + add w5,w5,#1 + mov v11.s[1],w5 + add w5,w5,#1 + mov v11.s[2],w5 + add w5,w5,#1 + mov v11.s[3],w5 + add w5,w5,#1 + bl _vpsm4_ex_enc_8blks + ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 + ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + eor v4.16b,v4.16b,v8.16b + eor v5.16b,v5.16b,v9.16b + eor v6.16b,v6.16b,v10.16b + eor v7.16b,v7.16b,v11.16b + st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st4 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs w2,w2,#8 + b.ne .Lctr32_4_blocks_process + b 100f +1: // last block processing + subs w2,w2,#1 + b.lt 100f + b.gt 1f + mov v3.s[0],w12 + mov v3.s[1],w13 + mov v3.s[2],w14 + mov v3.s[3],w5 + mov x10,x3 + mov w11,#8 + mov w12,v3.s[0] + mov w13,v3.s[1] + mov w14,v3.s[2] + mov w15,v3.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + 
aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v3.s[0],w15 + mov v3.s[1],w14 + mov v3.s[2],w13 + mov v3.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v3.16b,v3.16b +#endif + ld1 {v4.4s},[x0] + eor v4.16b,v4.16b,v3.16b + st1 {v4.4s},[x1] + b 100f +1: // last 2 blocks processing + dup v4.4s,w12 + dup v5.4s,w13 + dup v6.4s,w14 + mov v7.s[0],w5 + add w5,w5,#1 + mov v7.s[1],w5 + subs w2,w2,#1 + b.ne 1f + bl _vpsm4_ex_enc_4blks + ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#16 + ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#16 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 + b 100f +1: // last 3 blocks processing + add w5,w5,#1 + mov v7.s[2],w5 + bl _vpsm4_ex_enc_4blks + ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#16 + ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#16 + ld4 {v12.s,v13.s,v14.s,v15.s}[2],[x0],#16 + eor v0.16b,v0.16b,v12.16b + eor v1.16b,v1.16b,v13.16b + eor v2.16b,v2.16b,v14.16b + eor v3.16b,v3.16b,v15.16b + st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 + st4 {v0.s,v1.s,v2.s,v3.s}[2],[x1],#16 +100: + ldp d10,d11,[sp,#16] + ldp d12,d13,[sp,#32] + ldp d14,d15,[sp,#48] + ldp x29,x30,[sp,#64] + ldp d8,d9,[sp],#80 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_ctr32_encrypt_blocks,.-vpsm4_ex_ctr32_encrypt_blocks +.globl vpsm4_ex_xts_encrypt_gb +.type vpsm4_ex_xts_encrypt_gb,%function +.align 5 +vpsm4_ex_xts_encrypt_gb: + AARCH64_SIGN_LINK_REGISTER + stp x15, x16, [sp, #-0x10]! + stp x17, x18, [sp, #-0x10]! + stp x19, x20, [sp, #-0x10]! + stp x21, x22, [sp, #-0x10]! + stp x23, x24, [sp, #-0x10]! + stp x25, x26, [sp, #-0x10]! + stp x27, x28, [sp, #-0x10]! + stp x29, x30, [sp, #-0x10]! + stp d8, d9, [sp, #-0x10]! + stp d10, d11, [sp, #-0x10]! + stp d12, d13, [sp, #-0x10]! + stp d14, d15, [sp, #-0x10]! 
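The single-block fast paths above (and the CBC/XTS tails below) all expand the same scalar SM4 round: each 32-bit word is replaced by itself XORed with T applied to the other three words and a round key, where T is the byte-wise S-box followed by the linear transform L(b) = b ^ rol(b,2) ^ rol(b,10) ^ rol(b,18) ^ rol(b,24) (the paired "eor ..., ror #32-n" instructions compute left rotations by n). A minimal C sketch of one four-round group, assuming a plain table-driven S-box (SM4_SBOX below is a hypothetical lookup table; the assembly instead derives the S-box values from the AES S-box via aese and the affine tbl constants in .Lsbox_magic):

    #include <stdint.h>

    /* Hypothetical table-driven S-box; the assembly derives the same values
     * from the AES S-box via aese and the .Lsbox_magic affine constants. */
    extern const uint8_t SM4_SBOX[256];

    static inline uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    static inline uint32_t sm4_tau(uint32_t x)          /* S-box on each byte */
    {
        return ((uint32_t)SM4_SBOX[x >> 24] << 24) |
               ((uint32_t)SM4_SBOX[(x >> 16) & 0xff] << 16) |
               ((uint32_t)SM4_SBOX[(x >> 8) & 0xff] << 8) |
                (uint32_t)SM4_SBOX[x & 0xff];
    }

    static inline uint32_t sm4_T(uint32_t x)            /* T = L(tau(x)) */
    {
        uint32_t b = sm4_tau(x);
        return b ^ rotl32(b, 2) ^ rotl32(b, 10) ^ rotl32(b, 18) ^ rotl32(b, 24);
    }

    /* One four-round group, matching the unrolled "10:" loop body;
     * eight iterations give the full 32 rounds (mov w11,#8). */
    static void sm4_four_rounds(uint32_t b[4], const uint32_t rk[4])
    {
        b[0] ^= sm4_T(b[1] ^ b[2] ^ b[3] ^ rk[0]);
        b[1] ^= sm4_T(b[0] ^ b[2] ^ b[3] ^ rk[1]);
        b[2] ^= sm4_T(b[0] ^ b[1] ^ b[3] ^ rk[2]);
        b[3] ^= sm4_T(b[0] ^ b[1] ^ b[2] ^ rk[3]);
    }

Running this group eight times over the four state words, then storing the words in reverse order, matches the per-block pattern repeated throughout this file.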
+ mov x26,x3 + mov x27,x4 + mov w28,w6 + ld1 {v16.4s}, [x5] + mov x3,x27 + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v16.s[0] + mov w13,v16.s[1] + mov w14,v16.s[2] + mov w15,v16.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v16.s[0],w15 + mov v16.s[1],w14 + mov v16.s[2],w13 + mov v16.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov x3,x26 + and x29,x2,#0x0F + // convert length into blocks + lsr x2,x2,4 + cmp x2,#1 + b.lt .return_gb + + cmp x29,0 + // If the encryption/decryption Length is N times of 16, + // the all blocks are 
encrypted/decrypted in .xts_encrypt_blocks_gb + b.eq .xts_encrypt_blocks_gb + + // If the encryption/decryption length is not N times of 16, + // the last two blocks are encrypted/decrypted in .last_2blks_tweak_gb or .only_2blks_tweak_gb + // the other blocks are encrypted/decrypted in .xts_encrypt_blocks_gb + subs x2,x2,#1 + b.eq .only_2blks_tweak_gb +.xts_encrypt_blocks_gb: + rbit v16.16b,v16.16b +#ifdef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov x12,v16.d[0] + mov x13,v16.d[1] + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 +.Lxts_8_blocks_process_gb: + cmp x2,#8 + mov v16.d[0],x12 + mov v16.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov w7,0x87 + extr x9,x27,x27,#32 + extr x13,x27,x26,#63 + and w8,w7,w9,asr#31 + eor x12,x8,x26,lsl#1 + mov v17.d[0],x14 + mov v17.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov v18.d[0],x16 + mov v18.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov v19.d[0],x18 + mov v19.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov v20.d[0],x20 + mov v20.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov v21.d[0],x22 + mov v21.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov v22.d[0],x24 + mov v22.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov v23.d[0],x26 + mov v23.d[1],x27 +#ifdef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 + b.lt .Lxts_4_blocks_process_gb + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + rbit v16.16b,v16.16b + rbit v17.16b,v17.16b + rbit v18.16b,v18.16b + rbit v19.16b,v19.16b + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v6.16b, v6.16b, v18.16b + eor v7.16b, v7.16b, v19.16b + ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + rbit v20.16b,v20.16b + rbit v21.16b,v21.16b + rbit v22.16b,v22.16b + rbit v23.16b,v23.16b + eor v8.16b, v8.16b, v20.16b + eor v9.16b, v9.16b, v21.16b + eor v10.16b, v10.16b, v22.16b + eor v11.16b, v11.16b, v23.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef 
__AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + zip1 v0.4s,v8.4s,v9.4s + zip2 v1.4s,v8.4s,v9.4s + zip1 v2.4s,v10.4s,v11.4s + zip2 v3.4s,v10.4s,v11.4s + zip1 v8.2d,v0.2d,v2.2d + zip2 v9.2d,v0.2d,v2.2d + zip1 v10.2d,v1.2d,v3.2d + zip2 v11.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_8blks + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + zip1 v8.4s,v4.4s,v5.4s + zip2 v9.4s,v4.4s,v5.4s + zip1 v10.4s,v6.4s,v7.4s + zip2 v11.4s,v6.4s,v7.4s + zip1 v4.2d,v8.2d,v10.2d + zip2 v5.2d,v8.2d,v10.2d + zip1 v6.2d,v9.2d,v11.2d + zip2 v7.2d,v9.2d,v11.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + eor v2.16b, v2.16b, v18.16b + eor v3.16b, v3.16b, v19.16b + eor v4.16b, v4.16b, v20.16b + eor v5.16b, v5.16b, v21.16b + eor v6.16b, v6.16b, v22.16b + eor v7.16b, v7.16b, v23.16b + + // save the last tweak + mov v25.16b,v23.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs x2,x2,#8 + b.gt .Lxts_8_blocks_process_gb + b 100f +.Lxts_4_blocks_process_gb: + cmp x2,#4 + b.lt 1f + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + rbit v16.16b,v16.16b + rbit v17.16b,v17.16b + rbit v18.16b,v18.16b + rbit v19.16b,v19.16b + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v6.16b, v6.16b, v18.16b + eor v7.16b, v7.16b, v19.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + eor v2.16b, v2.16b, v18.16b + eor v3.16b, v3.16b, v19.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + sub x2,x2,#4 + mov v16.16b,v20.16b + mov v17.16b,v21.16b + mov v18.16b,v22.16b + // save the last tweak + mov v25.16b,v19.16b +1: + // process last block + cmp x2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0],#16 + rbit v16.16b,v16.16b + eor v4.16b, v4.16b, v16.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, 
{v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v16.16b + st1 {v4.4s},[x1],#16 + // save the last tweak + mov v25.16b,v16.16b + b 100f +1: // process last 2 blocks + cmp x2,#2 + b.gt 1f + ld1 {v4.4s,v5.4s},[x0],#32 + rbit v16.16b,v16.16b + rbit v17.16b,v17.16b + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + st1 {v0.4s,v1.4s},[x1],#32 + // save the last tweak + mov v25.16b,v17.16b + b 100f +1: // process last 3 blocks + ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 + rbit v16.16b,v16.16b + rbit v17.16b,v17.16b + rbit v18.16b,v18.16b + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v6.16b, v6.16b, v18.16b +#ifndef 
__AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + eor v2.16b, v2.16b, v18.16b + st1 {v0.4s,v1.4s,v2.4s},[x1],#48 + // save the last tweak + mov v25.16b,v18.16b +100: + cmp x29,0 + b.eq .return_gb + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is larger than 32 +.last_2blks_tweak_gb: +#ifdef __AARCH64EB__ + rev32 v25.16b,v25.16b +#endif + rbit v2.16b,v25.16b + adrp x9, .Lxts_magic + ldr q0, [x9, #:lo12:.Lxts_magic] + shl v17.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v17.16b, v17.16b, v1.16b + rbit v17.16b,v17.16b + rbit v2.16b,v17.16b + adrp x9, .Lxts_magic + ldr q0, [x9, #:lo12:.Lxts_magic] + shl v18.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v18.16b, v18.16b, v1.16b + rbit v18.16b,v18.16b + b .check_dec_gb + + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is equal to 32, who only need two tweaks +.only_2blks_tweak_gb: + mov v17.16b,v16.16b +#ifdef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif + rbit v2.16b,v17.16b + adrp x9, .Lxts_magic + ldr q0, [x9, #:lo12:.Lxts_magic] + shl v18.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v18.16b, v18.16b, v1.16b + rbit v18.16b,v18.16b + b .check_dec_gb + + +// Determine whether encryption or decryption is required. +// The last two tweaks need to be swapped for decryption. 
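The .Lxts_magic sequences above advance the XTS tweak by one block, i.e. multiply it by x in GF(2^128) with the reduction constant 0x87; the _gb variants additionally bit-reverse the tweak with rbit before and after the doubling so the multiplication is carried out in the reversed bit order of the GB/T XTS flavour. A minimal C sketch of the plain (non-reversed) doubling, under those assumptions:

    #include <stdint.h>

    /* Multiply an XTS tweak by x in GF(2^128), reduction polynomial
     * x^128 + x^7 + x^2 + x + 1 (the 0x87 constant loaded from .Lxts_magic).
     * Little-endian byte order as in IEEE P1619; the _gb path wraps an
     * equivalent doubling in rbit. */
    static void xts_next_tweak(uint8_t t[16])
    {
        uint8_t carry = 0;
        for (int i = 0; i < 16; i++) {
            uint8_t msb = t[i] >> 7;             /* bit shifted out of this byte */
            t[i] = (uint8_t)((t[i] << 1) | carry);
            carry = msb;
        }
        if (carry)
            t[0] ^= 0x87;                        /* fold x^128 back into the low byte */
    }

The scalar extr/and/eor chains in the 8-block loop compute the same doubling on the two 64-bit halves of the tweak, keeping eight consecutive tweaks live in general-purpose registers.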
+.check_dec_gb: + // encryption:1 decryption:0 + cmp w28,1 + b.eq .process_last_2blks_gb + mov v0.16B,v17.16b + mov v17.16B,v18.16b + mov v18.16B,v0.16b + +.process_last_2blks_gb: +#ifdef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifdef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif + ld1 {v4.4s},[x0],#16 + eor v4.16b, v4.16b, v17.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v17.16b + st1 {v4.4s},[x1],#16 + + sub x26,x1,16 +.loop_gb: + subs x29,x29,1 + ldrb w7,[x26,x29] + ldrb w8,[x0,x29] + strb w8,[x26,x29] + strb w7,[x1,x29] + b.gt .loop_gb + ld1 {v4.4s}, [x26] + eor 
v4.16b, v4.16b, v18.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v18.16b + st1 {v4.4s}, [x26] +.return_gb: + ldp d14, d15, [sp], #0x10 + ldp d12, d13, [sp], #0x10 + ldp d10, d11, [sp], #0x10 + ldp d8, d9, [sp], #0x10 + ldp x29, x30, [sp], #0x10 + ldp x27, x28, [sp], #0x10 + ldp x25, x26, [sp], #0x10 + ldp x23, x24, [sp], #0x10 + ldp x21, x22, [sp], #0x10 + ldp x19, x20, [sp], #0x10 + ldp x17, x18, [sp], #0x10 + ldp x15, x16, [sp], #0x10 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_xts_encrypt_gb,.-vpsm4_ex_xts_encrypt_gb +.globl vpsm4_ex_xts_encrypt +.type 
vpsm4_ex_xts_encrypt,%function +.align 5 +vpsm4_ex_xts_encrypt: + AARCH64_SIGN_LINK_REGISTER + stp x15, x16, [sp, #-0x10]! + stp x17, x18, [sp, #-0x10]! + stp x19, x20, [sp, #-0x10]! + stp x21, x22, [sp, #-0x10]! + stp x23, x24, [sp, #-0x10]! + stp x25, x26, [sp, #-0x10]! + stp x27, x28, [sp, #-0x10]! + stp x29, x30, [sp, #-0x10]! + stp d8, d9, [sp, #-0x10]! + stp d10, d11, [sp, #-0x10]! + stp d12, d13, [sp, #-0x10]! + stp d14, d15, [sp, #-0x10]! + mov x26,x3 + mov x27,x4 + mov w28,w6 + ld1 {v16.4s}, [x5] + mov x3,x27 + adrp x9, .Lsbox_magic + ldr q26, [x9, #:lo12:.Lsbox_magic] + ldr q27, [x9, #:lo12:.Lsbox_magic+16] + ldr q28, [x9, #:lo12:.Lsbox_magic+32] + ldr q29, [x9, #:lo12:.Lsbox_magic+48] + ldr q30, [x9, #:lo12:.Lsbox_magic+64] + ldr q31, [x9, #:lo12:.Lsbox_magic+80] +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v16.s[0] + mov w13,v16.s[1] + mov w14,v16.s[2] + mov w15,v16.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror 
#32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v16.s[0],w15 + mov v16.s[1],w14 + mov v16.s[2],w13 + mov v16.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov x3,x26 + and x29,x2,#0x0F + // convert length into blocks + lsr x2,x2,4 + cmp x2,#1 + b.lt .return + + cmp x29,0 + // If the encryption/decryption Length is N times of 16, + // the all blocks are encrypted/decrypted in .xts_encrypt_blocks + b.eq .xts_encrypt_blocks + + // If the encryption/decryption length is not N times of 16, + // the last two blocks are encrypted/decrypted in .last_2blks_tweak or .only_2blks_tweak + // the other blocks are encrypted/decrypted in .xts_encrypt_blocks + subs x2,x2,#1 + b.eq .only_2blks_tweak +.xts_encrypt_blocks: +#ifdef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov x12,v16.d[0] + mov x13,v16.d[1] + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 +.Lxts_8_blocks_process: + cmp x2,#8 + mov v16.d[0],x12 + mov v16.d[1],x13 +#ifdef __AARCH64EB__ + rev32 v16.16b,v16.16b +#endif + mov w7,0x87 + extr x9,x27,x27,#32 + extr x13,x27,x26,#63 + and w8,w7,w9,asr#31 + eor x12,x8,x26,lsl#1 + mov v17.d[0],x14 + mov v17.d[1],x15 +#ifdef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif + mov w7,0x87 + extr x9,x13,x13,#32 + extr x15,x13,x12,#63 + and w8,w7,w9,asr#31 + eor x14,x8,x12,lsl#1 + mov v18.d[0],x16 + mov v18.d[1],x17 +#ifdef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif + mov w7,0x87 + extr x9,x15,x15,#32 + extr x17,x15,x14,#63 + and w8,w7,w9,asr#31 + eor x16,x8,x14,lsl#1 + mov v19.d[0],x18 + mov v19.d[1],x19 +#ifdef __AARCH64EB__ + rev32 v19.16b,v19.16b +#endif + mov w7,0x87 + extr x9,x17,x17,#32 + extr x19,x17,x16,#63 + and w8,w7,w9,asr#31 + eor x18,x8,x16,lsl#1 + mov v20.d[0],x20 + mov v20.d[1],x21 +#ifdef __AARCH64EB__ + rev32 v20.16b,v20.16b +#endif + mov w7,0x87 + extr x9,x19,x19,#32 + extr x21,x19,x18,#63 + and w8,w7,w9,asr#31 + eor x20,x8,x18,lsl#1 + mov v21.d[0],x22 + mov v21.d[1],x23 +#ifdef __AARCH64EB__ + rev32 v21.16b,v21.16b +#endif + mov w7,0x87 + extr x9,x21,x21,#32 + extr x23,x21,x20,#63 + and w8,w7,w9,asr#31 + eor x22,x8,x20,lsl#1 + mov v22.d[0],x24 + mov v22.d[1],x25 +#ifdef __AARCH64EB__ + rev32 v22.16b,v22.16b +#endif + mov w7,0x87 + extr x9,x23,x23,#32 + extr x25,x23,x22,#63 + and w8,w7,w9,asr#31 + eor x24,x8,x22,lsl#1 + mov v23.d[0],x26 + mov v23.d[1],x27 +#ifdef __AARCH64EB__ + rev32 v23.16b,v23.16b +#endif + mov w7,0x87 + extr x9,x25,x25,#32 + extr x27,x25,x24,#63 + and w8,w7,w9,asr#31 + eor x26,x8,x24,lsl#1 + b.lt .Lxts_4_blocks_process + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v6.16b, v6.16b, v18.16b + eor v7.16b, v7.16b, v19.16b + ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 + eor v8.16b, v8.16b, v20.16b + eor v9.16b, v9.16b, v21.16b + eor v10.16b, v10.16b, v22.16b + eor 
v11.16b, v11.16b, v23.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif +#ifndef __AARCH64EB__ + rev32 v8.16b,v8.16b +#endif +#ifndef __AARCH64EB__ + rev32 v9.16b,v9.16b +#endif +#ifndef __AARCH64EB__ + rev32 v10.16b,v10.16b +#endif +#ifndef __AARCH64EB__ + rev32 v11.16b,v11.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + zip1 v0.4s,v8.4s,v9.4s + zip2 v1.4s,v8.4s,v9.4s + zip1 v2.4s,v10.4s,v11.4s + zip2 v3.4s,v10.4s,v11.4s + zip1 v8.2d,v0.2d,v2.2d + zip2 v9.2d,v0.2d,v2.2d + zip1 v10.2d,v1.2d,v3.2d + zip2 v11.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_8blks + zip1 v8.4s,v0.4s,v1.4s + zip2 v9.4s,v0.4s,v1.4s + zip1 v10.4s,v2.4s,v3.4s + zip2 v11.4s,v2.4s,v3.4s + zip1 v0.2d,v8.2d,v10.2d + zip2 v1.2d,v8.2d,v10.2d + zip1 v2.2d,v9.2d,v11.2d + zip2 v3.2d,v9.2d,v11.2d + zip1 v8.4s,v4.4s,v5.4s + zip2 v9.4s,v4.4s,v5.4s + zip1 v10.4s,v6.4s,v7.4s + zip2 v11.4s,v6.4s,v7.4s + zip1 v4.2d,v8.2d,v10.2d + zip2 v5.2d,v8.2d,v10.2d + zip1 v6.2d,v9.2d,v11.2d + zip2 v7.2d,v9.2d,v11.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + eor v2.16b, v2.16b, v18.16b + eor v3.16b, v3.16b, v19.16b + eor v4.16b, v4.16b, v20.16b + eor v5.16b, v5.16b, v21.16b + eor v6.16b, v6.16b, v22.16b + eor v7.16b, v7.16b, v23.16b + + // save the last tweak + mov v25.16b,v23.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 + subs x2,x2,#8 + b.gt .Lxts_8_blocks_process + b 100f +.Lxts_4_blocks_process: + cmp x2,#4 + b.lt 1f + ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v6.16b, v6.16b, v18.16b + eor v7.16b, v7.16b, v19.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif +#ifndef __AARCH64EB__ + rev32 v7.16b,v7.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + eor v2.16b, v2.16b, v18.16b + eor v3.16b, v3.16b, v19.16b + st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 + sub x2,x2,#4 + mov v16.16b,v20.16b + mov v17.16b,v21.16b + mov v18.16b,v22.16b + // save the last tweak + mov v25.16b,v19.16b +1: + // process last block + cmp x2,#1 + b.lt 100f + b.gt 1f + ld1 {v4.4s},[x0],#16 + eor v4.16b, v4.16b, v16.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and 
v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v16.16b + st1 {v4.4s},[x1],#16 + // save the last tweak + mov v25.16b,v16.16b + b 100f +1: // process last 2 blocks + cmp x2,#2 + b.gt 1f + ld1 {v4.4s,v5.4s},[x0],#32 + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef __AARCH64EB__ + rev32 v5.16b,v5.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + st1 {v0.4s,v1.4s},[x1],#32 + // save the last tweak + mov v25.16b,v17.16b + b 100f +1: // process last 3 blocks + ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 + eor v4.16b, v4.16b, v16.16b + eor v5.16b, v5.16b, v17.16b + eor v6.16b, v6.16b, v18.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif +#ifndef 
__AARCH64EB__ + rev32 v5.16b,v5.16b +#endif +#ifndef __AARCH64EB__ + rev32 v6.16b,v6.16b +#endif + zip1 v0.4s,v4.4s,v5.4s + zip2 v1.4s,v4.4s,v5.4s + zip1 v2.4s,v6.4s,v7.4s + zip2 v3.4s,v6.4s,v7.4s + zip1 v4.2d,v0.2d,v2.2d + zip2 v5.2d,v0.2d,v2.2d + zip1 v6.2d,v1.2d,v3.2d + zip2 v7.2d,v1.2d,v3.2d + bl _vpsm4_ex_enc_4blks + zip1 v4.4s,v0.4s,v1.4s + zip2 v5.4s,v0.4s,v1.4s + zip1 v6.4s,v2.4s,v3.4s + zip2 v7.4s,v2.4s,v3.4s + zip1 v0.2d,v4.2d,v6.2d + zip2 v1.2d,v4.2d,v6.2d + zip1 v2.2d,v5.2d,v7.2d + zip2 v3.2d,v5.2d,v7.2d + eor v0.16b, v0.16b, v16.16b + eor v1.16b, v1.16b, v17.16b + eor v2.16b, v2.16b, v18.16b + st1 {v0.4s,v1.4s,v2.4s},[x1],#48 + // save the last tweak + mov v25.16b,v18.16b +100: + cmp x29,0 + b.eq .return + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is larger than 32 +.last_2blks_tweak: +#ifdef __AARCH64EB__ + rev32 v25.16b,v25.16b +#endif + mov v2.16b,v25.16b + adrp x9, .Lxts_magic + ldr q0, [x9, #:lo12:.Lxts_magic] + shl v17.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v17.16b, v17.16b, v1.16b + mov v2.16b,v17.16b + adrp x9, .Lxts_magic + ldr q0, [x9, #:lo12:.Lxts_magic] + shl v18.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v18.16b, v18.16b, v1.16b + b .check_dec + + +// This branch calculates the last two tweaks, +// while the encryption/decryption length is equal to 32, who only need two tweaks +.only_2blks_tweak: + mov v17.16b,v16.16b +#ifdef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif + mov v2.16b,v17.16b + adrp x9, .Lxts_magic + ldr q0, [x9, #:lo12:.Lxts_magic] + shl v18.16b, v2.16b, #1 + ext v1.16b, v2.16b, v2.16b,#15 + ushr v1.16b, v1.16b, #7 + mul v1.16b, v1.16b, v0.16b + eor v18.16b, v18.16b, v1.16b + b .check_dec + + +// Determine whether encryption or decryption is required. +// The last two tweaks need to be swapped for decryption. 
+.check_dec: + // encryption:1 decryption:0 + cmp w28,1 + b.eq .process_last_2blks + mov v0.16B,v17.16b + mov v17.16B,v18.16b + mov v18.16B,v0.16b + +.process_last_2blks: +#ifdef __AARCH64EB__ + rev32 v17.16b,v17.16b +#endif +#ifdef __AARCH64EB__ + rev32 v18.16b,v18.16b +#endif + ld1 {v4.4s},[x0],#16 + eor v4.16b, v4.16b, v17.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v17.16b + st1 {v4.4s},[x1],#16 + + sub x26,x1,16 +.loop: + subs x29,x29,1 + ldrb w7,[x26,x29] + ldrb w8,[x0,x29] + strb w8,[x26,x29] + strb w7,[x1,x29] + b.gt .loop + ld1 {v4.4s}, [x26] + eor v4.16b, v4.16b, 
v18.16b +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + mov x10,x3 + mov w11,#8 + mov w12,v4.s[0] + mov w13,v4.s[1] + mov w14,v4.s[2] + mov w15,v4.s[3] +10: + ldp w7,w8,[x10],8 + // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) + eor w6,w14,w15 + eor w9,w7,w13 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w12,w12,w6 + // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) + eor w6,w14,w15 + eor w9,w12,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + ldp w7,w8,[x10],8 + eor w13,w13,w6 + // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) + eor w6,w12,w13 + eor w9,w7,w15 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w14,w14,w6 + // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) + eor w6,w12,w13 + eor w9,w14,w8 + eor w6,w6,w9 + mov v3.s[0],w6 + // optimize sbox using AESE instruction + tbl v0.16b, {v3.16b}, v26.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v28.16b}, v0.16b + tbl v2.16b, {v27.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v1.16b + aese v0.16b,v1.16b + ushr v2.16b, v0.16b, 4 + and v0.16b, v0.16b, v31.16b + tbl v0.16b, {v30.16b}, v0.16b + tbl v2.16b, {v29.16b}, v2.16b + eor v0.16b, v0.16b, v2.16b + + mov w7,v0.s[0] + eor w6,w7,w7,ror #32-2 + eor w6,w6,w7,ror #32-10 + eor w6,w6,w7,ror #32-18 + eor w6,w6,w7,ror #32-24 + eor w15,w15,w6 + subs w11,w11,#1 + b.ne 10b + mov v4.s[0],w15 + mov v4.s[1],w14 + mov v4.s[2],w13 + mov v4.s[3],w12 +#ifndef __AARCH64EB__ + rev32 v4.16b,v4.16b +#endif + eor v4.16b, v4.16b, v18.16b + st1 {v4.4s}, [x26] +.return: + ldp d14, d15, [sp], #0x10 + ldp d12, d13, [sp], #0x10 + ldp d10, d11, [sp], #0x10 + ldp d8, d9, [sp], #0x10 + ldp x29, x30, [sp], #0x10 + ldp x27, x28, [sp], #0x10 + ldp x25, x26, [sp], #0x10 + ldp x23, x24, [sp], #0x10 + ldp x21, x22, [sp], #0x10 + ldp x19, x20, [sp], #0x10 + ldp x17, x18, [sp], #0x10 + ldp x15, x16, [sp], #0x10 + AARCH64_VALIDATE_LINK_REGISTER + ret +.size vpsm4_ex_xts_encrypt,.-vpsm4_ex_xts_encrypt diff --git a/contrib/openssl-cmake/asm/crypto/sm4/sm4-riscv64-zvksed.S 
b/contrib/openssl-cmake/asm/crypto/sm4/sm4-riscv64-zvksed.S new file mode 100644 index 000000000000..c353c27e1c06 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/sm4/sm4-riscv64-zvksed.S @@ -0,0 +1,188 @@ +.text +.p2align 3 +.globl rv64i_zvksed_sm4_set_encrypt_key +.type rv64i_zvksed_sm4_set_encrypt_key,@function +rv64i_zvksed_sm4_set_encrypt_key: + .word 0xc1027057 + + # Load the user key + .word 33906823 + .word 1242865879 + + # Load the FK. + la t0, FK + .word 33743111 + + # Generate round keys. + .word 772866263 + .word 2249204215 # rk[0:3] + .word 2251334263 # rk[4:7] + .word 2252415735 # rk[8:11] + .word 2253497207 # rk[12:15] + .word 2254578679 # rk[16:19] + .word 2255660151 # rk[20:23] + .word 2256741623 # rk[24:27] + .word 2257823095 # rk[28:31] + + # Store round keys + .word 33939879 # rk[0:3] + addi a1, a1, 16 + .word 33940007 # rk[4:7] + addi a1, a1, 16 + .word 33940135 # rk[8:11] + addi a1, a1, 16 + .word 33940263 # rk[12:15] + addi a1, a1, 16 + .word 33940391 # rk[16:19] + addi a1, a1, 16 + .word 33940519 # rk[20:23] + addi a1, a1, 16 + .word 33940647 # rk[24:27] + addi a1, a1, 16 + .word 33940775 # rk[28:31] + + li a0, 1 + ret +.size rv64i_zvksed_sm4_set_encrypt_key,.-rv64i_zvksed_sm4_set_encrypt_key +.p2align 3 +.globl rv64i_zvksed_sm4_set_decrypt_key +.type rv64i_zvksed_sm4_set_decrypt_key,@function +rv64i_zvksed_sm4_set_decrypt_key: + .word 0xc1027057 + + # Load the user key + .word 33906823 + .word 1242865879 + + # Load the FK. + la t0, FK + .word 33743111 + + # Generate round keys. + .word 772866263 + .word 2249204215 # rk[0:3] + .word 2251334263 # rk[4:7] + .word 2252415735 # rk[8:11] + .word 2253497207 # rk[12:15] + .word 2254578679 # rk[16:19] + .word 2255660151 # rk[20:23] + .word 2256741623 # rk[24:27] + .word 2257823095 # rk[28:31] + + # Store round keys in reverse order + addi a1, a1, 12 + li t1, -4 + .word 174449959 # rk[31:28] + addi a1, a1, 16 + .word 174449831 # rk[27:24] + addi a1, a1, 16 + .word 174449703 # rk[23:20] + addi a1, a1, 16 + .word 174449575 # rk[19:16] + addi a1, a1, 16 + .word 174449447 # rk[15:12] + addi a1, a1, 16 + .word 174449319 # rk[11:8] + addi a1, a1, 16 + .word 174449191 # rk[7:4] + addi a1, a1, 16 + .word 174449063 # rk[3:0] + + li a0, 1 + ret +.size rv64i_zvksed_sm4_set_decrypt_key,.-rv64i_zvksed_sm4_set_decrypt_key +.p2align 3 +.globl rv64i_zvksed_sm4_encrypt +.type rv64i_zvksed_sm4_encrypt,@function +rv64i_zvksed_sm4_encrypt: + .word 0xc1027057 + + # Order of elements was adjusted in set_encrypt_key() + .word 33972487 # rk[0:3] + addi a2, a2, 16 + .word 33972615 # rk[4:7] + addi a2, a2, 16 + .word 33972743 # rk[8:11] + addi a2, a2, 16 + .word 33972871 # rk[12:15] + addi a2, a2, 16 + .word 33972999 # rk[16:19] + addi a2, a2, 16 + .word 33973127 # rk[20:23] + addi a2, a2, 16 + .word 33973255 # rk[24:27] + addi a2, a2, 16 + .word 33973383 # rk[28:31] + + # Load input data + .word 33906823 + .word 1242865879 + + # Encrypt with all keys + .word 2787647735 + .word 2788696311 + .word 2789744887 + .word 2790793463 + .word 2791842039 + .word 2792890615 + .word 2793939191 + .word 2794987767 + + # Save the ciphertext (in reverse element order) + .word 1242865879 + li t0, -4 + addi a1, a1, 12 + .word 173400231 + + ret +.size rv64i_zvksed_sm4_encrypt,.-rv64i_zvksed_sm4_encrypt +.p2align 3 +.globl rv64i_zvksed_sm4_decrypt +.type rv64i_zvksed_sm4_decrypt,@function +rv64i_zvksed_sm4_decrypt: + .word 0xc1027057 + + # Order of elements was adjusted in set_decrypt_key() + .word 33973383 # rk[31:28] + addi a2, a2, 16 + .word 33973255 # rk[27:24] 
+ addi a2, a2, 16 + .word 33973127 # rk[23:20] + addi a2, a2, 16 + .word 33972999 # rk[19:16] + addi a2, a2, 16 + .word 33972871 # rk[15:11] + addi a2, a2, 16 + .word 33972743 # rk[11:8] + addi a2, a2, 16 + .word 33972615 # rk[7:4] + addi a2, a2, 16 + .word 33972487 # rk[3:0] + + # Load input data + .word 33906823 + .word 1242865879 + + # Encrypt with all keys + .word 2794987767 + .word 2793939191 + .word 2792890615 + .word 2791842039 + .word 2790793463 + .word 2789744887 + .word 2788696311 + .word 2787647735 + + # Save the ciphertext (in reverse element order) + .word 1242865879 + li t0, -4 + addi a1, a1, 12 + .word 173400231 + + ret +.size rv64i_zvksed_sm4_decrypt,.-rv64i_zvksed_sm4_decrypt +# Family Key (little-endian 32-bit chunks) +.p2align 3 +FK: + .word 0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC +.size FK,.-FK diff --git a/contrib/openssl-cmake/asm/crypto/whrlpool/wp-x86_64.s b/contrib/openssl-cmake/asm/crypto/whrlpool/wp-x86_64.s new file mode 100644 index 000000000000..9f59c5d7efd4 --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/whrlpool/wp-x86_64.s @@ -0,0 +1,880 @@ +.text + +.globl whirlpool_block +.type whirlpool_block,@function +.align 16 +whirlpool_block: +.cfi_startproc + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + + subq $128+40,%rsp + andq $-64,%rsp + + leaq 128(%rsp),%r10 + movq %rdi,0(%r10) + movq %rsi,8(%r10) + movq %rdx,16(%r10) + movq %rax,32(%r10) +.cfi_escape 0x0f,0x06,0x77,0xa0,0x01,0x06,0x23,0x08 +.Lprologue: + + movq %r10,%rbx + leaq .Ltable(%rip),%rbp + + xorq %rcx,%rcx + xorq %rdx,%rdx + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 +.Louterloop: + movq %r8,0(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + xorq 0(%rsi),%r8 + xorq 8(%rsi),%r9 + xorq 16(%rsi),%r10 + xorq 24(%rsi),%r11 + xorq 32(%rsi),%r12 + xorq 40(%rsi),%r13 + xorq 48(%rsi),%r14 + xorq 56(%rsi),%r15 + movq %r8,64+0(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + xorq %rsi,%rsi + movq %rsi,24(%rbx) + jmp .Lround +.align 16 +.Lround: + movq 4096(%rbp,%rsi,8),%r8 + movl 0(%rsp),%eax + movl 4(%rsp),%ebx + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r8 + movq 7(%rbp,%rdi,8),%r9 + movl 0+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + movq 6(%rbp,%rsi,8),%r10 + movq 5(%rbp,%rdi,8),%r11 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + movq 4(%rbp,%rsi,8),%r12 + movq 3(%rbp,%rdi,8),%r13 + movl 0+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + movq 2(%rbp,%rsi,8),%r14 + movq 1(%rbp,%rdi,8),%r15 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r9 + xorq 7(%rbp,%rdi,8),%r10 + movl 8+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 
6(%rbp,%rsi,8),%r11 + xorq 5(%rbp,%rdi,8),%r12 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r13 + xorq 3(%rbp,%rdi,8),%r14 + movl 8+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r15 + xorq 1(%rbp,%rdi,8),%r8 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r10 + xorq 7(%rbp,%rdi,8),%r11 + movl 16+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r12 + xorq 5(%rbp,%rdi,8),%r13 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r14 + xorq 3(%rbp,%rdi,8),%r15 + movl 16+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r8 + xorq 1(%rbp,%rdi,8),%r9 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r11 + xorq 7(%rbp,%rdi,8),%r12 + movl 24+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r13 + xorq 5(%rbp,%rdi,8),%r14 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r15 + xorq 3(%rbp,%rdi,8),%r8 + movl 24+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r9 + xorq 1(%rbp,%rdi,8),%r10 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r12 + xorq 7(%rbp,%rdi,8),%r13 + movl 32+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r14 + xorq 5(%rbp,%rdi,8),%r15 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r8 + xorq 3(%rbp,%rdi,8),%r9 + movl 32+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r10 + xorq 1(%rbp,%rdi,8),%r11 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r13 + xorq 7(%rbp,%rdi,8),%r14 + movl 40+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r15 + xorq 5(%rbp,%rdi,8),%r8 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r9 + xorq 3(%rbp,%rdi,8),%r10 + movl 40+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r11 + xorq 1(%rbp,%rdi,8),%r12 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r14 + xorq 7(%rbp,%rdi,8),%r15 + movl 48+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r8 + xorq 5(%rbp,%rdi,8),%r9 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r10 + xorq 3(%rbp,%rdi,8),%r11 + movl 48+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 
2(%rbp,%rsi,8),%r12 + xorq 1(%rbp,%rdi,8),%r13 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r15 + xorq 7(%rbp,%rdi,8),%r8 + movl 56+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r9 + xorq 5(%rbp,%rdi,8),%r10 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r11 + xorq 3(%rbp,%rdi,8),%r12 + movl 56+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r13 + xorq 1(%rbp,%rdi,8),%r14 + movq %r8,0(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r8 + xorq 7(%rbp,%rdi,8),%r9 + movl 64+0+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r10 + xorq 5(%rbp,%rdi,8),%r11 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r12 + xorq 3(%rbp,%rdi,8),%r13 + movl 64+0+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r14 + xorq 1(%rbp,%rdi,8),%r15 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r9 + xorq 7(%rbp,%rdi,8),%r10 + movl 64+8+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r11 + xorq 5(%rbp,%rdi,8),%r12 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r13 + xorq 3(%rbp,%rdi,8),%r14 + movl 64+8+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r15 + xorq 1(%rbp,%rdi,8),%r8 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r10 + xorq 7(%rbp,%rdi,8),%r11 + movl 64+16+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r12 + xorq 5(%rbp,%rdi,8),%r13 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r14 + xorq 3(%rbp,%rdi,8),%r15 + movl 64+16+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r8 + xorq 1(%rbp,%rdi,8),%r9 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r11 + xorq 7(%rbp,%rdi,8),%r12 + movl 64+24+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r13 + xorq 5(%rbp,%rdi,8),%r14 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r15 + xorq 3(%rbp,%rdi,8),%r8 + movl 64+24+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r9 + xorq 1(%rbp,%rdi,8),%r10 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + 
movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r12 + xorq 7(%rbp,%rdi,8),%r13 + movl 64+32+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r14 + xorq 5(%rbp,%rdi,8),%r15 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r8 + xorq 3(%rbp,%rdi,8),%r9 + movl 64+32+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r10 + xorq 1(%rbp,%rdi,8),%r11 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r13 + xorq 7(%rbp,%rdi,8),%r14 + movl 64+40+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r15 + xorq 5(%rbp,%rdi,8),%r8 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r9 + xorq 3(%rbp,%rdi,8),%r10 + movl 64+40+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r11 + xorq 1(%rbp,%rdi,8),%r12 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r14 + xorq 7(%rbp,%rdi,8),%r15 + movl 64+48+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r8 + xorq 5(%rbp,%rdi,8),%r9 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r10 + xorq 3(%rbp,%rdi,8),%r11 + movl 64+48+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r12 + xorq 1(%rbp,%rdi,8),%r13 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r15 + xorq 7(%rbp,%rdi,8),%r8 + + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r9 + xorq 5(%rbp,%rdi,8),%r10 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r11 + xorq 3(%rbp,%rdi,8),%r12 + + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r13 + xorq 1(%rbp,%rdi,8),%r14 + leaq 128(%rsp),%rbx + movq 24(%rbx),%rsi + addq $1,%rsi + cmpq $10,%rsi + je .Lroundsdone + + movq %rsi,24(%rbx) + movq %r8,64+0(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + jmp .Lround +.align 16 +.Lroundsdone: + movq 0(%rbx),%rdi + movq 8(%rbx),%rsi + movq 16(%rbx),%rax + xorq 0(%rsi),%r8 + xorq 8(%rsi),%r9 + xorq 16(%rsi),%r10 + xorq 24(%rsi),%r11 + xorq 32(%rsi),%r12 + xorq 40(%rsi),%r13 + xorq 48(%rsi),%r14 + xorq 56(%rsi),%r15 + xorq 0(%rdi),%r8 + xorq 8(%rdi),%r9 + xorq 16(%rdi),%r10 + xorq 24(%rdi),%r11 + xorq 32(%rdi),%r12 + xorq 40(%rdi),%r13 + xorq 48(%rdi),%r14 + xorq 56(%rdi),%r15 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rsi),%rsi + subq $1,%rax + jz .Lalldone + movq %rsi,8(%rbx) + movq %rax,16(%rbx) + jmp .Louterloop +.Lalldone: + movq 32(%rbx),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 
+.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + .byte 0xf3,0xc3 +.cfi_endproc +.size whirlpool_block,.-whirlpool_block + +.section .rodata +.align 64 +.type .Ltable,@object +.Ltable: +.byte 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216 +.byte 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38 +.byte 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184 +.byte 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251 +.byte 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203 +.byte 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17 +.byte 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9 +.byte 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13 +.byte 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155 +.byte 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255 +.byte 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12 +.byte 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14 +.byte 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150 +.byte 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48 +.byte 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109 +.byte 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248 +.byte 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71 +.byte 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53 +.byte 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55 +.byte 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138 +.byte 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210 +.byte 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108 +.byte 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132 +.byte 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128 +.byte 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245 +.byte 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179 +.byte 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33 +.byte 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156 +.byte 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67 +.byte 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41 +.byte 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93 +.byte 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213 +.byte 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189 +.byte 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232 +.byte 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146 +.byte 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158 +.byte 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19 +.byte 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35 +.byte 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32 +.byte 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68 +.byte 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162 +.byte 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207 +.byte 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124 +.byte 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90 +.byte 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80 +.byte 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201 +.byte 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20 +.byte 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217 +.byte 
189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60 +.byte 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143 +.byte 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144 +.byte 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7 +.byte 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221 +.byte 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211 +.byte 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45 +.byte 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120 +.byte 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151 +.byte 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2 +.byte 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115 +.byte 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167 +.byte 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246 +.byte 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178 +.byte 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73 +.byte 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86 +.byte 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112 +.byte 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205 +.byte 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187 +.byte 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113 +.byte 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123 +.byte 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175 +.byte 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69 +.byte 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26 +.byte 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212 +.byte 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88 +.byte 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46 +.byte 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63 +.byte 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172 +.byte 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176 +.byte 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239 +.byte 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182 +.byte 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92 +.byte 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18 +.byte 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147 +.byte 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222 +.byte 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198 +.byte 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209 +.byte 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59 +.byte 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95 +.byte 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49 +.byte 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168 +.byte 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185 +.byte 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188 +.byte 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62 +.byte 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11 +.byte 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191 +.byte 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89 +.byte 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242 +.byte 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119 +.byte 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51 +.byte 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244 +.byte 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39 +.byte 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235 +.byte 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137 +.byte 
72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50 +.byte 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84 +.byte 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141 +.byte 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100 +.byte 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157 +.byte 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61 +.byte 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15 +.byte 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202 +.byte 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183 +.byte 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125 +.byte 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206 +.byte 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127 +.byte 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47 +.byte 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99 +.byte 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42 +.byte 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204 +.byte 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130 +.byte 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122 +.byte 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72 +.byte 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149 +.byte 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223 +.byte 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77 +.byte 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192 +.byte 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145 +.byte 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200 +.byte 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91 +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249 +.byte 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110 +.byte 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225 +.byte 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230 +.byte 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40 +.byte 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195 +.byte 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116 +.byte 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190 +.byte 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29 +.byte 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234 +.byte 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87 +.byte 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56 +.byte 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173 +.byte 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196 +.byte 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218 +.byte 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199 +.byte 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219 +.byte 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233 +.byte 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106 +.byte 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3 +.byte 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74 +.byte 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142 +.byte 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96 +.byte 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252 +.byte 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70 +.byte 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31 +.byte 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118 +.byte 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250 +.byte 
6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54 +.byte 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174 +.byte 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75 +.byte 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133 +.byte 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126 +.byte 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231 +.byte 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85 +.byte 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58 +.byte 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129 +.byte 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82 +.byte 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98 +.byte 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163 +.byte 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16 +.byte 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171 +.byte 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208 +.byte 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197 +.byte 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236 +.byte 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22 +.byte 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148 +.byte 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159 +.byte 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229 +.byte 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152 +.byte 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23 +.byte 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228 +.byte 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161 +.byte 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78 +.byte 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66 +.byte 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52 +.byte 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8 +.byte 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238 +.byte 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97 +.byte 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177 +.byte 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79 +.byte 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36 +.byte 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227 +.byte 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37 +.byte 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34 +.byte 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101 +.byte 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121 +.byte 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105 +.byte 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169 +.byte 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25 +.byte 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254 +.byte 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154 +.byte 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240 +.byte 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153 +.byte 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131 +.byte 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4 +.byte 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102 +.byte 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224 +.byte 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193 +.byte 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253 +.byte 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64 +.byte 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28 +.byte 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24 +.byte 
19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139 +.byte 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81 +.byte 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5 +.byte 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140 +.byte 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57 +.byte 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170 +.byte 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27 +.byte 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220 +.byte 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94 +.byte 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160 +.byte 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136 +.byte 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103 +.byte 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10 +.byte 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135 +.byte 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241 +.byte 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114 +.byte 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83 +.byte 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1 +.byte 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43 +.byte 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164 +.byte 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243 +.byte 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21 +.byte 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76 +.byte 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165 +.byte 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181 +.byte 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180 +.byte 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186 +.byte 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166 +.byte 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247 +.byte 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6 +.byte 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65 +.byte 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215 +.byte 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111 +.byte 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30 +.byte 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214 +.byte 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226 +.byte 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104 +.byte 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44 +.byte 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237 +.byte 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117 +.byte 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134 +.byte 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107 +.byte 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194 +.byte 24,35,198,232,135,184,1,79 +.byte 54,166,210,245,121,111,145,82 +.byte 96,188,155,142,163,12,123,53 +.byte 29,224,215,194,46,75,254,87 +.byte 21,119,55,229,159,240,74,218 +.byte 88,201,41,10,177,160,107,133 +.byte 189,93,16,244,203,62,5,103 +.byte 228,39,65,139,167,125,149,216 +.byte 251,238,124,102,221,23,71,158 +.byte 202,45,191,7,173,90,131,51 diff --git a/contrib/openssl-cmake/asm/crypto/x86_64cpuid.s b/contrib/openssl-cmake/asm/crypto/x86_64cpuid.s new file mode 100644 index 000000000000..3b40b616c91c --- /dev/null +++ b/contrib/openssl-cmake/asm/crypto/x86_64cpuid.s @@ -0,0 +1,517 @@ + + +.hidden OPENSSL_cpuid_setup +.section .init + call OPENSSL_cpuid_setup + +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,40,4 +.text + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 
16 +OPENSSL_atomic_add: +.cfi_startproc +.byte 243,15,30,250 + movl (%rdi),%eax +.Lspin: leaq (%rsi,%rax,1),%r8 +.byte 0xf0 + cmpxchgl %r8d,(%rdi) + jne .Lspin + movl %r8d,%eax +.byte 0x48,0x98 + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: +.cfi_startproc +.byte 243,15,30,250 + rdtsc + shlq $32,%rdx + orq %rdx,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_rdtsc,.-OPENSSL_rdtsc + +.globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: +.cfi_startproc +.byte 243,15,30,250 + movq %rbx,%r8 +.cfi_register %rbx,%r8 + + xorl %eax,%eax + movq %rax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $0x756e6547,%ebx + setne %al + movl %eax,%r9d + cmpl $0x49656e69,%edx + setne %al + orl %eax,%r9d + cmpl $0x6c65746e,%ecx + setne %al + orl %eax,%r9d + jz .Lintel + + cmpl $0x68747541,%ebx + setne %al + movl %eax,%r10d + cmpl $0x69746E65,%edx + setne %al + orl %eax,%r10d + cmpl $0x444D4163,%ecx + setne %al + orl %eax,%r10d + jnz .Lintel + + + movl $0x80000000,%eax + cpuid + cmpl $0x80000001,%eax + jb .Lintel + movl %eax,%r10d + movl $0x80000001,%eax + cpuid + orl %ecx,%r9d + andl $0x00000801,%r9d + + cmpl $0x80000008,%r10d + jb .Lintel + + movl $0x80000008,%eax + cpuid + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + btl $28,%edx + jnc .Lgeneric + shrl $16,%ebx + cmpb %r10b,%bl + ja .Lgeneric + andl $0xefffffff,%edx + jmp .Lgeneric + +.Lintel: + cmpl $4,%r11d + movl $-1,%r10d + jb .Lnocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $0xfff,%r10d + +.Lnocacheinfo: + movl $1,%eax + cpuid + movd %eax,%xmm0 + andl $0xbfefffff,%edx + cmpl $0,%r9d + jne .Lnotintel + orl $0x40000000,%edx + andb $15,%ah + cmpb $15,%ah + jne .LnotP4 + orl $0x00100000,%edx +.LnotP4: + cmpb $6,%ah + jne .Lnotintel + andl $0x0fff0ff0,%eax + cmpl $0x00050670,%eax + je .Lknights + cmpl $0x00080650,%eax + jne .Lnotintel +.Lknights: + andl $0xfbffffff,%ecx + +.Lnotintel: + btl $28,%edx + jnc .Lgeneric + andl $0xefffffff,%edx + cmpl $0,%r10d + je .Lgeneric + + orl $0x10000000,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .Lgeneric + andl $0xefffffff,%edx +.Lgeneric: + andl $0x00000800,%r9d + andl $0xfffff7ff,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + + cmpl $7,%r11d + jb .Lno_extended_info + movl $7,%eax + xorl %ecx,%ecx + cpuid + movd %eax,%xmm1 + btl $26,%r9d + jc .Lnotknights + andl $0xfff7ffff,%ebx +.Lnotknights: + movd %xmm0,%eax + andl $0x0fff0ff0,%eax + cmpl $0x00050650,%eax + jne .Lnotskylakex + andl $0xfffeffff,%ebx + + +.Lnotskylakex: + movl %ebx,8(%rdi) + movl %ecx,12(%rdi) + movl %edx,16(%rdi) + + movd %xmm1,%eax + cmpl $0x1,%eax + jb .Lno_extended_info + movl $0x7,%eax + movl $0x1,%ecx + cpuid + movl %eax,20(%rdi) + movl %edx,24(%rdi) + movl %ebx,28(%rdi) + movl %ecx,32(%rdi) + + andl $0x80000,%edx + cmpl $0x0,%edx + je .Lno_extended_info + movl $0x24,%eax + movl $0x0,%ecx + cpuid + movl %ebx,36(%rdi) + +.Lno_extended_info: + + btl $27,%r9d + jnc .Lclear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $0xe6,%eax + cmpl $0xe6,%eax + je .Ldone + andl $0x3fdeffff,8(%rdi) + + + + + andl $6,%eax + cmpl $6,%eax + je .Ldone +.Lclear_avx: + andl $0xff7fffff,20(%rdi) + + + movl $0xefffe7ff,%eax + andl %eax,%r9d + movl $0x3fdeffdf,%eax + andl %eax,8(%rdi) +.Ldone: + shlq $32,%r9 + movl %r10d,%eax + movq %r8,%rbx +.cfi_restore %rbx + orq %r9,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid 
+ +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: +.cfi_startproc +.byte 243,15,30,250 + xorq %rax,%rax + cmpq $15,%rsi + jae .Lot + cmpq $0,%rsi + je .Lret +.Little: + movb %al,(%rdi) + subq $1,%rsi + leaq 1(%rdi),%rdi + jnz .Little +.Lret: + .byte 0xf3,0xc3 +.align 16 +.Lot: + testq $7,%rdi + jz .Laligned + movb %al,(%rdi) + leaq -1(%rsi),%rsi + leaq 1(%rdi),%rdi + jmp .Lot +.Laligned: + movq %rax,(%rdi) + leaq -8(%rsi),%rsi + testq $-8,%rsi + leaq 8(%rdi),%rdi + jnz .Laligned + cmpq $0,%rsi + jne .Little + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_cleanse,.-OPENSSL_cleanse + +.globl CRYPTO_memcmp +.type CRYPTO_memcmp,@function +.align 16 +CRYPTO_memcmp: +.cfi_startproc +.byte 243,15,30,250 + xorq %rax,%rax + xorq %r10,%r10 + cmpq $0,%rdx + je .Lno_data + cmpq $16,%rdx + jne .Loop_cmp + movq (%rdi),%r10 + movq 8(%rdi),%r11 + movq $1,%rdx + xorq (%rsi),%r10 + xorq 8(%rsi),%r11 + orq %r11,%r10 + cmovnzq %rdx,%rax + .byte 0xf3,0xc3 + +.align 16 +.Loop_cmp: + movb (%rdi),%r10b + leaq 1(%rdi),%rdi + xorb (%rsi),%r10b + leaq 1(%rsi),%rsi + orb %r10b,%al + decq %rdx + jnz .Loop_cmp + negq %rax + shrq $63,%rax +.Lno_data: + .byte 0xf3,0xc3 +.cfi_endproc +.size CRYPTO_memcmp,.-CRYPTO_memcmp +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: +.cfi_startproc +.byte 243,15,30,250 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor %xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %rsi,%rsi + xorq %rdi,%rdi + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu +.globl OPENSSL_instrument_bus +.type OPENSSL_instrument_bus,@function +.align 16 +OPENSSL_instrument_bus: +.cfi_startproc +.byte 243,15,30,250 + movq %rdi,%r10 + movq %rsi,%rcx + movq %rsi,%r11 + + rdtsc + movl %eax,%r8d + movl $0,%r9d + clflush (%r10) +.byte 0xf0 + addl %r9d,(%r10) + jmp .Loop +.align 16 +.Loop: rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + movl %eax,%r9d + clflush (%r10) +.byte 0xf0 + addl %eax,(%r10) + leaq 4(%r10),%r10 + subq $1,%rcx + jnz .Loop + + movq %r11,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus + +.globl OPENSSL_instrument_bus2 +.type OPENSSL_instrument_bus2,@function +.align 16 +OPENSSL_instrument_bus2: +.cfi_startproc +.byte 243,15,30,250 + movq %rdi,%r10 + movq %rsi,%rcx + movq %rdx,%r11 + movq %rcx,8(%rsp) + + rdtsc + movl %eax,%r8d + movl $0,%r9d + + clflush (%r10) +.byte 0xf0 + addl %r9d,(%r10) + + rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + movl %eax,%r9d +.Loop2: + clflush (%r10) +.byte 0xf0 + addl %eax,(%r10) + + subq $1,%r11 + jz .Ldone2 + + rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + cmpl %r9d,%eax + movl %eax,%r9d + movl $0,%edx + setne %dl + subq %rdx,%rcx + leaq (%r10,%rdx,4),%r10 + jnz .Loop2 + +.Ldone2: + movq 8(%rsp),%rax + subq %rcx,%rax + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 +.globl OPENSSL_ia32_rdrand_bytes +.type OPENSSL_ia32_rdrand_bytes,@function +.align 16 +OPENSSL_ia32_rdrand_bytes: +.cfi_startproc +.byte 243,15,30,250 + xorq %rax,%rax + cmpq $0,%rsi + je .Ldone_rdrand_bytes + + movq $8,%r11 +.Loop_rdrand_bytes: +.byte 
73,15,199,242 + jc .Lbreak_rdrand_bytes + decq %r11 + jnz .Loop_rdrand_bytes + jmp .Ldone_rdrand_bytes + +.align 16 +.Lbreak_rdrand_bytes: + cmpq $8,%rsi + jb .Ltail_rdrand_bytes + movq %r10,(%rdi) + leaq 8(%rdi),%rdi + addq $8,%rax + subq $8,%rsi + jz .Ldone_rdrand_bytes + movq $8,%r11 + jmp .Loop_rdrand_bytes + +.align 16 +.Ltail_rdrand_bytes: + movb %r10b,(%rdi) + leaq 1(%rdi),%rdi + incq %rax + shrq $8,%r10 + decq %rsi + jnz .Ltail_rdrand_bytes + +.Ldone_rdrand_bytes: + xorq %r10,%r10 + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes +.globl OPENSSL_ia32_rdseed_bytes +.type OPENSSL_ia32_rdseed_bytes,@function +.align 16 +OPENSSL_ia32_rdseed_bytes: +.cfi_startproc +.byte 243,15,30,250 + xorq %rax,%rax + cmpq $0,%rsi + je .Ldone_rdseed_bytes + + movq $8,%r11 +.Loop_rdseed_bytes: +.byte 73,15,199,250 + jc .Lbreak_rdseed_bytes + decq %r11 + jnz .Loop_rdseed_bytes + jmp .Ldone_rdseed_bytes + +.align 16 +.Lbreak_rdseed_bytes: + cmpq $8,%rsi + jb .Ltail_rdseed_bytes + movq %r10,(%rdi) + leaq 8(%rdi),%rdi + addq $8,%rax + subq $8,%rsi + jz .Ldone_rdseed_bytes + movq $8,%r11 + jmp .Loop_rdseed_bytes + +.align 16 +.Ltail_rdseed_bytes: + movb %r10b,(%rdi) + leaq 1(%rdi),%rdi + incq %rax + shrq $8,%r10 + decq %rsi + jnz .Ltail_rdseed_bytes + +.Ldone_rdseed_bytes: + xorq %r10,%r10 + .byte 0xf3,0xc3 +.cfi_endproc +.size OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes diff --git a/contrib/openssl-cmake/asm/generate_asm.sh b/contrib/openssl-cmake/asm/generate_asm.sh new file mode 100755 index 000000000000..ac37736f9e4e --- /dev/null +++ b/contrib/openssl-cmake/asm/generate_asm.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +set -euo pipefail + +OPENSSL_SOURCE_DIR="${1:-${OPENSSL_SOURCE_DIR:-$(pwd)/../../openssl}}" +OPENSSL_BINARY_DIR="${2:-${OPENSSL_BINARY_DIR:-$(pwd)}}" +CC="${CC:-cc}" + +mkdir_and_run() { + local file_in="$1" + local file_out="$2" + shift 2 + local args=("$@") + echo "Generating ${file_out}" + mkdir -p "$(dirname "$file_out")" + CC="$CC" perl "$file_in" "${args[@]}" "$file_out" +} + +declare -a jobs=( + # ARCH_AMD64 + "crypto/aes/asm/aes-x86_64.pl crypto/aes/aes-x86_64.s" + "crypto/aes/asm/aesni-mb-x86_64.pl crypto/aes/aesni-mb-x86_64.s" + "crypto/aes/asm/aesni-sha1-x86_64.pl crypto/aes/aesni-sha1-x86_64.s" + "crypto/aes/asm/aesni-sha256-x86_64.pl crypto/aes/aesni-sha256-x86_64.s" + "crypto/aes/asm/aesni-x86_64.pl crypto/aes/aesni-x86_64.s" + "crypto/aes/asm/aesni-xts-avx512.pl crypto/aes/aesni-xts-avx512.s" + "crypto/aes/asm/bsaes-x86_64.pl crypto/aes/bsaes-x86_64.s" + "crypto/aes/asm/vpaes-x86_64.pl crypto/aes/vpaes-x86_64.s" + "crypto/bn/asm/rsaz-2k-avx512.pl crypto/bn/rsaz-2k-avx512.s" + "crypto/bn/asm/rsaz-2k-avxifma.pl crypto/bn/rsaz-2k-avxifma.s" + "crypto/bn/asm/rsaz-3k-avx512.pl crypto/bn/rsaz-3k-avx512.s" + "crypto/bn/asm/rsaz-3k-avxifma.pl crypto/bn/rsaz-3k-avxifma.s" + "crypto/bn/asm/rsaz-4k-avx512.pl crypto/bn/rsaz-4k-avx512.s" + "crypto/bn/asm/rsaz-4k-avxifma.pl crypto/bn/rsaz-4k-avxifma.s" + "crypto/bn/asm/rsaz-avx2.pl crypto/bn/rsaz-avx2.s" + "crypto/bn/asm/rsaz-x86_64.pl crypto/bn/rsaz-x86_64.s" + "crypto/bn/asm/x86_64-gf2m.pl crypto/bn/x86_64-gf2m.s" + "crypto/bn/asm/x86_64-mont.pl crypto/bn/x86_64-mont.s" + "crypto/bn/asm/x86_64-mont5.pl crypto/bn/x86_64-mont5.s" + "crypto/camellia/asm/cmll-x86_64.pl crypto/camellia/cmll-x86_64.s" + "crypto/chacha/asm/chacha-x86_64.pl crypto/chacha/chacha-x86_64.s" + "crypto/ec/asm/ecp_nistz256-x86_64.pl crypto/ec/ecp_nistz256-x86_64.s" + "crypto/ec/asm/x25519-x86_64.pl 
crypto/ec/x25519-x86_64.s" + "crypto/md5/asm/md5-x86_64.pl crypto/md5/md5-x86_64.s" + "crypto/modes/asm/aes-gcm-avx512.pl crypto/modes/aes-gcm-avx512.s" + "crypto/modes/asm/aesni-gcm-x86_64.pl crypto/modes/aesni-gcm-x86_64.s" + "crypto/modes/asm/ghash-x86_64.pl crypto/modes/ghash-x86_64.s" + "crypto/poly1305/asm/poly1305-x86_64.pl crypto/poly1305/poly1305-x86_64.s" + "crypto/rc4/asm/rc4-md5-x86_64.pl crypto/rc4/rc4-md5-x86_64.s" + "crypto/rc4/asm/rc4-x86_64.pl crypto/rc4/rc4-x86_64.s" + "crypto/sha/asm/keccak1600-avx2.pl crypto/sha/keccak1600-avx2.s" + "crypto/sha/asm/keccak1600-avx512.pl crypto/sha/keccak1600-avx512.s" + "crypto/sha/asm/keccak1600-avx512vl.pl crypto/sha/keccak1600-avx512vl.s" + "crypto/sha/asm/keccak1600-x86_64.pl crypto/sha/keccak1600-x86_64.s" + "crypto/sha/asm/sha1-mb-x86_64.pl crypto/sha/sha1-mb-x86_64.s" + "crypto/sha/asm/sha1-x86_64.pl crypto/sha/sha1-x86_64.s" + "crypto/sha/asm/sha256-mb-x86_64.pl crypto/sha/sha256-mb-x86_64.s" + "crypto/sha/asm/sha512-x86_64.pl crypto/sha/sha256-x86_64.s" + "crypto/sha/asm/sha512-x86_64.pl crypto/sha/sha512-x86_64.s" + "crypto/whrlpool/asm/wp-x86_64.pl crypto/whrlpool/wp-x86_64.s" + "crypto/x86_64cpuid.pl crypto/x86_64cpuid.s" + + # ARCH_AARCH64 + "crypto/aes/asm/aesv8-armx.pl crypto/aes/aesv8-armx.S linux64" + "crypto/aes/asm/bsaes-armv8.pl crypto/aes/bsaes-armv8.S linux64" + "crypto/aes/asm/vpaes-armv8.pl crypto/aes/vpaes-armv8.S linux64" + "crypto/arm64cpuid.pl crypto/arm64cpuid.S linux64" + "crypto/bn/asm/armv8-mont.pl crypto/bn/armv8-mont.S linux64" + "crypto/chacha/asm/chacha-armv8-sve.pl crypto/chacha/chacha-armv8-sve.S linux64" + "crypto/chacha/asm/chacha-armv8.pl crypto/chacha/chacha-armv8.S linux64" + "crypto/ec/asm/ecp_nistz256-armv8.pl crypto/ec/ecp_nistz256-armv8.S linux64" + "crypto/ec/asm/ecp_sm2p256-armv8.pl crypto/ec/ecp_sm2p256-armv8.S linux64" + "crypto/md5/asm/md5-aarch64.pl crypto/md5/asm/md5-aarch64.S linux64" + "crypto/modes/asm/aes-gcm-armv8-unroll8_64.pl crypto/modes/asm/aes-gcm-armv8-unroll8_64.S linux64" + "crypto/modes/asm/aes-gcm-armv8_64.pl crypto/modes/asm/aes-gcm-armv8_64.S linux64" + "crypto/modes/asm/ghashv8-armx.pl crypto/modes/ghashv8-armx.S linux64" + "crypto/poly1305/asm/poly1305-armv8.pl crypto/poly1305/poly1305-armv8.S linux64" + "crypto/sha/asm/keccak1600-armv8.pl crypto/sha/keccak1600-armv8.S linux64" + "crypto/sha/asm/sha1-armv8.pl crypto/sha/sha1-armv8.S linux64" + "crypto/sha/asm/sha512-armv8.pl crypto/sha/sha256-armv8.S linux64" + "crypto/sha/asm/sha512-armv8.pl crypto/sha/sha512-armv8.S linux64" + "crypto/sm3/asm/sm3-armv8.pl crypto/sm3/asm/sm3-armv8.S linux64" + "crypto/sm4/asm/sm4-armv8.pl crypto/sm4/asm/sm4-armv8.S linux64" + "crypto/sm4/asm/vpsm4-armv8.pl crypto/sm4/asm/vpsm4-armv8.S linux64" + "crypto/sm4/asm/vpsm4_ex-armv8.pl crypto/sm4/asm/vpsm4_ex-armv8.S linux64" + + # ARCH_PPC64LE + "crypto/aes/asm/aesp8-ppc.pl crypto/aes/aesp8-ppc.s linux64v2" + "crypto/modes/asm/aes-gcm-ppc.pl crypto/modes/aes-gcm-ppc.s linux64v2" + "crypto/modes/asm/ghashp8-ppc.pl crypto/modes/ghashp8-ppc.s linux64v2" + "crypto/ppccpuid.pl crypto/ppccpuid.s linux64v2" + + # ARCH_S390X + "crypto/aes/asm/aes-s390x.pl crypto/aes/aes-s390x.S linux64" + "crypto/chacha/asm/chacha-s390x.pl crypto/chacha/chacha-s390x.S linux64" + "crypto/rc4/asm/rc4-s390x.pl crypto/rc4/rc4-s390x.S linux64" + "crypto/s390xcpuid.pl crypto/s390xcpuid.S linux64" + "crypto/sha/asm/keccak1600-s390x.pl crypto/sha/keccak1600-s390x.S linux64" + + # ARCH_RISCV64 + "crypto/aes/asm/aes-riscv64-zkn.pl crypto/aes/aes-riscv64-zkn.S linux64" + 
"crypto/aes/asm/aes-riscv64-zvbb-zvkg-zvkned.pl crypto/aes/aes-riscv64-zvbb-zvkg-zvkned.S linux64" + "crypto/aes/asm/aes-riscv64-zvkb-zvkned.pl crypto/aes/aes-riscv64-zvkb-zvkned.S linux64" + "crypto/aes/asm/aes-riscv64-zvkned.pl crypto/aes/aes-riscv64-zvkned.S linux64" + "crypto/modes/asm/aes-gcm-riscv64-zvkb-zvkg-zvkned.pl crypto/modes/aes-gcm-riscv64-zvkb-zvkg-zvkned.S linux64" + "crypto/modes/asm/ghash-riscv64-zvkb-zvbc.pl crypto/modes/ghash-riscv64-zvkb-zvbc.S linux64" + "crypto/modes/asm/ghash-riscv64-zvkg.pl crypto/modes/ghash-riscv64-zvkg.S linux64" + "crypto/modes/asm/ghash-riscv64.pl crypto/modes/ghash-riscv64.S linux64" + "crypto/riscv64cpuid.pl crypto/riscv64cpuid.S linux64" + "crypto/sm4/asm/sm4-riscv64-zvksed.pl crypto/sm4/sm4-riscv64-zvksed.S linux64" + + # ARCH_LOONGARCH64 + "crypto/loongarch64cpuid.pl crypto/loongarch64cpuid.S linux64" +) + +for entry in "${jobs[@]}"; do + read -r rel_in rel_out arg <<<"${entry:-}" + full_in="$OPENSSL_SOURCE_DIR/$rel_in" + full_out="$OPENSSL_BINARY_DIR/$rel_out" + + if [[ -n "${arg:-}" ]]; then + mkdir_and_run "$full_in" "$full_out" "$arg" + else + mkdir_and_run "$full_in" "$full_out" + fi +done diff --git a/contrib/openssl-cmake/common/include/crypto/bn_conf.h b/contrib/openssl-cmake/common/include/crypto/bn_conf.h new file mode 100644 index 000000000000..0347a6ddc067 --- /dev/null +++ b/contrib/openssl-cmake/common/include/crypto/bn_conf.h @@ -0,0 +1,29 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from include/crypto/bn_conf.h.in */ +/* + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OSSL_CRYPTO_BN_CONF_H +# define OSSL_CRYPTO_BN_CONF_H +# pragma once + +/* + * The contents of this file are not used in the UEFI build, as + * both 32-bit and 64-bit builds are supported from a single run + * of the Configure script. + */ + +/* Should we define BN_DIV2W here? */ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT + +#endif diff --git a/contrib/openssl-cmake/common/include/crypto/dso_conf.h b/contrib/openssl-cmake/common/include/crypto/dso_conf.h new file mode 100644 index 000000000000..795dfa0f1a66 --- /dev/null +++ b/contrib/openssl-cmake/common/include/crypto/dso_conf.h @@ -0,0 +1,19 @@ +/* WARNING: do not edit! */ +/* Generated by Makefile from include/crypto/dso_conf.h.in */ +/* + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OSSL_CRYPTO_DSO_CONF_H +# define OSSL_CRYPTO_DSO_CONF_H +# pragma once + +# define DSO_DLFCN +# define HAVE_DLFCN_H +# define DSO_EXTENSION ".so" +#endif diff --git a/contrib/openssl-cmake/common/include/internal/param_names.h b/contrib/openssl-cmake/common/include/internal/param_names.h new file mode 100644 index 000000000000..0a0404a57e82 --- /dev/null +++ b/contrib/openssl-cmake/common/include/internal/param_names.h @@ -0,0 +1,469 @@ +/* + * WARNING: do not edit! 
+ * Generated by Makefile from include/internal/param_names.h.in + * + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +int ossl_param_find_pidx(const char *s); + +/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ +#define NUM_PIDX 346 + +#define PIDX_ALG_PARAM_ALGORITHM_ID 0 +#define PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS 1 +#define PIDX_ALG_PARAM_CIPHER 2 +#define PIDX_ALG_PARAM_DIGEST 3 +#define PIDX_ALG_PARAM_ENGINE 4 +#define PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR 5 +#define PIDX_ALG_PARAM_MAC 6 +#define PIDX_ALG_PARAM_PROPERTIES 7 +#define PIDX_ASYM_CIPHER_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_ENGINE PIDX_PKEY_PARAM_ENGINE +#define PIDX_ASYM_CIPHER_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_ASYM_CIPHER_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK +#define PIDX_ASYM_CIPHER_PARAM_FIPS_RSA_PKCS15_PAD_DISABLED PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED +#define PIDX_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION 8 +#define PIDX_ASYM_CIPHER_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_MGF1_DIGEST_PROPS PIDX_PKEY_PARAM_MGF1_PROPERTIES +#define PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS 9 +#define PIDX_ASYM_CIPHER_PARAM_OAEP_LABEL 10 +#define PIDX_ASYM_CIPHER_PARAM_PAD_MODE PIDX_PKEY_PARAM_PAD_MODE +#define PIDX_ASYM_CIPHER_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION 11 +#define PIDX_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION 12 +#define PIDX_CAPABILITY_TLS_GROUP_ALG 13 +#define PIDX_CAPABILITY_TLS_GROUP_ID 14 +#define PIDX_CAPABILITY_TLS_GROUP_IS_KEM 15 +#define PIDX_CAPABILITY_TLS_GROUP_MAX_DTLS 16 +#define PIDX_CAPABILITY_TLS_GROUP_MAX_TLS 17 +#define PIDX_CAPABILITY_TLS_GROUP_MIN_DTLS 18 +#define PIDX_CAPABILITY_TLS_GROUP_MIN_TLS 19 +#define PIDX_CAPABILITY_TLS_GROUP_NAME 20 +#define PIDX_CAPABILITY_TLS_GROUP_NAME_INTERNAL 21 +#define PIDX_CAPABILITY_TLS_GROUP_SECURITY_BITS 22 +#define PIDX_CAPABILITY_TLS_SIGALG_CODE_POINT 23 +#define PIDX_CAPABILITY_TLS_SIGALG_HASH_NAME 24 +#define PIDX_CAPABILITY_TLS_SIGALG_HASH_OID 25 +#define PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME 26 +#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE 27 +#define PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID 28 +#define PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS 16 +#define PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS 17 +#define PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS 18 +#define PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS 19 +#define PIDX_CAPABILITY_TLS_SIGALG_NAME 29 +#define PIDX_CAPABILITY_TLS_SIGALG_OID 30 +#define PIDX_CAPABILITY_TLS_SIGALG_SECURITY_BITS 31 +#define PIDX_CAPABILITY_TLS_SIGALG_SIG_NAME 32 +#define PIDX_CAPABILITY_TLS_SIGALG_SIG_OID 33 +#define PIDX_CIPHER_PARAM_AEAD 34 +#define PIDX_CIPHER_PARAM_AEAD_IVLEN PIDX_CIPHER_PARAM_IVLEN +#define PIDX_CIPHER_PARAM_AEAD_IV_GENERATED 35 +#define PIDX_CIPHER_PARAM_AEAD_MAC_KEY 36 +#define PIDX_CIPHER_PARAM_AEAD_TAG 37 +#define PIDX_CIPHER_PARAM_AEAD_TAGLEN 38 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_AAD 39 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_AAD_PAD 40 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN 41 +#define PIDX_CIPHER_PARAM_AEAD_TLS1_IV_FIXED 42 +#define 
PIDX_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV 43 +#define PIDX_CIPHER_PARAM_ALGORITHM_ID PIDX_ALG_PARAM_ALGORITHM_ID +#define PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS +#define PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS_OLD 44 +#define PIDX_CIPHER_PARAM_BLOCK_SIZE 45 +#define PIDX_CIPHER_PARAM_CTS 46 +#define PIDX_CIPHER_PARAM_CTS_MODE 47 +#define PIDX_CIPHER_PARAM_CUSTOM_IV 48 +#define PIDX_CIPHER_PARAM_DECRYPT_ONLY 49 +#define PIDX_CIPHER_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_CIPHER_PARAM_FIPS_ENCRYPT_CHECK 50 +#define PIDX_CIPHER_PARAM_HAS_RAND_KEY 51 +#define PIDX_CIPHER_PARAM_IV 52 +#define PIDX_CIPHER_PARAM_IVLEN 53 +#define PIDX_CIPHER_PARAM_KEYLEN 54 +#define PIDX_CIPHER_PARAM_MODE 55 +#define PIDX_CIPHER_PARAM_NUM 56 +#define PIDX_CIPHER_PARAM_PADDING 57 +#define PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG 58 +#define PIDX_CIPHER_PARAM_RANDOM_KEY 59 +#define PIDX_CIPHER_PARAM_RC2_KEYBITS 60 +#define PIDX_CIPHER_PARAM_ROUNDS 61 +#define PIDX_CIPHER_PARAM_SPEED 62 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK 63 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD 64 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN 65 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC 66 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN 67 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN 68 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE 69 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE 70 +#define PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT 71 +#define PIDX_CIPHER_PARAM_TLS_MAC 72 +#define PIDX_CIPHER_PARAM_TLS_MAC_SIZE 73 +#define PIDX_CIPHER_PARAM_TLS_VERSION 74 +#define PIDX_CIPHER_PARAM_UPDATED_IV 75 +#define PIDX_CIPHER_PARAM_USE_BITS 76 +#define PIDX_CIPHER_PARAM_XTS_STANDARD 77 +#define PIDX_DECODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_DIGEST_PARAM_ALGID_ABSENT 78 +#define PIDX_DIGEST_PARAM_BLOCK_SIZE 45 +#define PIDX_DIGEST_PARAM_MICALG 79 +#define PIDX_DIGEST_PARAM_PAD_TYPE 80 +#define PIDX_DIGEST_PARAM_SIZE 81 +#define PIDX_DIGEST_PARAM_SSL3_MS 82 +#define PIDX_DIGEST_PARAM_XOF 83 +#define PIDX_DIGEST_PARAM_XOFLEN 84 +#define PIDX_DRBG_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_DRBG_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_DRBG_PARAM_ENTROPY_REQUIRED 85 +#define PIDX_DRBG_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_DRBG_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK +#define PIDX_DRBG_PARAM_MAC PIDX_ALG_PARAM_MAC +#define PIDX_DRBG_PARAM_MAX_ADINLEN 86 +#define PIDX_DRBG_PARAM_MAX_ENTROPYLEN 87 +#define PIDX_DRBG_PARAM_MAX_LENGTH 88 +#define PIDX_DRBG_PARAM_MAX_NONCELEN 89 +#define PIDX_DRBG_PARAM_MAX_PERSLEN 90 +#define PIDX_DRBG_PARAM_MIN_ENTROPYLEN 91 +#define PIDX_DRBG_PARAM_MIN_LENGTH 92 +#define PIDX_DRBG_PARAM_MIN_NONCELEN 93 +#define PIDX_DRBG_PARAM_PREDICTION_RESISTANCE 94 +#define PIDX_DRBG_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_DRBG_PARAM_RANDOM_DATA 95 +#define PIDX_DRBG_PARAM_RESEED_COUNTER 96 +#define PIDX_DRBG_PARAM_RESEED_REQUESTS 97 +#define PIDX_DRBG_PARAM_RESEED_TIME 98 +#define PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL 99 +#define PIDX_DRBG_PARAM_SIZE 81 +#define PIDX_DRBG_PARAM_USE_DF 100 +#define PIDX_ENCODER_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_ENCODER_PARAM_ENCRYPT_LEVEL 101 +#define PIDX_ENCODER_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_ENCODER_PARAM_SAVE_PARAMETERS 102 +#define PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE 103 +#define 
PIDX_EXCHANGE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_EXCHANGE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK +#define PIDX_EXCHANGE_PARAM_FIPS_ECDH_COFACTOR_CHECK PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK +#define PIDX_EXCHANGE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST 104 +#define PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS 105 +#define PIDX_EXCHANGE_PARAM_KDF_OUTLEN 106 +#define PIDX_EXCHANGE_PARAM_KDF_TYPE 107 +#define PIDX_EXCHANGE_PARAM_KDF_UKM 108 +#define PIDX_EXCHANGE_PARAM_PAD 109 +#define PIDX_GEN_PARAM_ITERATION 110 +#define PIDX_GEN_PARAM_POTENTIAL 111 +#define PIDX_KDF_PARAM_ARGON2_AD 112 +#define PIDX_KDF_PARAM_ARGON2_LANES 113 +#define PIDX_KDF_PARAM_ARGON2_MEMCOST 114 +#define PIDX_KDF_PARAM_ARGON2_VERSION 115 +#define PIDX_KDF_PARAM_CEK_ALG 116 +#define PIDX_KDF_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_KDF_PARAM_CONSTANT 117 +#define PIDX_KDF_PARAM_DATA 118 +#define PIDX_KDF_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_KDF_PARAM_EARLY_CLEAN 119 +#define PIDX_KDF_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_KDF_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK +#define PIDX_KDF_PARAM_FIPS_EMS_CHECK 120 +#define PIDX_KDF_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK +#define PIDX_KDF_PARAM_HMACDRBG_ENTROPY 121 +#define PIDX_KDF_PARAM_HMACDRBG_NONCE 122 +#define PIDX_KDF_PARAM_INFO 123 +#define PIDX_KDF_PARAM_ITER 124 +#define PIDX_KDF_PARAM_KBKDF_R 125 +#define PIDX_KDF_PARAM_KBKDF_USE_L 126 +#define PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR 127 +#define PIDX_KDF_PARAM_KEY 128 +#define PIDX_KDF_PARAM_LABEL 129 +#define PIDX_KDF_PARAM_MAC PIDX_ALG_PARAM_MAC +#define PIDX_KDF_PARAM_MAC_SIZE 130 +#define PIDX_KDF_PARAM_MODE 55 +#define PIDX_KDF_PARAM_PASSWORD 131 +#define PIDX_KDF_PARAM_PKCS12_ID 132 +#define PIDX_KDF_PARAM_PKCS5 133 +#define PIDX_KDF_PARAM_PREFIX 134 +#define PIDX_KDF_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_KDF_PARAM_SALT 135 +#define PIDX_KDF_PARAM_SCRYPT_MAXMEM 136 +#define PIDX_KDF_PARAM_SCRYPT_N 137 +#define PIDX_KDF_PARAM_SCRYPT_P 138 +#define PIDX_KDF_PARAM_SCRYPT_R 125 +#define PIDX_KDF_PARAM_SECRET 139 +#define PIDX_KDF_PARAM_SEED 140 +#define PIDX_KDF_PARAM_SIZE 81 +#define PIDX_KDF_PARAM_SSHKDF_SESSION_ID 141 +#define PIDX_KDF_PARAM_SSHKDF_TYPE 142 +#define PIDX_KDF_PARAM_SSHKDF_XCGHASH 143 +#define PIDX_KDF_PARAM_THREADS 144 +#define PIDX_KDF_PARAM_UKM 145 +#define PIDX_KDF_PARAM_X942_ACVPINFO 146 +#define PIDX_KDF_PARAM_X942_PARTYUINFO 147 +#define PIDX_KDF_PARAM_X942_PARTYVINFO 148 +#define PIDX_KDF_PARAM_X942_SUPP_PRIVINFO 149 +#define PIDX_KDF_PARAM_X942_SUPP_PUBINFO 150 +#define PIDX_KDF_PARAM_X942_USE_KEYBITS 151 +#define PIDX_KEM_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_KEM_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK +#define PIDX_KEM_PARAM_IKME 152 +#define PIDX_KEM_PARAM_OPERATION 153 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING 154 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING 155 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA 156 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN 157 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE 55 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS 158 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD 159 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC 160 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE 161 +#define PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM 162 +#define 
PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN 163 +#define PIDX_MAC_PARAM_BLOCK_SIZE 164 +#define PIDX_MAC_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_MAC_PARAM_CUSTOM 165 +#define PIDX_MAC_PARAM_C_ROUNDS 166 +#define PIDX_MAC_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_MAC_PARAM_DIGEST_NOINIT 167 +#define PIDX_MAC_PARAM_DIGEST_ONESHOT 168 +#define PIDX_MAC_PARAM_D_ROUNDS 169 +#define PIDX_MAC_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_MAC_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK +#define PIDX_MAC_PARAM_FIPS_NO_SHORT_MAC PIDX_PROV_PARAM_NO_SHORT_MAC +#define PIDX_MAC_PARAM_IV 52 +#define PIDX_MAC_PARAM_KEY 128 +#define PIDX_MAC_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_MAC_PARAM_SALT 135 +#define PIDX_MAC_PARAM_SIZE 81 +#define PIDX_MAC_PARAM_TLS_DATA_SIZE 170 +#define PIDX_MAC_PARAM_XOF 83 +#define PIDX_OBJECT_PARAM_DATA 118 +#define PIDX_OBJECT_PARAM_DATA_STRUCTURE 171 +#define PIDX_OBJECT_PARAM_DATA_TYPE 172 +#define PIDX_OBJECT_PARAM_DESC 173 +#define PIDX_OBJECT_PARAM_INPUT_TYPE 174 +#define PIDX_OBJECT_PARAM_REFERENCE 175 +#define PIDX_OBJECT_PARAM_TYPE 142 +#define PIDX_PASSPHRASE_PARAM_INFO 123 +#define PIDX_PKEY_PARAM_ALGORITHM_ID PIDX_ALG_PARAM_ALGORITHM_ID +#define PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS +#define PIDX_PKEY_PARAM_BITS 176 +#define PIDX_PKEY_PARAM_CIPHER PIDX_ALG_PARAM_CIPHER +#define PIDX_PKEY_PARAM_DEFAULT_DIGEST 177 +#define PIDX_PKEY_PARAM_DHKEM_IKM 178 +#define PIDX_PKEY_PARAM_DH_GENERATOR 179 +#define PIDX_PKEY_PARAM_DH_PRIV_LEN 180 +#define PIDX_PKEY_PARAM_DIGEST PIDX_ALG_PARAM_DIGEST +#define PIDX_PKEY_PARAM_DIGEST_SIZE 181 +#define PIDX_PKEY_PARAM_DIST_ID 182 +#define PIDX_PKEY_PARAM_EC_A 183 +#define PIDX_PKEY_PARAM_EC_B 184 +#define PIDX_PKEY_PARAM_EC_CHAR2_M 185 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K1 186 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K2 187 +#define PIDX_PKEY_PARAM_EC_CHAR2_PP_K3 188 +#define PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS 189 +#define PIDX_PKEY_PARAM_EC_CHAR2_TYPE 190 +#define PIDX_PKEY_PARAM_EC_COFACTOR 191 +#define PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS 192 +#define PIDX_PKEY_PARAM_EC_ENCODING 193 +#define PIDX_PKEY_PARAM_EC_FIELD_TYPE 194 +#define PIDX_PKEY_PARAM_EC_GENERATOR 195 +#define PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE 196 +#define PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC 197 +#define PIDX_PKEY_PARAM_EC_ORDER 198 +#define PIDX_PKEY_PARAM_EC_P 138 +#define PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT 199 +#define PIDX_PKEY_PARAM_EC_PUB_X 200 +#define PIDX_PKEY_PARAM_EC_PUB_Y 201 +#define PIDX_PKEY_PARAM_EC_SEED 140 +#define PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY 202 +#define PIDX_PKEY_PARAM_ENGINE PIDX_ALG_PARAM_ENGINE +#define PIDX_PKEY_PARAM_FFC_COFACTOR 203 +#define PIDX_PKEY_PARAM_FFC_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_PKEY_PARAM_FFC_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_FFC_G 204 +#define PIDX_PKEY_PARAM_FFC_GINDEX 205 +#define PIDX_PKEY_PARAM_FFC_H 206 +#define PIDX_PKEY_PARAM_FFC_P 138 +#define PIDX_PKEY_PARAM_FFC_PBITS 207 +#define PIDX_PKEY_PARAM_FFC_PCOUNTER 208 +#define PIDX_PKEY_PARAM_FFC_Q 209 +#define PIDX_PKEY_PARAM_FFC_QBITS 210 +#define PIDX_PKEY_PARAM_FFC_SEED 140 +#define PIDX_PKEY_PARAM_FFC_TYPE 142 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_G 211 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY 212 +#define PIDX_PKEY_PARAM_FFC_VALIDATE_PQ 213 +#define PIDX_PKEY_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK 214 +#define 
PIDX_PKEY_PARAM_FIPS_KEY_CHECK 215 +#define PIDX_PKEY_PARAM_FIPS_SIGN_CHECK 216 +#define PIDX_PKEY_PARAM_GROUP_NAME 217 +#define PIDX_PKEY_PARAM_IMPLICIT_REJECTION 8 +#define PIDX_PKEY_PARAM_MANDATORY_DIGEST 218 +#define PIDX_PKEY_PARAM_MASKGENFUNC 219 +#define PIDX_PKEY_PARAM_MAX_SIZE 220 +#define PIDX_PKEY_PARAM_MGF1_DIGEST 221 +#define PIDX_PKEY_PARAM_MGF1_PROPERTIES 222 +#define PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS 223 +#define PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS 224 +#define PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED 225 +#define PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED 226 +#define PIDX_PKEY_PARAM_ML_DSA_SEED 140 +#define PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE 227 +#define PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS 228 +#define PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS 229 +#define PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED 230 +#define PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED 231 +#define PIDX_PKEY_PARAM_ML_KEM_SEED 140 +#define PIDX_PKEY_PARAM_PAD_MODE 232 +#define PIDX_PKEY_PARAM_PRIV_KEY 233 +#define PIDX_PKEY_PARAM_PROPERTIES PIDX_ALG_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_PUB_KEY 234 +#define PIDX_PKEY_PARAM_RSA_BITS PIDX_PKEY_PARAM_BITS +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT 235 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT1 236 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT2 237 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT3 238 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT4 239 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT5 240 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT6 241 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT7 242 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT8 243 +#define PIDX_PKEY_PARAM_RSA_COEFFICIENT9 244 +#define PIDX_PKEY_PARAM_RSA_D 245 +#define PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ 246 +#define PIDX_PKEY_PARAM_RSA_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_PKEY_PARAM_RSA_DIGEST_PROPS PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_PKEY_PARAM_RSA_E 247 +#define PIDX_PKEY_PARAM_RSA_EXPONENT 248 +#define PIDX_PKEY_PARAM_RSA_EXPONENT1 249 +#define PIDX_PKEY_PARAM_RSA_EXPONENT10 250 +#define PIDX_PKEY_PARAM_RSA_EXPONENT2 251 +#define PIDX_PKEY_PARAM_RSA_EXPONENT3 252 +#define PIDX_PKEY_PARAM_RSA_EXPONENT4 253 +#define PIDX_PKEY_PARAM_RSA_EXPONENT5 254 +#define PIDX_PKEY_PARAM_RSA_EXPONENT6 255 +#define PIDX_PKEY_PARAM_RSA_EXPONENT7 256 +#define PIDX_PKEY_PARAM_RSA_EXPONENT8 257 +#define PIDX_PKEY_PARAM_RSA_EXPONENT9 258 +#define PIDX_PKEY_PARAM_RSA_FACTOR 259 +#define PIDX_PKEY_PARAM_RSA_FACTOR1 260 +#define PIDX_PKEY_PARAM_RSA_FACTOR10 261 +#define PIDX_PKEY_PARAM_RSA_FACTOR2 262 +#define PIDX_PKEY_PARAM_RSA_FACTOR3 263 +#define PIDX_PKEY_PARAM_RSA_FACTOR4 264 +#define PIDX_PKEY_PARAM_RSA_FACTOR5 265 +#define PIDX_PKEY_PARAM_RSA_FACTOR6 266 +#define PIDX_PKEY_PARAM_RSA_FACTOR7 267 +#define PIDX_PKEY_PARAM_RSA_FACTOR8 268 +#define PIDX_PKEY_PARAM_RSA_FACTOR9 269 +#define PIDX_PKEY_PARAM_RSA_MASKGENFUNC PIDX_PKEY_PARAM_MASKGENFUNC +#define PIDX_PKEY_PARAM_RSA_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_PKEY_PARAM_RSA_N 137 +#define PIDX_PKEY_PARAM_RSA_PRIMES 270 +#define PIDX_PKEY_PARAM_RSA_PSS_SALTLEN 271 +#define PIDX_PKEY_PARAM_RSA_TEST_P1 272 +#define PIDX_PKEY_PARAM_RSA_TEST_P2 273 +#define PIDX_PKEY_PARAM_RSA_TEST_Q1 274 +#define PIDX_PKEY_PARAM_RSA_TEST_Q2 275 +#define PIDX_PKEY_PARAM_RSA_TEST_XP 276 +#define PIDX_PKEY_PARAM_RSA_TEST_XP1 277 +#define PIDX_PKEY_PARAM_RSA_TEST_XP2 278 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ 279 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ1 280 +#define PIDX_PKEY_PARAM_RSA_TEST_XQ2 281 +#define PIDX_PKEY_PARAM_SECURITY_BITS 282 +#define PIDX_PKEY_PARAM_SLH_DSA_SEED 140 +#define 
PIDX_PKEY_PARAM_USE_COFACTOR_ECDH PIDX_PKEY_PARAM_USE_COFACTOR_FLAG +#define PIDX_PKEY_PARAM_USE_COFACTOR_FLAG 283 +#define PIDX_PROV_PARAM_BUILDINFO 284 +#define PIDX_PROV_PARAM_CORE_MODULE_FILENAME 285 +#define PIDX_PROV_PARAM_CORE_PROV_NAME 286 +#define PIDX_PROV_PARAM_CORE_VERSION 287 +#define PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST 288 +#define PIDX_PROV_PARAM_DSA_SIGN_DISABLED 289 +#define PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK 290 +#define PIDX_PROV_PARAM_HKDF_DIGEST_CHECK 291 +#define PIDX_PROV_PARAM_HKDF_KEY_CHECK 292 +#define PIDX_PROV_PARAM_HMAC_KEY_CHECK 293 +#define PIDX_PROV_PARAM_KBKDF_KEY_CHECK 294 +#define PIDX_PROV_PARAM_KMAC_KEY_CHECK 295 +#define PIDX_PROV_PARAM_NAME 296 +#define PIDX_PROV_PARAM_NO_SHORT_MAC 297 +#define PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK 298 +#define PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED 299 +#define PIDX_PROV_PARAM_RSA_PSS_SALTLEN_CHECK 300 +#define PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED 301 +#define PIDX_PROV_PARAM_SECURITY_CHECKS 302 +#define PIDX_PROV_PARAM_SELF_TEST_DESC 303 +#define PIDX_PROV_PARAM_SELF_TEST_PHASE 304 +#define PIDX_PROV_PARAM_SELF_TEST_TYPE 305 +#define PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK 306 +#define PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK 307 +#define PIDX_PROV_PARAM_SSHKDF_KEY_CHECK 308 +#define PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK 309 +#define PIDX_PROV_PARAM_SSKDF_KEY_CHECK 310 +#define PIDX_PROV_PARAM_STATUS 311 +#define PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED 312 +#define PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK 313 +#define PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK 314 +#define PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK 315 +#define PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK 316 +#define PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK 317 +#define PIDX_PROV_PARAM_VERSION 115 +#define PIDX_PROV_PARAM_X942KDF_KEY_CHECK 318 +#define PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK 319 +#define PIDX_PROV_PARAM_X963KDF_KEY_CHECK 320 +#define PIDX_RAND_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_RAND_PARAM_GENERATE 321 +#define PIDX_RAND_PARAM_MAX_REQUEST 322 +#define PIDX_RAND_PARAM_STATE 323 +#define PIDX_RAND_PARAM_STRENGTH 324 +#define PIDX_RAND_PARAM_TEST_ENTROPY 325 +#define PIDX_RAND_PARAM_TEST_NONCE 326 +#define PIDX_SIGNATURE_PARAM_ADD_RANDOM 327 +#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID PIDX_PKEY_PARAM_ALGORITHM_ID +#define PIDX_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS PIDX_PKEY_PARAM_ALGORITHM_ID_PARAMS +#define PIDX_SIGNATURE_PARAM_CONTEXT_STRING 328 +#define PIDX_SIGNATURE_PARAM_DETERMINISTIC 329 +#define PIDX_SIGNATURE_PARAM_DIGEST PIDX_PKEY_PARAM_DIGEST +#define PIDX_SIGNATURE_PARAM_DIGEST_SIZE PIDX_PKEY_PARAM_DIGEST_SIZE +#define PIDX_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR +#define PIDX_SIGNATURE_PARAM_FIPS_DIGEST_CHECK PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK +#define PIDX_SIGNATURE_PARAM_FIPS_KEY_CHECK PIDX_PKEY_PARAM_FIPS_KEY_CHECK +#define PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK 300 +#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_CHECK PIDX_PKEY_PARAM_FIPS_SIGN_CHECK +#define PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK 330 +#define PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE 331 +#define PIDX_SIGNATURE_PARAM_INSTANCE 332 +#define PIDX_SIGNATURE_PARAM_KAT 333 +#define PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING 334 +#define PIDX_SIGNATURE_PARAM_MGF1_DIGEST PIDX_PKEY_PARAM_MGF1_DIGEST +#define PIDX_SIGNATURE_PARAM_MGF1_PROPERTIES PIDX_PKEY_PARAM_MGF1_PROPERTIES +#define PIDX_SIGNATURE_PARAM_MU 335 +#define PIDX_SIGNATURE_PARAM_NONCE_TYPE 336 +#define PIDX_SIGNATURE_PARAM_PAD_MODE 
PIDX_PKEY_PARAM_PAD_MODE +#define PIDX_SIGNATURE_PARAM_PROPERTIES PIDX_PKEY_PARAM_PROPERTIES +#define PIDX_SIGNATURE_PARAM_PSS_SALTLEN 271 +#define PIDX_SIGNATURE_PARAM_SIGNATURE 337 +#define PIDX_SIGNATURE_PARAM_TEST_ENTROPY 338 +#define PIDX_SKEY_PARAM_KEY_LENGTH 339 +#define PIDX_SKEY_PARAM_RAW_BYTES 340 +#define PIDX_STORE_PARAM_ALIAS 341 +#define PIDX_STORE_PARAM_DIGEST 3 +#define PIDX_STORE_PARAM_EXPECT 342 +#define PIDX_STORE_PARAM_FINGERPRINT 343 +#define PIDX_STORE_PARAM_INPUT_TYPE 174 +#define PIDX_STORE_PARAM_ISSUER 296 +#define PIDX_STORE_PARAM_PROPERTIES 7 +#define PIDX_STORE_PARAM_SERIAL 344 +#define PIDX_STORE_PARAM_SUBJECT 345 diff --git a/contrib/openssl-cmake/common/include/openssl/asn1.h b/contrib/openssl-cmake/common/include/openssl/asn1.h new file mode 100644 index 000000000000..15e9e44674b0 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/asn1.h @@ -0,0 +1,1134 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/asn1.h.in + * + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ASN1_H +# define OPENSSL_ASN1_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ASN1_H +# endif + +# ifndef OPENSSL_NO_STDIO +# include +# endif +# include +# include +# include +# include +# include +# include +# include + +# include +# include + +# ifdef OPENSSL_BUILD_SHLIBCRYPTO +# undef OPENSSL_EXTERN +# define OPENSSL_EXTERN OPENSSL_EXPORT +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define V_ASN1_UNIVERSAL 0x00 +# define V_ASN1_APPLICATION 0x40 +# define V_ASN1_CONTEXT_SPECIFIC 0x80 +# define V_ASN1_PRIVATE 0xc0 + +# define V_ASN1_CONSTRUCTED 0x20 +# define V_ASN1_PRIMITIVE_TAG 0x1f +# define V_ASN1_PRIMATIVE_TAG /*compat*/ V_ASN1_PRIMITIVE_TAG + +# define V_ASN1_APP_CHOOSE -2 /* let the recipient choose */ +# define V_ASN1_OTHER -3 /* used in ASN1_TYPE */ +# define V_ASN1_ANY -4 /* used in ASN1 template code */ + +# define V_ASN1_UNDEF -1 +/* ASN.1 tag values */ +# define V_ASN1_EOC 0 +# define V_ASN1_BOOLEAN 1 +# define V_ASN1_INTEGER 2 +# define V_ASN1_BIT_STRING 3 +# define V_ASN1_OCTET_STRING 4 +# define V_ASN1_NULL 5 +# define V_ASN1_OBJECT 6 +# define V_ASN1_OBJECT_DESCRIPTOR 7 +# define V_ASN1_EXTERNAL 8 +# define V_ASN1_REAL 9 +# define V_ASN1_ENUMERATED 10 +# define V_ASN1_UTF8STRING 12 +# define V_ASN1_SEQUENCE 16 +# define V_ASN1_SET 17 +# define V_ASN1_NUMERICSTRING 18 +# define V_ASN1_PRINTABLESTRING 19 +# define V_ASN1_T61STRING 20 +# define V_ASN1_TELETEXSTRING 20 /* alias */ +# define V_ASN1_VIDEOTEXSTRING 21 +# define V_ASN1_IA5STRING 22 +# define V_ASN1_UTCTIME 23 +# define V_ASN1_GENERALIZEDTIME 24 +# define V_ASN1_GRAPHICSTRING 25 +# define V_ASN1_ISO64STRING 26 +# define V_ASN1_VISIBLESTRING 26 /* alias */ +# define V_ASN1_GENERALSTRING 27 +# define V_ASN1_UNIVERSALSTRING 28 +# define V_ASN1_BMPSTRING 30 + +/* + * NB the constants below are used internally by ASN1_INTEGER + * and ASN1_ENUMERATED to indicate the sign. They are *not* on + * the wire tag values. 
+ */ + +# define V_ASN1_NEG 0x100 +# define V_ASN1_NEG_INTEGER (2 | V_ASN1_NEG) +# define V_ASN1_NEG_ENUMERATED (10 | V_ASN1_NEG) + +/* For use with d2i_ASN1_type_bytes() */ +# define B_ASN1_NUMERICSTRING 0x0001 +# define B_ASN1_PRINTABLESTRING 0x0002 +# define B_ASN1_T61STRING 0x0004 +# define B_ASN1_TELETEXSTRING 0x0004 +# define B_ASN1_VIDEOTEXSTRING 0x0008 +# define B_ASN1_IA5STRING 0x0010 +# define B_ASN1_GRAPHICSTRING 0x0020 +# define B_ASN1_ISO64STRING 0x0040 +# define B_ASN1_VISIBLESTRING 0x0040 +# define B_ASN1_GENERALSTRING 0x0080 +# define B_ASN1_UNIVERSALSTRING 0x0100 +# define B_ASN1_OCTET_STRING 0x0200 +# define B_ASN1_BIT_STRING 0x0400 +# define B_ASN1_BMPSTRING 0x0800 +# define B_ASN1_UNKNOWN 0x1000 +# define B_ASN1_UTF8STRING 0x2000 +# define B_ASN1_UTCTIME 0x4000 +# define B_ASN1_GENERALIZEDTIME 0x8000 +# define B_ASN1_SEQUENCE 0x10000 +/* For use with ASN1_mbstring_copy() */ +# define MBSTRING_FLAG 0x1000 +# define MBSTRING_UTF8 (MBSTRING_FLAG) +# define MBSTRING_ASC (MBSTRING_FLAG|1) +# define MBSTRING_BMP (MBSTRING_FLAG|2) +# define MBSTRING_UNIV (MBSTRING_FLAG|4) +# define SMIME_OLDMIME 0x400 +# define SMIME_CRLFEOL 0x800 +# define SMIME_STREAM 0x1000 + +/* Stacks for types not otherwise defined in this header */ +SKM_DEFINE_STACK_OF_INTERNAL(X509_ALGOR, X509_ALGOR, X509_ALGOR) +#define sk_X509_ALGOR_num(sk) OPENSSL_sk_num(ossl_check_const_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_value(sk, idx) ((X509_ALGOR *)OPENSSL_sk_value(ossl_check_const_X509_ALGOR_sk_type(sk), (idx))) +#define sk_X509_ALGOR_new(cmp) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new(ossl_check_X509_ALGOR_compfunc_type(cmp))) +#define sk_X509_ALGOR_new_null() ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new_null()) +#define sk_X509_ALGOR_new_reserve(cmp, n) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_new_reserve(ossl_check_X509_ALGOR_compfunc_type(cmp), (n))) +#define sk_X509_ALGOR_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_ALGOR_sk_type(sk), (n)) +#define sk_X509_ALGOR_free(sk) OPENSSL_sk_free(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_zero(sk) OPENSSL_sk_zero(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_delete(sk, i) ((X509_ALGOR *)OPENSSL_sk_delete(ossl_check_X509_ALGOR_sk_type(sk), (i))) +#define sk_X509_ALGOR_delete_ptr(sk, ptr) ((X509_ALGOR *)OPENSSL_sk_delete_ptr(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr))) +#define sk_X509_ALGOR_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_pop(sk) ((X509_ALGOR *)OPENSSL_sk_pop(ossl_check_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_shift(sk) ((X509_ALGOR *)OPENSSL_sk_shift(ossl_check_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_ALGOR_sk_type(sk),ossl_check_X509_ALGOR_freefunc_type(freefunc)) +#define sk_X509_ALGOR_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr), (idx)) +#define sk_X509_ALGOR_set(sk, idx, ptr) ((X509_ALGOR *)OPENSSL_sk_set(ossl_check_X509_ALGOR_sk_type(sk), (idx), ossl_check_X509_ALGOR_type(ptr))) +#define sk_X509_ALGOR_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr)) +#define sk_X509_ALGOR_find_all(sk, ptr, 
pnum) OPENSSL_sk_find_all(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_type(ptr), pnum) +#define sk_X509_ALGOR_sort(sk) OPENSSL_sk_sort(ossl_check_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_ALGOR_sk_type(sk)) +#define sk_X509_ALGOR_dup(sk) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_dup(ossl_check_const_X509_ALGOR_sk_type(sk))) +#define sk_X509_ALGOR_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_ALGOR) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_copyfunc_type(copyfunc), ossl_check_X509_ALGOR_freefunc_type(freefunc))) +#define sk_X509_ALGOR_set_cmp_func(sk, cmp) ((sk_X509_ALGOR_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_ALGOR_sk_type(sk), ossl_check_X509_ALGOR_compfunc_type(cmp))) + + + +# define ASN1_STRING_FLAG_BITS_LEFT 0x08 /* Set if 0x07 has bits left value */ +/* + * This indicates that the ASN1_STRING is not a real value but just a place + * holder for the location where indefinite length constructed data should be + * inserted in the memory buffer + */ +# define ASN1_STRING_FLAG_NDEF 0x010 + +/* + * This flag is used by the CMS code to indicate that a string is not + * complete and is a place holder for content when it had all been accessed. + * The flag will be reset when content has been written to it. + */ + +# define ASN1_STRING_FLAG_CONT 0x020 +/* + * This flag is used by ASN1 code to indicate an ASN1_STRING is an MSTRING + * type. + */ +# define ASN1_STRING_FLAG_MSTRING 0x040 +/* String is embedded and only content should be freed */ +# define ASN1_STRING_FLAG_EMBED 0x080 +/* String should be parsed in RFC 5280's time format */ +# define ASN1_STRING_FLAG_X509_TIME 0x100 +/* This is the base type that holds just about everything :-) */ +struct asn1_string_st { + int length; + int type; + unsigned char *data; + /* + * The value of the following field depends on the type being held. It + * is mostly being used for BIT_STRING so if the input data has a + * non-zero 'unused bits' value, it will be handled correctly + */ + long flags; +}; + +/* + * ASN1_ENCODING structure: this is used to save the received encoding of an + * ASN1 type. This is useful to get round problems with invalid encodings + * which can break signatures. + */ + +typedef struct ASN1_ENCODING_st { + unsigned char *enc; /* DER encoding */ + long len; /* Length of encoding */ + int modified; /* set to 1 if 'enc' is invalid */ +} ASN1_ENCODING; + +/* Used with ASN1 LONG type: if a long is set to this it is omitted */ +# define ASN1_LONG_UNDEF 0x7fffffffL + +# define STABLE_FLAGS_MALLOC 0x01 +/* + * A zero passed to ASN1_STRING_TABLE_new_add for the flags is interpreted + * as "don't change" and STABLE_FLAGS_MALLOC is always set. By setting + * STABLE_FLAGS_MALLOC only we can clear the existing value. Use the alias + * STABLE_FLAGS_CLEAR to reflect this. 
+ */ +# define STABLE_FLAGS_CLEAR STABLE_FLAGS_MALLOC +# define STABLE_NO_MASK 0x02 +# define DIRSTRING_TYPE \ + (B_ASN1_PRINTABLESTRING|B_ASN1_T61STRING|B_ASN1_BMPSTRING|B_ASN1_UTF8STRING) +# define PKCS9STRING_TYPE (DIRSTRING_TYPE|B_ASN1_IA5STRING) + +struct asn1_string_table_st { + int nid; + long minsize; + long maxsize; + unsigned long mask; + unsigned long flags; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_STRING_TABLE, ASN1_STRING_TABLE, ASN1_STRING_TABLE) +#define sk_ASN1_STRING_TABLE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_value(sk, idx) ((ASN1_STRING_TABLE *)OPENSSL_sk_value(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk), (idx))) +#define sk_ASN1_STRING_TABLE_new(cmp) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new(ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp))) +#define sk_ASN1_STRING_TABLE_new_null() ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_STRING_TABLE_new_reserve(cmp, n) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp), (n))) +#define sk_ASN1_STRING_TABLE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (n)) +#define sk_ASN1_STRING_TABLE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_delete(sk, i) ((ASN1_STRING_TABLE *)OPENSSL_sk_delete(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (i))) +#define sk_ASN1_STRING_TABLE_delete_ptr(sk, ptr) ((ASN1_STRING_TABLE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr))) +#define sk_ASN1_STRING_TABLE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_pop(sk) ((ASN1_STRING_TABLE *)OPENSSL_sk_pop(ossl_check_ASN1_STRING_TABLE_sk_type(sk))) +#define sk_ASN1_STRING_TABLE_shift(sk) ((ASN1_STRING_TABLE *)OPENSSL_sk_shift(ossl_check_ASN1_STRING_TABLE_sk_type(sk))) +#define sk_ASN1_STRING_TABLE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_STRING_TABLE_sk_type(sk),ossl_check_ASN1_STRING_TABLE_freefunc_type(freefunc)) +#define sk_ASN1_STRING_TABLE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr), (idx)) +#define sk_ASN1_STRING_TABLE_set(sk, idx, ptr) ((ASN1_STRING_TABLE *)OPENSSL_sk_set(ossl_check_ASN1_STRING_TABLE_sk_type(sk), (idx), ossl_check_ASN1_STRING_TABLE_type(ptr))) +#define sk_ASN1_STRING_TABLE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr)) +#define sk_ASN1_STRING_TABLE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_type(ptr), pnum) +#define sk_ASN1_STRING_TABLE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk)) +#define sk_ASN1_STRING_TABLE_dup(sk) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk))) +#define 
sk_ASN1_STRING_TABLE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_STRING_TABLE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_copyfunc_type(copyfunc), ossl_check_ASN1_STRING_TABLE_freefunc_type(freefunc))) +#define sk_ASN1_STRING_TABLE_set_cmp_func(sk, cmp) ((sk_ASN1_STRING_TABLE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_STRING_TABLE_sk_type(sk), ossl_check_ASN1_STRING_TABLE_compfunc_type(cmp))) + + +/* size limits: this stuff is taken straight from RFC2459 */ + +# define ub_name 32768 +# define ub_common_name 64 +# define ub_locality_name 128 +# define ub_state_name 128 +# define ub_organization_name 64 +# define ub_organization_unit_name 64 +# define ub_title 64 +# define ub_email_address 128 + +/* + * Declarations for template structures: for full definitions see asn1t.h + */ +typedef struct ASN1_TEMPLATE_st ASN1_TEMPLATE; +typedef struct ASN1_TLC_st ASN1_TLC; +/* This is just an opaque pointer */ +typedef struct ASN1_VALUE_st ASN1_VALUE; + +/* Declare ASN1 functions: the implement macro is in asn1t.h */ + +/* + * The mysterious 'extern' that's passed to some macros is innocuous, + * and is there to quiet pre-C99 compilers that may complain about empty + * arguments in macro calls. + */ + +# define DECLARE_ASN1_FUNCTIONS_attr(attr, type) \ + DECLARE_ASN1_FUNCTIONS_name_attr(attr, type, type) +# define DECLARE_ASN1_FUNCTIONS(type) \ + DECLARE_ASN1_FUNCTIONS_attr(extern, type) + +# define DECLARE_ASN1_ALLOC_FUNCTIONS_attr(attr, type) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, type) +# define DECLARE_ASN1_ALLOC_FUNCTIONS(type) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_attr(extern, type) + +# define DECLARE_ASN1_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(attr, type, name) +# define DECLARE_ASN1_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_attr(attr, type, itname, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(attr, type, name) \ + DECLARE_ASN1_ITEM_attr(attr, itname) +# define DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_attr(extern, type, itname, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(attr, type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_attr(attr, type, name, name) +# define DECLARE_ASN1_ENCODE_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(attr, type, name) \ + attr type *d2i_##name(type **a, const unsigned char **in, long len); \ + attr int i2d_##name(const type *a, unsigned char **out); +# define DECLARE_ASN1_ENCODE_FUNCTIONS_only(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(extern, type, name) + +# define DECLARE_ASN1_NDEF_FUNCTION_attr(attr, name) \ + attr int i2d_##name##_NDEF(const name *a, unsigned char **out); +# define DECLARE_ASN1_NDEF_FUNCTION(name) \ + DECLARE_ASN1_NDEF_FUNCTION_attr(extern, name) + +# define DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(attr, type, name) \ + attr type *name##_new(void); \ + attr void name##_free(type *a); +# define DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name_attr(extern, type, name) + +# define DECLARE_ASN1_DUP_FUNCTION_attr(attr, type) \ + DECLARE_ASN1_DUP_FUNCTION_name_attr(attr, type, type) +# define DECLARE_ASN1_DUP_FUNCTION(type) \ + DECLARE_ASN1_DUP_FUNCTION_attr(extern, type) + +# define 
DECLARE_ASN1_DUP_FUNCTION_name_attr(attr, type, name) \ + attr type *name##_dup(const type *a); +# define DECLARE_ASN1_DUP_FUNCTION_name(type, name) \ + DECLARE_ASN1_DUP_FUNCTION_name_attr(extern, type, name) + +# define DECLARE_ASN1_PRINT_FUNCTION_attr(attr, stname) \ + DECLARE_ASN1_PRINT_FUNCTION_fname_attr(attr, stname, stname) +# define DECLARE_ASN1_PRINT_FUNCTION(stname) \ + DECLARE_ASN1_PRINT_FUNCTION_attr(extern, stname) + +# define DECLARE_ASN1_PRINT_FUNCTION_fname_attr(attr, stname, fname) \ + attr int fname##_print_ctx(BIO *out, const stname *x, int indent, \ + const ASN1_PCTX *pctx); +# define DECLARE_ASN1_PRINT_FUNCTION_fname(stname, fname) \ + DECLARE_ASN1_PRINT_FUNCTION_fname_attr(extern, stname, fname) + +# define D2I_OF(type) type *(*)(type **,const unsigned char **,long) +# define I2D_OF(type) int (*)(const type *,unsigned char **) + +# define CHECKED_D2I_OF(type, d2i) \ + ((d2i_of_void*) (1 ? d2i : ((D2I_OF(type))0))) +# define CHECKED_I2D_OF(type, i2d) \ + ((i2d_of_void*) (1 ? i2d : ((I2D_OF(type))0))) +# define CHECKED_NEW_OF(type, xnew) \ + ((void *(*)(void)) (1 ? xnew : ((type *(*)(void))0))) +# define CHECKED_PTR_OF(type, p) \ + ((void*) (1 ? p : (type*)0)) +# define CHECKED_PPTR_OF(type, p) \ + ((void**) (1 ? p : (type**)0)) + +# define TYPEDEF_D2I_OF(type) typedef type *d2i_of_##type(type **,const unsigned char **,long) +# define TYPEDEF_I2D_OF(type) typedef int i2d_of_##type(const type *,unsigned char **) +# define TYPEDEF_D2I2D_OF(type) TYPEDEF_D2I_OF(type); TYPEDEF_I2D_OF(type) + +typedef void *d2i_of_void(void **, const unsigned char **, long); +typedef int i2d_of_void(const void *, unsigned char **); +typedef int OSSL_i2d_of_void_ctx(const void *, unsigned char **, void *vctx); + +/*- + * The following macros and typedefs allow an ASN1_ITEM + * to be embedded in a structure and referenced. Since + * the ASN1_ITEM pointers need to be globally accessible + * (possibly from shared libraries) they may exist in + * different forms. On platforms that support it the + * ASN1_ITEM structure itself will be globally exported. + * Other platforms will export a function that returns + * an ASN1_ITEM pointer. + * + * To handle both cases transparently the macros below + * should be used instead of hard coding an ASN1_ITEM + * pointer in a structure. + * + * The structure will look like this: + * + * typedef struct SOMETHING_st { + * ... + * ASN1_ITEM_EXP *iptr; + * ... + * } SOMETHING; + * + * It would be initialised as e.g.: + * + * SOMETHING somevar = {...,ASN1_ITEM_ref(X509),...}; + * + * and the actual pointer extracted with: + * + * const ASN1_ITEM *it = ASN1_ITEM_ptr(somevar.iptr); + * + * Finally an ASN1_ITEM pointer can be extracted from an + * appropriate reference with: ASN1_ITEM_rptr(X509). This + * would be used when a function takes an ASN1_ITEM * argument. + * + */ + + +/* + * Platforms that can't easily handle shared global variables are declared as + * functions returning ASN1_ITEM pointers. 
+ */ + +/* ASN1_ITEM pointer exported type */ +typedef const ASN1_ITEM *ASN1_ITEM_EXP (void); + +/* Macro to obtain ASN1_ITEM pointer from exported type */ +# define ASN1_ITEM_ptr(iptr) (iptr()) + +/* Macro to include ASN1_ITEM pointer from base type */ +# define ASN1_ITEM_ref(iptr) (iptr##_it) + +# define ASN1_ITEM_rptr(ref) (ref##_it()) + +# define DECLARE_ASN1_ITEM_attr(attr, name) \ + attr const ASN1_ITEM * name##_it(void); +# define DECLARE_ASN1_ITEM(name) \ + DECLARE_ASN1_ITEM_attr(extern, name) + +/* Parameters used by ASN1_STRING_print_ex() */ + +/* + * These determine which characters to escape: RFC2253 special characters, + * control characters and MSB set characters + */ + +# define ASN1_STRFLGS_ESC_2253 1 +# define ASN1_STRFLGS_ESC_CTRL 2 +# define ASN1_STRFLGS_ESC_MSB 4 + +/* Lower 8 bits are reserved as an output type specifier */ +# define ASN1_DTFLGS_TYPE_MASK 0x0FUL +# define ASN1_DTFLGS_RFC822 0x00UL +# define ASN1_DTFLGS_ISO8601 0x01UL + +/* + * This flag determines how we do escaping: normally RC2253 backslash only, + * set this to use backslash and quote. + */ + +# define ASN1_STRFLGS_ESC_QUOTE 8 + +/* These three flags are internal use only. */ + +/* Character is a valid PrintableString character */ +# define CHARTYPE_PRINTABLESTRING 0x10 +/* Character needs escaping if it is the first character */ +# define CHARTYPE_FIRST_ESC_2253 0x20 +/* Character needs escaping if it is the last character */ +# define CHARTYPE_LAST_ESC_2253 0x40 + +/* + * NB the internal flags are safely reused below by flags handled at the top + * level. + */ + +/* + * If this is set we convert all character strings to UTF8 first + */ + +# define ASN1_STRFLGS_UTF8_CONVERT 0x10 + +/* + * If this is set we don't attempt to interpret content: just assume all + * strings are 1 byte per character. This will produce some pretty odd + * looking output! + */ + +# define ASN1_STRFLGS_IGNORE_TYPE 0x20 + +/* If this is set we include the string type in the output */ +# define ASN1_STRFLGS_SHOW_TYPE 0x40 + +/* + * This determines which strings to display and which to 'dump' (hex dump of + * content octets or DER encoding). We can only dump non character strings or + * everything. If we don't dump 'unknown' they are interpreted as character + * strings with 1 octet per character and are subject to the usual escaping + * options. + */ + +# define ASN1_STRFLGS_DUMP_ALL 0x80 +# define ASN1_STRFLGS_DUMP_UNKNOWN 0x100 + +/* + * These determine what 'dumping' does, we can dump the content octets or the + * DER encoding: both use the RFC2253 #XXXXX notation. + */ + +# define ASN1_STRFLGS_DUMP_DER 0x200 + +/* + * This flag specifies that RC2254 escaping shall be performed. + */ +#define ASN1_STRFLGS_ESC_2254 0x400 + +/* + * All the string flags consistent with RFC2253, escaping control characters + * isn't essential in RFC2253 but it is advisable anyway. 
+ */ + +# define ASN1_STRFLGS_RFC2253 (ASN1_STRFLGS_ESC_2253 | \ + ASN1_STRFLGS_ESC_CTRL | \ + ASN1_STRFLGS_ESC_MSB | \ + ASN1_STRFLGS_UTF8_CONVERT | \ + ASN1_STRFLGS_DUMP_UNKNOWN | \ + ASN1_STRFLGS_DUMP_DER) + + +struct asn1_type_st { + int type; + union { + char *ptr; + ASN1_BOOLEAN boolean; + ASN1_STRING *asn1_string; + ASN1_OBJECT *object; + ASN1_INTEGER *integer; + ASN1_ENUMERATED *enumerated; + ASN1_BIT_STRING *bit_string; + ASN1_OCTET_STRING *octet_string; + ASN1_PRINTABLESTRING *printablestring; + ASN1_T61STRING *t61string; + ASN1_IA5STRING *ia5string; + ASN1_GENERALSTRING *generalstring; + ASN1_BMPSTRING *bmpstring; + ASN1_UNIVERSALSTRING *universalstring; + ASN1_UTCTIME *utctime; + ASN1_GENERALIZEDTIME *generalizedtime; + ASN1_VISIBLESTRING *visiblestring; + ASN1_UTF8STRING *utf8string; + /* + * set and sequence are left complete and still contain the set or + * sequence bytes + */ + ASN1_STRING *set; + ASN1_STRING *sequence; + ASN1_VALUE *asn1_value; + } value; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_TYPE, ASN1_TYPE, ASN1_TYPE) +#define sk_ASN1_TYPE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_value(sk, idx) ((ASN1_TYPE *)OPENSSL_sk_value(ossl_check_const_ASN1_TYPE_sk_type(sk), (idx))) +#define sk_ASN1_TYPE_new(cmp) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new(ossl_check_ASN1_TYPE_compfunc_type(cmp))) +#define sk_ASN1_TYPE_new_null() ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_TYPE_new_reserve(cmp, n) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_TYPE_compfunc_type(cmp), (n))) +#define sk_ASN1_TYPE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_TYPE_sk_type(sk), (n)) +#define sk_ASN1_TYPE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_delete(sk, i) ((ASN1_TYPE *)OPENSSL_sk_delete(ossl_check_ASN1_TYPE_sk_type(sk), (i))) +#define sk_ASN1_TYPE_delete_ptr(sk, ptr) ((ASN1_TYPE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr))) +#define sk_ASN1_TYPE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_pop(sk) ((ASN1_TYPE *)OPENSSL_sk_pop(ossl_check_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_shift(sk) ((ASN1_TYPE *)OPENSSL_sk_shift(ossl_check_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_TYPE_sk_type(sk),ossl_check_ASN1_TYPE_freefunc_type(freefunc)) +#define sk_ASN1_TYPE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr), (idx)) +#define sk_ASN1_TYPE_set(sk, idx, ptr) ((ASN1_TYPE *)OPENSSL_sk_set(ossl_check_ASN1_TYPE_sk_type(sk), (idx), ossl_check_ASN1_TYPE_type(ptr))) +#define sk_ASN1_TYPE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr)) +#define sk_ASN1_TYPE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_type(ptr), pnum) +#define sk_ASN1_TYPE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_TYPE_sk_type(sk)) +#define sk_ASN1_TYPE_dup(sk) 
((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_TYPE_sk_type(sk))) +#define sk_ASN1_TYPE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_TYPE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_copyfunc_type(copyfunc), ossl_check_ASN1_TYPE_freefunc_type(freefunc))) +#define sk_ASN1_TYPE_set_cmp_func(sk, cmp) ((sk_ASN1_TYPE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_TYPE_sk_type(sk), ossl_check_ASN1_TYPE_compfunc_type(cmp))) + + +typedef STACK_OF(ASN1_TYPE) ASN1_SEQUENCE_ANY; + +DECLARE_ASN1_ENCODE_FUNCTIONS_name(ASN1_SEQUENCE_ANY, ASN1_SEQUENCE_ANY) +DECLARE_ASN1_ENCODE_FUNCTIONS_name(ASN1_SEQUENCE_ANY, ASN1_SET_ANY) + +/* This is used to contain a list of bit names */ +typedef struct BIT_STRING_BITNAME_st { + int bitnum; + const char *lname; + const char *sname; +} BIT_STRING_BITNAME; + +# define B_ASN1_TIME \ + B_ASN1_UTCTIME | \ + B_ASN1_GENERALIZEDTIME + +# define B_ASN1_PRINTABLE \ + B_ASN1_NUMERICSTRING| \ + B_ASN1_PRINTABLESTRING| \ + B_ASN1_T61STRING| \ + B_ASN1_IA5STRING| \ + B_ASN1_BIT_STRING| \ + B_ASN1_UNIVERSALSTRING|\ + B_ASN1_BMPSTRING|\ + B_ASN1_UTF8STRING|\ + B_ASN1_SEQUENCE|\ + B_ASN1_UNKNOWN + +# define B_ASN1_DIRECTORYSTRING \ + B_ASN1_PRINTABLESTRING| \ + B_ASN1_TELETEXSTRING|\ + B_ASN1_BMPSTRING|\ + B_ASN1_UNIVERSALSTRING|\ + B_ASN1_UTF8STRING + +# define B_ASN1_DISPLAYTEXT \ + B_ASN1_IA5STRING| \ + B_ASN1_VISIBLESTRING| \ + B_ASN1_BMPSTRING|\ + B_ASN1_UTF8STRING + +DECLARE_ASN1_ALLOC_FUNCTIONS_name(ASN1_TYPE, ASN1_TYPE) +DECLARE_ASN1_ENCODE_FUNCTIONS(ASN1_TYPE, ASN1_ANY, ASN1_TYPE) + +int ASN1_TYPE_get(const ASN1_TYPE *a); +void ASN1_TYPE_set(ASN1_TYPE *a, int type, void *value); +int ASN1_TYPE_set1(ASN1_TYPE *a, int type, const void *value); +int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b); + +ASN1_TYPE *ASN1_TYPE_pack_sequence(const ASN1_ITEM *it, void *s, ASN1_TYPE **t); +void *ASN1_TYPE_unpack_sequence(const ASN1_ITEM *it, const ASN1_TYPE *t); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_OBJECT, ASN1_OBJECT, ASN1_OBJECT) +#define sk_ASN1_OBJECT_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_value(sk, idx) ((ASN1_OBJECT *)OPENSSL_sk_value(ossl_check_const_ASN1_OBJECT_sk_type(sk), (idx))) +#define sk_ASN1_OBJECT_new(cmp) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new(ossl_check_ASN1_OBJECT_compfunc_type(cmp))) +#define sk_ASN1_OBJECT_new_null() ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new_null()) +#define sk_ASN1_OBJECT_new_reserve(cmp, n) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_OBJECT_compfunc_type(cmp), (n))) +#define sk_ASN1_OBJECT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_OBJECT_sk_type(sk), (n)) +#define sk_ASN1_OBJECT_free(sk) OPENSSL_sk_free(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_delete(sk, i) ((ASN1_OBJECT *)OPENSSL_sk_delete(ossl_check_ASN1_OBJECT_sk_type(sk), (i))) +#define sk_ASN1_OBJECT_delete_ptr(sk, ptr) ((ASN1_OBJECT *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr))) +#define sk_ASN1_OBJECT_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_pop(sk) ((ASN1_OBJECT *)OPENSSL_sk_pop(ossl_check_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_shift(sk) ((ASN1_OBJECT 
*)OPENSSL_sk_shift(ossl_check_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_OBJECT_sk_type(sk),ossl_check_ASN1_OBJECT_freefunc_type(freefunc)) +#define sk_ASN1_OBJECT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr), (idx)) +#define sk_ASN1_OBJECT_set(sk, idx, ptr) ((ASN1_OBJECT *)OPENSSL_sk_set(ossl_check_ASN1_OBJECT_sk_type(sk), (idx), ossl_check_ASN1_OBJECT_type(ptr))) +#define sk_ASN1_OBJECT_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr)) +#define sk_ASN1_OBJECT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_type(ptr), pnum) +#define sk_ASN1_OBJECT_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_OBJECT_sk_type(sk)) +#define sk_ASN1_OBJECT_dup(sk) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_dup(ossl_check_const_ASN1_OBJECT_sk_type(sk))) +#define sk_ASN1_OBJECT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_OBJECT) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_copyfunc_type(copyfunc), ossl_check_ASN1_OBJECT_freefunc_type(freefunc))) +#define sk_ASN1_OBJECT_set_cmp_func(sk, cmp) ((sk_ASN1_OBJECT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_OBJECT_sk_type(sk), ossl_check_ASN1_OBJECT_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(ASN1_OBJECT) + +ASN1_STRING *ASN1_STRING_new(void); +void ASN1_STRING_free(ASN1_STRING *a); +void ASN1_STRING_clear_free(ASN1_STRING *a); +int ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str); +DECLARE_ASN1_DUP_FUNCTION(ASN1_STRING) +ASN1_STRING *ASN1_STRING_type_new(int type); +int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b); + /* + * Since this is used to store all sorts of things, via macros, for now, + * make its data void * + */ +int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len); +void ASN1_STRING_set0(ASN1_STRING *str, void *data, int len); +int ASN1_STRING_length(const ASN1_STRING *x); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 void ASN1_STRING_length_set(ASN1_STRING *x, int n); +# endif +int ASN1_STRING_type(const ASN1_STRING *x); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 unsigned char *ASN1_STRING_data(ASN1_STRING *x); +# endif +const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *x); + +DECLARE_ASN1_FUNCTIONS(ASN1_BIT_STRING) +int ASN1_BIT_STRING_set(ASN1_BIT_STRING *a, unsigned char *d, int length); +int ASN1_BIT_STRING_set_bit(ASN1_BIT_STRING *a, int n, int value); +int ASN1_BIT_STRING_get_bit(const ASN1_BIT_STRING *a, int n); +int ASN1_BIT_STRING_check(const ASN1_BIT_STRING *a, + const unsigned char *flags, int flags_len); + +int ASN1_BIT_STRING_name_print(BIO *out, ASN1_BIT_STRING *bs, + BIT_STRING_BITNAME *tbl, int indent); +int ASN1_BIT_STRING_num_asc(const char *name, BIT_STRING_BITNAME *tbl); +int ASN1_BIT_STRING_set_asc(ASN1_BIT_STRING *bs, const char *name, int value, + BIT_STRING_BITNAME *tbl); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_INTEGER, ASN1_INTEGER, ASN1_INTEGER) +#define sk_ASN1_INTEGER_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_value(sk, idx) ((ASN1_INTEGER *)OPENSSL_sk_value(ossl_check_const_ASN1_INTEGER_sk_type(sk), 
(idx))) +#define sk_ASN1_INTEGER_new(cmp) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new(ossl_check_ASN1_INTEGER_compfunc_type(cmp))) +#define sk_ASN1_INTEGER_new_null() ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new_null()) +#define sk_ASN1_INTEGER_new_reserve(cmp, n) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_INTEGER_compfunc_type(cmp), (n))) +#define sk_ASN1_INTEGER_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_INTEGER_sk_type(sk), (n)) +#define sk_ASN1_INTEGER_free(sk) OPENSSL_sk_free(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_delete(sk, i) ((ASN1_INTEGER *)OPENSSL_sk_delete(ossl_check_ASN1_INTEGER_sk_type(sk), (i))) +#define sk_ASN1_INTEGER_delete_ptr(sk, ptr) ((ASN1_INTEGER *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr))) +#define sk_ASN1_INTEGER_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_pop(sk) ((ASN1_INTEGER *)OPENSSL_sk_pop(ossl_check_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_shift(sk) ((ASN1_INTEGER *)OPENSSL_sk_shift(ossl_check_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_INTEGER_sk_type(sk),ossl_check_ASN1_INTEGER_freefunc_type(freefunc)) +#define sk_ASN1_INTEGER_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr), (idx)) +#define sk_ASN1_INTEGER_set(sk, idx, ptr) ((ASN1_INTEGER *)OPENSSL_sk_set(ossl_check_ASN1_INTEGER_sk_type(sk), (idx), ossl_check_ASN1_INTEGER_type(ptr))) +#define sk_ASN1_INTEGER_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr)) +#define sk_ASN1_INTEGER_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_type(ptr), pnum) +#define sk_ASN1_INTEGER_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_INTEGER_sk_type(sk)) +#define sk_ASN1_INTEGER_dup(sk) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_dup(ossl_check_const_ASN1_INTEGER_sk_type(sk))) +#define sk_ASN1_INTEGER_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_INTEGER) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_copyfunc_type(copyfunc), ossl_check_ASN1_INTEGER_freefunc_type(freefunc))) +#define sk_ASN1_INTEGER_set_cmp_func(sk, cmp) ((sk_ASN1_INTEGER_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_INTEGER_sk_type(sk), ossl_check_ASN1_INTEGER_compfunc_type(cmp))) + + + +DECLARE_ASN1_FUNCTIONS(ASN1_INTEGER) +ASN1_INTEGER *d2i_ASN1_UINTEGER(ASN1_INTEGER **a, const unsigned char **pp, + long length); +DECLARE_ASN1_DUP_FUNCTION(ASN1_INTEGER) +int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y); + +DECLARE_ASN1_FUNCTIONS(ASN1_ENUMERATED) + +int ASN1_UTCTIME_check(const ASN1_UTCTIME *a); +ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s, time_t t); +ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, + int offset_day, long offset_sec); +int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str); +int 
ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t); + +int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *a); +ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(ASN1_GENERALIZEDTIME *s, + time_t t); +ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(ASN1_GENERALIZEDTIME *s, + time_t t, int offset_day, + long offset_sec); +int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str); + +int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to); + +DECLARE_ASN1_FUNCTIONS(ASN1_OCTET_STRING) +DECLARE_ASN1_DUP_FUNCTION(ASN1_OCTET_STRING) +int ASN1_OCTET_STRING_cmp(const ASN1_OCTET_STRING *a, + const ASN1_OCTET_STRING *b); +int ASN1_OCTET_STRING_set(ASN1_OCTET_STRING *str, const unsigned char *data, + int len); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_UTF8STRING, ASN1_UTF8STRING, ASN1_UTF8STRING) +#define sk_ASN1_UTF8STRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_value(sk, idx) ((ASN1_UTF8STRING *)OPENSSL_sk_value(ossl_check_const_ASN1_UTF8STRING_sk_type(sk), (idx))) +#define sk_ASN1_UTF8STRING_new(cmp) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new(ossl_check_ASN1_UTF8STRING_compfunc_type(cmp))) +#define sk_ASN1_UTF8STRING_new_null() ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_UTF8STRING_new_reserve(cmp, n) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_UTF8STRING_compfunc_type(cmp), (n))) +#define sk_ASN1_UTF8STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_UTF8STRING_sk_type(sk), (n)) +#define sk_ASN1_UTF8STRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_delete(sk, i) ((ASN1_UTF8STRING *)OPENSSL_sk_delete(ossl_check_ASN1_UTF8STRING_sk_type(sk), (i))) +#define sk_ASN1_UTF8STRING_delete_ptr(sk, ptr) ((ASN1_UTF8STRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr))) +#define sk_ASN1_UTF8STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_pop(sk) ((ASN1_UTF8STRING *)OPENSSL_sk_pop(ossl_check_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_shift(sk) ((ASN1_UTF8STRING *)OPENSSL_sk_shift(ossl_check_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_UTF8STRING_sk_type(sk),ossl_check_ASN1_UTF8STRING_freefunc_type(freefunc)) +#define sk_ASN1_UTF8STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr), (idx)) +#define sk_ASN1_UTF8STRING_set(sk, idx, ptr) ((ASN1_UTF8STRING *)OPENSSL_sk_set(ossl_check_ASN1_UTF8STRING_sk_type(sk), (idx), ossl_check_ASN1_UTF8STRING_type(ptr))) +#define sk_ASN1_UTF8STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr)) +#define sk_ASN1_UTF8STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_type(ptr), pnum) +#define sk_ASN1_UTF8STRING_sort(sk) 
OPENSSL_sk_sort(ossl_check_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_UTF8STRING_sk_type(sk)) +#define sk_ASN1_UTF8STRING_dup(sk) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_UTF8STRING_sk_type(sk))) +#define sk_ASN1_UTF8STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_UTF8STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_copyfunc_type(copyfunc), ossl_check_ASN1_UTF8STRING_freefunc_type(freefunc))) +#define sk_ASN1_UTF8STRING_set_cmp_func(sk, cmp) ((sk_ASN1_UTF8STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_UTF8STRING_sk_type(sk), ossl_check_ASN1_UTF8STRING_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(ASN1_VISIBLESTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UNIVERSALSTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UTF8STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_NULL) +DECLARE_ASN1_FUNCTIONS(ASN1_BMPSTRING) + +int UTF8_getc(const unsigned char *str, int len, unsigned long *val); +int UTF8_putc(unsigned char *str, int len, unsigned long value); + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_GENERALSTRING, ASN1_GENERALSTRING, ASN1_GENERALSTRING) +#define sk_ASN1_GENERALSTRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_value(sk, idx) ((ASN1_GENERALSTRING *)OPENSSL_sk_value(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk), (idx))) +#define sk_ASN1_GENERALSTRING_new(cmp) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new(ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp))) +#define sk_ASN1_GENERALSTRING_new_null() ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_GENERALSTRING_new_reserve(cmp, n) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp), (n))) +#define sk_ASN1_GENERALSTRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (n)) +#define sk_ASN1_GENERALSTRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_delete(sk, i) ((ASN1_GENERALSTRING *)OPENSSL_sk_delete(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (i))) +#define sk_ASN1_GENERALSTRING_delete_ptr(sk, ptr) ((ASN1_GENERALSTRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr))) +#define sk_ASN1_GENERALSTRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_pop(sk) ((ASN1_GENERALSTRING *)OPENSSL_sk_pop(ossl_check_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_shift(sk) ((ASN1_GENERALSTRING *)OPENSSL_sk_shift(ossl_check_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_GENERALSTRING_sk_type(sk),ossl_check_ASN1_GENERALSTRING_freefunc_type(freefunc)) +#define sk_ASN1_GENERALSTRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr), (idx)) +#define sk_ASN1_GENERALSTRING_set(sk, idx, ptr) ((ASN1_GENERALSTRING *)OPENSSL_sk_set(ossl_check_ASN1_GENERALSTRING_sk_type(sk), (idx), ossl_check_ASN1_GENERALSTRING_type(ptr))) +#define 
sk_ASN1_GENERALSTRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr)) +#define sk_ASN1_GENERALSTRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_type(ptr), pnum) +#define sk_ASN1_GENERALSTRING_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk)) +#define sk_ASN1_GENERALSTRING_dup(sk) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk))) +#define sk_ASN1_GENERALSTRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_GENERALSTRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_copyfunc_type(copyfunc), ossl_check_ASN1_GENERALSTRING_freefunc_type(freefunc))) +#define sk_ASN1_GENERALSTRING_set_cmp_func(sk, cmp) ((sk_ASN1_GENERALSTRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_GENERALSTRING_sk_type(sk), ossl_check_ASN1_GENERALSTRING_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, ASN1_PRINTABLE) + +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, DIRECTORYSTRING) +DECLARE_ASN1_FUNCTIONS_name(ASN1_STRING, DISPLAYTEXT) +DECLARE_ASN1_FUNCTIONS(ASN1_PRINTABLESTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_T61STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_IA5STRING) +DECLARE_ASN1_FUNCTIONS(ASN1_GENERALSTRING) +DECLARE_ASN1_FUNCTIONS(ASN1_UTCTIME) +DECLARE_ASN1_FUNCTIONS(ASN1_GENERALIZEDTIME) +DECLARE_ASN1_FUNCTIONS(ASN1_TIME) + +DECLARE_ASN1_DUP_FUNCTION(ASN1_TIME) +DECLARE_ASN1_DUP_FUNCTION(ASN1_UTCTIME) +DECLARE_ASN1_DUP_FUNCTION(ASN1_GENERALIZEDTIME) + +DECLARE_ASN1_ITEM(ASN1_OCTET_STRING_NDEF) + +ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t); +ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t, + int offset_day, long offset_sec); +int ASN1_TIME_check(const ASN1_TIME *t); +ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(const ASN1_TIME *t, + ASN1_GENERALIZEDTIME **out); +int ASN1_TIME_set_string(ASN1_TIME *s, const char *str); +int ASN1_TIME_set_string_X509(ASN1_TIME *s, const char *str); +int ASN1_TIME_to_tm(const ASN1_TIME *s, struct tm *tm); +int ASN1_TIME_normalize(ASN1_TIME *s); +int ASN1_TIME_cmp_time_t(const ASN1_TIME *s, time_t t); +int ASN1_TIME_compare(const ASN1_TIME *a, const ASN1_TIME *b); + +int i2a_ASN1_INTEGER(BIO *bp, const ASN1_INTEGER *a); +int a2i_ASN1_INTEGER(BIO *bp, ASN1_INTEGER *bs, char *buf, int size); +int i2a_ASN1_ENUMERATED(BIO *bp, const ASN1_ENUMERATED *a); +int a2i_ASN1_ENUMERATED(BIO *bp, ASN1_ENUMERATED *bs, char *buf, int size); +int i2a_ASN1_OBJECT(BIO *bp, const ASN1_OBJECT *a); +int a2i_ASN1_STRING(BIO *bp, ASN1_STRING *bs, char *buf, int size); +int i2a_ASN1_STRING(BIO *bp, const ASN1_STRING *a, int type); +int i2t_ASN1_OBJECT(char *buf, int buf_len, const ASN1_OBJECT *a); + +int a2d_ASN1_OBJECT(unsigned char *out, int olen, const char *buf, int num); +ASN1_OBJECT *ASN1_OBJECT_create(int nid, unsigned char *data, int len, + const char *sn, const char *ln); + +int ASN1_INTEGER_get_int64(int64_t *pr, const ASN1_INTEGER *a); +int ASN1_INTEGER_set_int64(ASN1_INTEGER *a, int64_t r); +int ASN1_INTEGER_get_uint64(uint64_t *pr, const ASN1_INTEGER *a); +int ASN1_INTEGER_set_uint64(ASN1_INTEGER *a, uint64_t r); + +int ASN1_INTEGER_set(ASN1_INTEGER *a, long 
v); +long ASN1_INTEGER_get(const ASN1_INTEGER *a); +ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai); +BIGNUM *ASN1_INTEGER_to_BN(const ASN1_INTEGER *ai, BIGNUM *bn); + +int ASN1_ENUMERATED_get_int64(int64_t *pr, const ASN1_ENUMERATED *a); +int ASN1_ENUMERATED_set_int64(ASN1_ENUMERATED *a, int64_t r); + + +int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v); +long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a); +ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn, ASN1_ENUMERATED *ai); +BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai, BIGNUM *bn); + +/* General */ +/* given a string, return the correct type, max is the maximum length */ +int ASN1_PRINTABLE_type(const unsigned char *s, int max); + +unsigned long ASN1_tag2bit(int tag); + +/* SPECIALS */ +int ASN1_get_object(const unsigned char **pp, long *plength, int *ptag, + int *pclass, long omax); +int ASN1_check_infinite_end(unsigned char **p, long len); +int ASN1_const_check_infinite_end(const unsigned char **p, long len); +void ASN1_put_object(unsigned char **pp, int constructed, int length, + int tag, int xclass); +int ASN1_put_eoc(unsigned char **pp); +int ASN1_object_size(int constructed, int length, int tag); + +/* Used to implement other functions */ +void *ASN1_dup(i2d_of_void *i2d, d2i_of_void *d2i, const void *x); + +# define ASN1_dup_of(type,i2d,d2i,x) \ + ((type*)ASN1_dup(CHECKED_I2D_OF(type, i2d), \ + CHECKED_D2I_OF(type, d2i), \ + CHECKED_PTR_OF(const type, x))) + +void *ASN1_item_dup(const ASN1_ITEM *it, const void *x); +int ASN1_item_sign_ex(const ASN1_ITEM *it, X509_ALGOR *algor1, + X509_ALGOR *algor2, ASN1_BIT_STRING *signature, + const void *data, const ASN1_OCTET_STRING *id, + EVP_PKEY *pkey, const EVP_MD *md, OSSL_LIB_CTX *libctx, + const char *propq); +int ASN1_item_verify_ex(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + const ASN1_OCTET_STRING *id, EVP_PKEY *pkey, + OSSL_LIB_CTX *libctx, const char *propq); + +/* ASN1 alloc/free macros for when a type is only used internally */ + +# define M_ASN1_new_of(type) (type *)ASN1_item_new(ASN1_ITEM_rptr(type)) +# define M_ASN1_free_of(x, type) \ + ASN1_item_free(CHECKED_PTR_OF(type, x), ASN1_ITEM_rptr(type)) + +# ifndef OPENSSL_NO_STDIO +void *ASN1_d2i_fp(void *(*xnew) (void), d2i_of_void *d2i, FILE *in, void **x); + +# define ASN1_d2i_fp_of(type,xnew,d2i,in,x) \ + ((type*)ASN1_d2i_fp(CHECKED_NEW_OF(type, xnew), \ + CHECKED_D2I_OF(type, d2i), \ + in, \ + CHECKED_PPTR_OF(type, x))) + +void *ASN1_item_d2i_fp_ex(const ASN1_ITEM *it, FILE *in, void *x, + OSSL_LIB_CTX *libctx, const char *propq); +void *ASN1_item_d2i_fp(const ASN1_ITEM *it, FILE *in, void *x); +int ASN1_i2d_fp(i2d_of_void *i2d, FILE *out, const void *x); + +# define ASN1_i2d_fp_of(type,i2d,out,x) \ + (ASN1_i2d_fp(CHECKED_I2D_OF(type, i2d), \ + out, \ + CHECKED_PTR_OF(const type, x))) + +int ASN1_item_i2d_fp(const ASN1_ITEM *it, FILE *out, const void *x); +int ASN1_STRING_print_ex_fp(FILE *fp, const ASN1_STRING *str, unsigned long flags); +# endif + +int ASN1_STRING_to_UTF8(unsigned char **out, const ASN1_STRING *in); + +void *ASN1_d2i_bio(void *(*xnew) (void), d2i_of_void *d2i, BIO *in, void **x); + +# define ASN1_d2i_bio_of(type,xnew,d2i,in,x) \ + ((type*)ASN1_d2i_bio( CHECKED_NEW_OF(type, xnew), \ + CHECKED_D2I_OF(type, d2i), \ + in, \ + CHECKED_PPTR_OF(type, x))) + +void *ASN1_item_d2i_bio_ex(const ASN1_ITEM *it, BIO *in, void *pval, + OSSL_LIB_CTX *libctx, const char *propq); +void *ASN1_item_d2i_bio(const ASN1_ITEM *it, 
BIO *in, void *pval); +int ASN1_i2d_bio(i2d_of_void *i2d, BIO *out, const void *x); + +# define ASN1_i2d_bio_of(type,i2d,out,x) \ + (ASN1_i2d_bio(CHECKED_I2D_OF(type, i2d), \ + out, \ + CHECKED_PTR_OF(const type, x))) + +int ASN1_item_i2d_bio(const ASN1_ITEM *it, BIO *out, const void *x); +BIO *ASN1_item_i2d_mem_bio(const ASN1_ITEM *it, const ASN1_VALUE *val); +int ASN1_UTCTIME_print(BIO *fp, const ASN1_UTCTIME *a); +int ASN1_GENERALIZEDTIME_print(BIO *fp, const ASN1_GENERALIZEDTIME *a); +int ASN1_TIME_print(BIO *bp, const ASN1_TIME *tm); +int ASN1_TIME_print_ex(BIO *bp, const ASN1_TIME *tm, unsigned long flags); +int ASN1_STRING_print(BIO *bp, const ASN1_STRING *v); +int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags); +int ASN1_buf_print(BIO *bp, const unsigned char *buf, size_t buflen, int off); +int ASN1_bn_print(BIO *bp, const char *number, const BIGNUM *num, + unsigned char *buf, int off); +int ASN1_parse(BIO *bp, const unsigned char *pp, long len, int indent); +int ASN1_parse_dump(BIO *bp, const unsigned char *pp, long len, int indent, + int dump); +const char *ASN1_tag2str(int tag); + +/* Used to load and write Netscape format cert */ + +int ASN1_UNIVERSALSTRING_to_string(ASN1_UNIVERSALSTRING *s); + +int ASN1_TYPE_set_octetstring(ASN1_TYPE *a, unsigned char *data, int len); +int ASN1_TYPE_get_octetstring(const ASN1_TYPE *a, unsigned char *data, int max_len); +int ASN1_TYPE_set_int_octetstring(ASN1_TYPE *a, long num, + unsigned char *data, int len); +int ASN1_TYPE_get_int_octetstring(const ASN1_TYPE *a, long *num, + unsigned char *data, int max_len); + +void *ASN1_item_unpack(const ASN1_STRING *oct, const ASN1_ITEM *it); +void *ASN1_item_unpack_ex(const ASN1_STRING *oct, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); + +ASN1_STRING *ASN1_item_pack(void *obj, const ASN1_ITEM *it, + ASN1_OCTET_STRING **oct); + +void ASN1_STRING_set_default_mask(unsigned long mask); +int ASN1_STRING_set_default_mask_asc(const char *p); +unsigned long ASN1_STRING_get_default_mask(void); +int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len, + int inform, unsigned long mask); +int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, + int inform, unsigned long mask, + long minsize, long maxsize); + +ASN1_STRING *ASN1_STRING_set_by_NID(ASN1_STRING **out, + const unsigned char *in, int inlen, + int inform, int nid); +ASN1_STRING_TABLE *ASN1_STRING_TABLE_get(int nid); +int ASN1_STRING_TABLE_add(int, long, long, unsigned long, unsigned long); +void ASN1_STRING_TABLE_cleanup(void); + +/* ASN1 template functions */ + +/* Old API compatible functions */ +ASN1_VALUE *ASN1_item_new(const ASN1_ITEM *it); +ASN1_VALUE *ASN1_item_new_ex(const ASN1_ITEM *it, OSSL_LIB_CTX *libctx, + const char *propq); +void ASN1_item_free(ASN1_VALUE *val, const ASN1_ITEM *it); +ASN1_VALUE *ASN1_item_d2i_ex(ASN1_VALUE **val, const unsigned char **in, + long len, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +ASN1_VALUE *ASN1_item_d2i(ASN1_VALUE **val, const unsigned char **in, + long len, const ASN1_ITEM *it); +int ASN1_item_i2d(const ASN1_VALUE *val, unsigned char **out, const ASN1_ITEM *it); +int ASN1_item_ndef_i2d(const ASN1_VALUE *val, unsigned char **out, + const ASN1_ITEM *it); + +void ASN1_add_oid_module(void); +void ASN1_add_stable_module(void); + +ASN1_TYPE *ASN1_generate_nconf(const char *str, CONF *nconf); +ASN1_TYPE *ASN1_generate_v3(const char *str, X509V3_CTX *cnf); +int ASN1_str2mask(const char *str, unsigned long 
*pmask); + +/* ASN1 Print flags */ + +/* Indicate missing OPTIONAL fields */ +# define ASN1_PCTX_FLAGS_SHOW_ABSENT 0x001 +/* Mark start and end of SEQUENCE */ +# define ASN1_PCTX_FLAGS_SHOW_SEQUENCE 0x002 +/* Mark start and end of SEQUENCE/SET OF */ +# define ASN1_PCTX_FLAGS_SHOW_SSOF 0x004 +/* Show the ASN1 type of primitives */ +# define ASN1_PCTX_FLAGS_SHOW_TYPE 0x008 +/* Don't show ASN1 type of ANY */ +# define ASN1_PCTX_FLAGS_NO_ANY_TYPE 0x010 +/* Don't show ASN1 type of MSTRINGs */ +# define ASN1_PCTX_FLAGS_NO_MSTRING_TYPE 0x020 +/* Don't show field names in SEQUENCE */ +# define ASN1_PCTX_FLAGS_NO_FIELD_NAME 0x040 +/* Show structure names of each SEQUENCE field */ +# define ASN1_PCTX_FLAGS_SHOW_FIELD_STRUCT_NAME 0x080 +/* Don't show structure name even at top level */ +# define ASN1_PCTX_FLAGS_NO_STRUCT_NAME 0x100 + +int ASN1_item_print(BIO *out, const ASN1_VALUE *ifld, int indent, + const ASN1_ITEM *it, const ASN1_PCTX *pctx); +ASN1_PCTX *ASN1_PCTX_new(void); +void ASN1_PCTX_free(ASN1_PCTX *p); +unsigned long ASN1_PCTX_get_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_nm_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_nm_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_cert_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_cert_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_oid_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_oid_flags(ASN1_PCTX *p, unsigned long flags); +unsigned long ASN1_PCTX_get_str_flags(const ASN1_PCTX *p); +void ASN1_PCTX_set_str_flags(ASN1_PCTX *p, unsigned long flags); + +ASN1_SCTX *ASN1_SCTX_new(int (*scan_cb) (ASN1_SCTX *ctx)); +void ASN1_SCTX_free(ASN1_SCTX *p); +const ASN1_ITEM *ASN1_SCTX_get_item(ASN1_SCTX *p); +const ASN1_TEMPLATE *ASN1_SCTX_get_template(ASN1_SCTX *p); +unsigned long ASN1_SCTX_get_flags(ASN1_SCTX *p); +void ASN1_SCTX_set_app_data(ASN1_SCTX *p, void *data); +void *ASN1_SCTX_get_app_data(ASN1_SCTX *p); + +const BIO_METHOD *BIO_f_asn1(void); + +/* cannot constify val because of CMS_stream() */ +BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it); + +int i2d_ASN1_bio_stream(BIO *out, ASN1_VALUE *val, BIO *in, int flags, + const ASN1_ITEM *it); +int PEM_write_bio_ASN1_stream(BIO *out, ASN1_VALUE *val, BIO *in, int flags, + const char *hdr, const ASN1_ITEM *it); +/* cannot constify val because of CMS_dataFinal() */ +int SMIME_write_ASN1(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, + int ctype_nid, int econt_nid, + STACK_OF(X509_ALGOR) *mdalgs, const ASN1_ITEM *it); +int SMIME_write_ASN1_ex(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, + int ctype_nid, int econt_nid, + STACK_OF(X509_ALGOR) *mdalgs, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +ASN1_VALUE *SMIME_read_ASN1(BIO *bio, BIO **bcont, const ASN1_ITEM *it); +ASN1_VALUE *SMIME_read_ASN1_ex(BIO *bio, int flags, BIO **bcont, + const ASN1_ITEM *it, ASN1_VALUE **x, + OSSL_LIB_CTX *libctx, const char *propq); +int SMIME_crlf_copy(BIO *in, BIO *out, int flags); +int SMIME_text(BIO *in, BIO *out); + +const ASN1_ITEM *ASN1_ITEM_lookup(const char *name); +const ASN1_ITEM *ASN1_ITEM_get(size_t i); + +/* Legacy compatibility */ +# define DECLARE_ASN1_FUNCTIONS_fname(type, itname, name) \ + DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \ + DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) +# define DECLARE_ASN1_FUNCTIONS_const(type) DECLARE_ASN1_FUNCTIONS(type) +# define DECLARE_ASN1_ENCODE_FUNCTIONS_const(type, name) \ + 
DECLARE_ASN1_ENCODE_FUNCTIONS(type, name) +# define I2D_OF_const(type) I2D_OF(type) +# define ASN1_dup_of_const(type,i2d,d2i,x) ASN1_dup_of(type,i2d,d2i,x) +# define ASN1_i2d_fp_of_const(type,i2d,out,x) ASN1_i2d_fp_of(type,i2d,out,x) +# define ASN1_i2d_bio_of_const(type,i2d,out,x) ASN1_i2d_bio_of(type,i2d,out,x) + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/asn1t.h b/contrib/openssl-cmake/common/include/openssl/asn1t.h new file mode 100644 index 000000000000..74ba47d0cf26 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/asn1t.h @@ -0,0 +1,946 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/asn1t.h.in + * + * Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ASN1T_H +# define OPENSSL_ASN1T_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ASN1T_H +# endif + +# include +# include +# include + +# ifdef OPENSSL_BUILD_SHLIBCRYPTO +# undef OPENSSL_EXTERN +# define OPENSSL_EXTERN OPENSSL_EXPORT +# endif + +/* ASN1 template defines, structures and functions */ + +#ifdef __cplusplus +extern "C" { +#endif + +/*- + * These are the possible values for the itype field of the + * ASN1_ITEM structure and determine how it is interpreted. + * + * For PRIMITIVE types the underlying type + * determines the behaviour if items is NULL. + * + * Otherwise templates must contain a single + * template and the type is treated in the + * same way as the type specified in the template. + * + * For SEQUENCE types the templates field points + * to the members, the size field is the + * structure size. + * + * For CHOICE types the templates field points + * to each possible member (typically a union) + * and the 'size' field is the offset of the + * selector. + * + * The 'funcs' field is used for application-specific + * data and functions. + * + * The EXTERN type uses a new style d2i/i2d. + * The new style should be used where possible + * because it avoids things like the d2i IMPLICIT + * hack. + * + * MSTRING is a multiple string type, it is used + * for a CHOICE of character strings where the + * actual strings all occupy an ASN1_STRING + * structure. In this case the 'utype' field + * has a special meaning, it is used as a mask + * of acceptable types using the B_ASN1 constants. + * + * NDEF_SEQUENCE is the same as SEQUENCE except + * that it will use indefinite length constructed + * encoding if requested. 
+ * + */ + +# define ASN1_ITYPE_PRIMITIVE 0x0 +# define ASN1_ITYPE_SEQUENCE 0x1 +# define ASN1_ITYPE_CHOICE 0x2 +/* unused value 0x3 */ +# define ASN1_ITYPE_EXTERN 0x4 +# define ASN1_ITYPE_MSTRING 0x5 +# define ASN1_ITYPE_NDEF_SEQUENCE 0x6 + +/* Macro to obtain ASN1_ADB pointer from a type (only used internally) */ +# define ASN1_ADB_ptr(iptr) ((const ASN1_ADB *)((iptr)())) + +/* Macros for start and end of ASN1_ITEM definition */ + +# define ASN1_ITEM_start(itname) \ + const ASN1_ITEM * itname##_it(void) \ + { \ + static const ASN1_ITEM local_it = { + +# define static_ASN1_ITEM_start(itname) \ + static ASN1_ITEM_start(itname) + +# define ASN1_ITEM_end(itname) \ + }; \ + return &local_it; \ + } + +/* Macros to aid ASN1 template writing */ + +# define ASN1_ITEM_TEMPLATE(tname) \ + static const ASN1_TEMPLATE tname##_item_tt + +# define ASN1_ITEM_TEMPLATE_END(tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_PRIMITIVE,\ + -1,\ + &tname##_item_tt,\ + 0,\ + NULL,\ + 0,\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_ITEM_TEMPLATE_END(tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_PRIMITIVE,\ + -1,\ + &tname##_item_tt,\ + 0,\ + NULL,\ + 0,\ + #tname \ + ASN1_ITEM_end(tname) + +/* This is a ASN1 type which just embeds a template */ + +/*- + * This pair helps declare a SEQUENCE. We can do: + * + * ASN1_SEQUENCE(stname) = { + * ... SEQUENCE components ... + * } ASN1_SEQUENCE_END(stname) + * + * This will produce an ASN1_ITEM called stname_it + * for a structure called stname. + * + * If you want the same structure but a different + * name then use: + * + * ASN1_SEQUENCE(itname) = { + * ... SEQUENCE components ... + * } ASN1_SEQUENCE_END_name(stname, itname) + * + * This will create an item called itname_it using + * a structure called stname. 
+ */ + +# define ASN1_SEQUENCE(tname) \ + static const ASN1_TEMPLATE tname##_seq_tt[] + +# define ASN1_SEQUENCE_END(stname) ASN1_SEQUENCE_END_name(stname, stname) + +# define static_ASN1_SEQUENCE_END(stname) static_ASN1_SEQUENCE_END_name(stname, stname) + +# define ASN1_SEQUENCE_END_name(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #tname \ + ASN1_ITEM_end(tname) + +# define static_ASN1_SEQUENCE_END_name(stname, tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_NDEF_SEQUENCE(tname) \ + ASN1_SEQUENCE(tname) + +# define ASN1_NDEF_SEQUENCE_cb(tname, cb) \ + ASN1_SEQUENCE_cb(tname, cb) + +# define ASN1_SEQUENCE_cb(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, 0, 0, 0, cb, 0, NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_const_cb(tname, const_cb) \ + static const ASN1_AUX tname##_aux = \ + {NULL, ASN1_AFLG_CONST_CB, 0, 0, NULL, 0, const_cb}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_cb_const_cb(tname, cb, const_cb) \ + static const ASN1_AUX tname##_aux = \ + {NULL, ASN1_AFLG_CONST_CB, 0, 0, cb, 0, const_cb}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_ref(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_REFCOUNT, offsetof(tname, references), offsetof(tname, lock), cb, 0, NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_SEQUENCE_enc(tname, enc, cb) \ + static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_ENCODING, 0, 0, cb, offsetof(tname, enc), NULL}; \ + ASN1_SEQUENCE(tname) + +# define ASN1_NDEF_SEQUENCE_END(tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(tname),\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_NDEF_SEQUENCE_END(tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(tname),\ + #tname \ + ASN1_ITEM_end(tname) + + +# define ASN1_SEQUENCE_END_enc(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname) + +# define ASN1_SEQUENCE_END_cb(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname) +# define static_ASN1_SEQUENCE_END_cb(stname, tname) static_ASN1_SEQUENCE_END_ref(stname, tname) + +# define ASN1_SEQUENCE_END_ref(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #tname \ + ASN1_ITEM_end(tname) +# define static_ASN1_SEQUENCE_END_ref(stname, tname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_NDEF_SEQUENCE_END_cb(stname, tname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_NDEF_SEQUENCE,\ + V_ASN1_SEQUENCE,\ + tname##_seq_tt,\ + sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +/*- + * This pair helps declare a CHOICE type. We can do: + * + * ASN1_CHOICE(chname) = { + * ... CHOICE options ... 
+ * ASN1_CHOICE_END(chname) + * + * This will produce an ASN1_ITEM called chname_it + * for a structure called chname. The structure + * definition must look like this: + * typedef struct { + * int type; + * union { + * ASN1_SOMETHING *opt1; + * ASN1_SOMEOTHER *opt2; + * } value; + * } chname; + * + * the name of the selector must be 'type'. + * to use an alternative selector name use the + * ASN1_CHOICE_END_selector() version. + */ + +# define ASN1_CHOICE(tname) \ + static const ASN1_TEMPLATE tname##_ch_tt[] + +# define ASN1_CHOICE_cb(tname, cb) \ + static const ASN1_AUX tname##_aux = {NULL, 0, 0, 0, cb, 0, NULL}; \ + ASN1_CHOICE(tname) + +# define ASN1_CHOICE_END(stname) ASN1_CHOICE_END_name(stname, stname) + +# define static_ASN1_CHOICE_END(stname) static_ASN1_CHOICE_END_name(stname, stname) + +# define ASN1_CHOICE_END_name(stname, tname) ASN1_CHOICE_END_selector(stname, tname, type) + +# define static_ASN1_CHOICE_END_name(stname, tname) static_ASN1_CHOICE_END_selector(stname, tname, type) + +# define ASN1_CHOICE_END_selector(stname, tname, selname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define static_ASN1_CHOICE_END_selector(stname, tname, selname) \ + ;\ + static_ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + NULL,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +# define ASN1_CHOICE_END_cb(stname, tname, selname) \ + ;\ + ASN1_ITEM_start(tname) \ + ASN1_ITYPE_CHOICE,\ + offsetof(stname,selname) ,\ + tname##_ch_tt,\ + sizeof(tname##_ch_tt) / sizeof(ASN1_TEMPLATE),\ + &tname##_aux,\ + sizeof(stname),\ + #stname \ + ASN1_ITEM_end(tname) + +/* This helps with the template wrapper form of ASN1_ITEM */ + +# define ASN1_EX_TEMPLATE_TYPE(flags, tag, name, type) { \ + (flags), (tag), 0,\ + #name, ASN1_ITEM_ref(type) } + +/* These help with SEQUENCE or CHOICE components */ + +/* used to declare other types */ + +# define ASN1_EX_TYPE(flags, tag, stname, field, type) { \ + (flags), (tag), offsetof(stname, field),\ + #field, ASN1_ITEM_ref(type) } + +/* implicit and explicit helper macros */ + +# define ASN1_IMP_EX(stname, field, type, tag, ex) \ + ASN1_EX_TYPE(ASN1_TFLG_IMPLICIT | (ex), tag, stname, field, type) + +# define ASN1_EXP_EX(stname, field, type, tag, ex) \ + ASN1_EX_TYPE(ASN1_TFLG_EXPLICIT | (ex), tag, stname, field, type) + +/* Any defined by macros: the field used is in the table itself */ + +# define ASN1_ADB_OBJECT(tblname) { ASN1_TFLG_ADB_OID, -1, 0, #tblname, tblname##_adb } +# define ASN1_ADB_INTEGER(tblname) { ASN1_TFLG_ADB_INT, -1, 0, #tblname, tblname##_adb } + +/* Plain simple type */ +# define ASN1_SIMPLE(stname, field, type) ASN1_EX_TYPE(0,0, stname, field, type) +/* Embedded simple type */ +# define ASN1_EMBED(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_EMBED,0, stname, field, type) + +/* OPTIONAL simple type */ +# define ASN1_OPT(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_OPTIONAL, 0, stname, field, type) +# define ASN1_OPT_EMBED(stname, field, type) ASN1_EX_TYPE(ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED, 0, stname, field, type) + +/* IMPLICIT tagged simple type */ +# define ASN1_IMP(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, 0) +# define ASN1_IMP_EMBED(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_EMBED) + +/* IMPLICIT tagged OPTIONAL 
simple type */ +# define ASN1_IMP_OPT(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL) +# define ASN1_IMP_OPT_EMBED(stname, field, type, tag) ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED) + +/* Same as above but EXPLICIT */ + +# define ASN1_EXP(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, 0) +# define ASN1_EXP_EMBED(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_EMBED) +# define ASN1_EXP_OPT(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL) +# define ASN1_EXP_OPT_EMBED(stname, field, type, tag) ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_EMBED) + +/* SEQUENCE OF type */ +# define ASN1_SEQUENCE_OF(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SEQUENCE_OF, 0, stname, field, type) + +/* OPTIONAL SEQUENCE OF */ +# define ASN1_SEQUENCE_OF_OPT(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL, 0, stname, field, type) + +/* Same as above but for SET OF */ + +# define ASN1_SET_OF(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SET_OF, 0, stname, field, type) + +# define ASN1_SET_OF_OPT(stname, field, type) \ + ASN1_EX_TYPE(ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL, 0, stname, field, type) + +/* Finally compound types of SEQUENCE, SET, IMPLICIT, EXPLICIT and OPTIONAL */ + +# define ASN1_IMP_SET_OF(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF) + +# define ASN1_EXP_SET_OF(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF) + +# define ASN1_IMP_SET_OF_OPT(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_EXP_SET_OF_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SET_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_IMP_SEQUENCE_OF(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF) + +# define ASN1_IMP_SEQUENCE_OF_OPT(stname, field, type, tag) \ + ASN1_IMP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL) + +# define ASN1_EXP_SEQUENCE_OF(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF) + +# define ASN1_EXP_SEQUENCE_OF_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL) + +/* EXPLICIT using indefinite length constructed form */ +# define ASN1_NDEF_EXP(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_NDEF) + +/* EXPLICIT OPTIONAL using indefinite length constructed form */ +# define ASN1_NDEF_EXP_OPT(stname, field, type, tag) \ + ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_NDEF) + +/* Macros for the ASN1_ADB structure */ + +# define ASN1_ADB(name) \ + static const ASN1_ADB_TABLE name##_adbtbl[] + +# define ASN1_ADB_END(name, flags, field, adb_cb, def, none) \ + ;\ + static const ASN1_ITEM *name##_adb(void) \ + { \ + static const ASN1_ADB internal_adb = \ + {\ + flags,\ + offsetof(name, field),\ + adb_cb,\ + name##_adbtbl,\ + sizeof(name##_adbtbl) / sizeof(ASN1_ADB_TABLE),\ + def,\ + none\ + }; \ + return (const ASN1_ITEM *) &internal_adb; \ + } \ + void dummy_function(void) + +# define ADB_ENTRY(val, template) {val, template} + +# define ASN1_ADB_TEMPLATE(name) \ + static const ASN1_TEMPLATE name##_tt + +/* + * This is the ASN1 template structure that defines a wrapper round the + * actual type. 
It determines the actual position of the field in the value + * structure, various flags such as OPTIONAL and the field name. + */ + +struct ASN1_TEMPLATE_st { + unsigned long flags; /* Various flags */ + long tag; /* tag, not used if no tagging */ + unsigned long offset; /* Offset of this field in structure */ + const char *field_name; /* Field name */ + ASN1_ITEM_EXP *item; /* Relevant ASN1_ITEM or ASN1_ADB */ +}; + +/* Macro to extract ASN1_ITEM and ASN1_ADB pointer from ASN1_TEMPLATE */ + +# define ASN1_TEMPLATE_item(t) (t->item_ptr) +# define ASN1_TEMPLATE_adb(t) (t->item_ptr) + +typedef struct ASN1_ADB_TABLE_st ASN1_ADB_TABLE; +typedef struct ASN1_ADB_st ASN1_ADB; + +struct ASN1_ADB_st { + unsigned long flags; /* Various flags */ + unsigned long offset; /* Offset of selector field */ + int (*adb_cb)(long *psel); /* Application callback */ + const ASN1_ADB_TABLE *tbl; /* Table of possible types */ + long tblcount; /* Number of entries in tbl */ + const ASN1_TEMPLATE *default_tt; /* Type to use if no match */ + const ASN1_TEMPLATE *null_tt; /* Type to use if selector is NULL */ +}; + +struct ASN1_ADB_TABLE_st { + long value; /* NID for an object or value for an int */ + const ASN1_TEMPLATE tt; /* item for this value */ +}; + +/* template flags */ + +/* Field is optional */ +# define ASN1_TFLG_OPTIONAL (0x1) + +/* Field is a SET OF */ +# define ASN1_TFLG_SET_OF (0x1 << 1) + +/* Field is a SEQUENCE OF */ +# define ASN1_TFLG_SEQUENCE_OF (0x2 << 1) + +/* + * Special case: this refers to a SET OF that will be sorted into DER order + * when encoded *and* the corresponding STACK will be modified to match the + * new order. + */ +# define ASN1_TFLG_SET_ORDER (0x3 << 1) + +/* Mask for SET OF or SEQUENCE OF */ +# define ASN1_TFLG_SK_MASK (0x3 << 1) + +/* + * These flags mean the tag should be taken from the tag field. If EXPLICIT + * then the underlying type is used for the inner tag. + */ + +/* IMPLICIT tagging */ +# define ASN1_TFLG_IMPTAG (0x1 << 3) + +/* EXPLICIT tagging, inner tag from underlying type */ +# define ASN1_TFLG_EXPTAG (0x2 << 3) + +# define ASN1_TFLG_TAG_MASK (0x3 << 3) + +/* context specific IMPLICIT */ +# define ASN1_TFLG_IMPLICIT (ASN1_TFLG_IMPTAG|ASN1_TFLG_CONTEXT) + +/* context specific EXPLICIT */ +# define ASN1_TFLG_EXPLICIT (ASN1_TFLG_EXPTAG|ASN1_TFLG_CONTEXT) + +/* + * If tagging is in force these determine the type of tag to use. Otherwise + * the tag is determined by the underlying type. These values reflect the + * actual octet format. + */ + +/* Universal tag */ +# define ASN1_TFLG_UNIVERSAL (0x0<<6) +/* Application tag */ +# define ASN1_TFLG_APPLICATION (0x1<<6) +/* Context specific tag */ +# define ASN1_TFLG_CONTEXT (0x2<<6) +/* Private tag */ +# define ASN1_TFLG_PRIVATE (0x3<<6) + +# define ASN1_TFLG_TAG_CLASS (0x3<<6) + +/* + * These are for ANY DEFINED BY type. In this case the 'item' field points to + * an ASN1_ADB structure which contains a table of values to decode the + * relevant type + */ + +# define ASN1_TFLG_ADB_MASK (0x3<<8) + +# define ASN1_TFLG_ADB_OID (0x1<<8) + +# define ASN1_TFLG_ADB_INT (0x1<<9) + +/* + * This flag when present in a SEQUENCE OF, SET OF or EXPLICIT causes + * indefinite length constructed encoding to be used if required. 
+ */ + +# define ASN1_TFLG_NDEF (0x1<<11) + +/* Field is embedded and not a pointer */ +# define ASN1_TFLG_EMBED (0x1 << 12) + +/* This is the actual ASN1 item itself */ + +struct ASN1_ITEM_st { + char itype; /* The item type, primitive, SEQUENCE, CHOICE + * or extern */ + long utype; /* underlying type */ + const ASN1_TEMPLATE *templates; /* If SEQUENCE or CHOICE this contains + * the contents */ + long tcount; /* Number of templates if SEQUENCE or CHOICE */ + const void *funcs; /* further data and type-specific functions */ + /* funcs can be ASN1_PRIMITIVE_FUNCS*, ASN1_EXTERN_FUNCS*, or ASN1_AUX* */ + long size; /* Structure size (usually) */ + const char *sname; /* Structure name */ +}; + +/* + * Cache for ASN1 tag and length, so we don't keep re-reading it for things + * like CHOICE + */ + +struct ASN1_TLC_st { + char valid; /* Values below are valid */ + int ret; /* return value */ + long plen; /* length */ + int ptag; /* class value */ + int pclass; /* class value */ + int hdrlen; /* header length */ +}; + +/* Typedefs for ASN1 function pointers */ +typedef int ASN1_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx); + +typedef int ASN1_ex_d2i_ex(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx, OSSL_LIB_CTX *libctx, + const char *propq); +typedef int ASN1_ex_i2d(const ASN1_VALUE **pval, unsigned char **out, + const ASN1_ITEM *it, int tag, int aclass); +typedef int ASN1_ex_new_func(ASN1_VALUE **pval, const ASN1_ITEM *it); +typedef int ASN1_ex_new_ex_func(ASN1_VALUE **pval, const ASN1_ITEM *it, + OSSL_LIB_CTX *libctx, const char *propq); +typedef void ASN1_ex_free_func(ASN1_VALUE **pval, const ASN1_ITEM *it); + +typedef int ASN1_ex_print_func(BIO *out, const ASN1_VALUE **pval, + int indent, const char *fname, + const ASN1_PCTX *pctx); + +typedef int ASN1_primitive_i2c(const ASN1_VALUE **pval, unsigned char *cont, + int *putype, const ASN1_ITEM *it); +typedef int ASN1_primitive_c2i(ASN1_VALUE **pval, const unsigned char *cont, + int len, int utype, char *free_cont, + const ASN1_ITEM *it); +typedef int ASN1_primitive_print(BIO *out, const ASN1_VALUE **pval, + const ASN1_ITEM *it, int indent, + const ASN1_PCTX *pctx); + +typedef struct ASN1_EXTERN_FUNCS_st { + void *app_data; + ASN1_ex_new_func *asn1_ex_new; + ASN1_ex_free_func *asn1_ex_free; + ASN1_ex_free_func *asn1_ex_clear; + ASN1_ex_d2i *asn1_ex_d2i; + ASN1_ex_i2d *asn1_ex_i2d; + ASN1_ex_print_func *asn1_ex_print; + ASN1_ex_new_ex_func *asn1_ex_new_ex; + ASN1_ex_d2i_ex *asn1_ex_d2i_ex; +} ASN1_EXTERN_FUNCS; + +typedef struct ASN1_PRIMITIVE_FUNCS_st { + void *app_data; + unsigned long flags; + ASN1_ex_new_func *prim_new; + ASN1_ex_free_func *prim_free; + ASN1_ex_free_func *prim_clear; + ASN1_primitive_c2i *prim_c2i; + ASN1_primitive_i2c *prim_i2c; + ASN1_primitive_print *prim_print; +} ASN1_PRIMITIVE_FUNCS; + +/* + * This is the ASN1_AUX structure: it handles various miscellaneous + * requirements. For example the use of reference counts and an informational + * callback. The "informational callback" is called at various points during + * the ASN1 encoding and decoding. It can be used to provide minor + * customisation of the structures used. This is most useful where the + * supplied routines *almost* do the right thing but need some extra help at + * a few points. 
If the callback returns zero then it is assumed a fatal + * error has occurred and the main operation should be abandoned. If major + * changes in the default behaviour are required then an external type is + * more appropriate. + * For the operations ASN1_OP_I2D_PRE, ASN1_OP_I2D_POST, ASN1_OP_PRINT_PRE, and + * ASN1_OP_PRINT_POST, meanwhile a variant of the callback with const parameter + * 'in' is provided to make clear statically that its input is not modified. If + * and only if this variant is in use the flag ASN1_AFLG_CONST_CB must be set. + */ + +typedef int ASN1_aux_cb(int operation, ASN1_VALUE **in, const ASN1_ITEM *it, + void *exarg); +typedef int ASN1_aux_const_cb(int operation, const ASN1_VALUE **in, + const ASN1_ITEM *it, void *exarg); + +typedef struct ASN1_AUX_st { + void *app_data; + int flags; + int ref_offset; /* Offset of reference value */ + int ref_lock; /* Offset of lock value */ + ASN1_aux_cb *asn1_cb; + int enc_offset; /* Offset of ASN1_ENCODING structure */ + ASN1_aux_const_cb *asn1_const_cb; /* for ASN1_OP_I2D_ and ASN1_OP_PRINT_ */ +} ASN1_AUX; + +/* For print related callbacks exarg points to this structure */ +typedef struct ASN1_PRINT_ARG_st { + BIO *out; + int indent; + const ASN1_PCTX *pctx; +} ASN1_PRINT_ARG; + +/* For streaming related callbacks exarg points to this structure */ +typedef struct ASN1_STREAM_ARG_st { + /* BIO to stream through */ + BIO *out; + /* BIO with filters appended */ + BIO *ndef_bio; + /* Streaming I/O boundary */ + unsigned char **boundary; +} ASN1_STREAM_ARG; + +/* Flags in ASN1_AUX */ + +/* Use a reference count */ +# define ASN1_AFLG_REFCOUNT 1 +/* Save the encoding of structure (useful for signatures) */ +# define ASN1_AFLG_ENCODING 2 +/* The Sequence length is invalid */ +# define ASN1_AFLG_BROKEN 4 +/* Use the new asn1_const_cb */ +# define ASN1_AFLG_CONST_CB 8 + +/* operation values for asn1_cb */ + +# define ASN1_OP_NEW_PRE 0 +# define ASN1_OP_NEW_POST 1 +# define ASN1_OP_FREE_PRE 2 +# define ASN1_OP_FREE_POST 3 +# define ASN1_OP_D2I_PRE 4 +# define ASN1_OP_D2I_POST 5 +# define ASN1_OP_I2D_PRE 6 +# define ASN1_OP_I2D_POST 7 +# define ASN1_OP_PRINT_PRE 8 +# define ASN1_OP_PRINT_POST 9 +# define ASN1_OP_STREAM_PRE 10 +# define ASN1_OP_STREAM_POST 11 +# define ASN1_OP_DETACHED_PRE 12 +# define ASN1_OP_DETACHED_POST 13 +# define ASN1_OP_DUP_PRE 14 +# define ASN1_OP_DUP_POST 15 +# define ASN1_OP_GET0_LIBCTX 16 +# define ASN1_OP_GET0_PROPQ 17 + +/* Macro to implement a primitive type */ +# define IMPLEMENT_ASN1_TYPE(stname) IMPLEMENT_ASN1_TYPE_ex(stname, stname, 0) +# define IMPLEMENT_ASN1_TYPE_ex(itname, vname, ex) \ + ASN1_ITEM_start(itname) \ + ASN1_ITYPE_PRIMITIVE, V_##vname, NULL, 0, NULL, ex, #itname \ + ASN1_ITEM_end(itname) + +/* Macro to implement a multi string type */ +# define IMPLEMENT_ASN1_MSTRING(itname, mask) \ + ASN1_ITEM_start(itname) \ + ASN1_ITYPE_MSTRING, mask, NULL, 0, NULL, sizeof(ASN1_STRING), #itname \ + ASN1_ITEM_end(itname) + +# define IMPLEMENT_EXTERN_ASN1(sname, tag, fptrs) \ + ASN1_ITEM_start(sname) \ + ASN1_ITYPE_EXTERN, \ + tag, \ + NULL, \ + 0, \ + &fptrs, \ + 0, \ + #sname \ + ASN1_ITEM_end(sname) + +/* Macro to implement standard functions in terms of ASN1_ITEM structures */ + +# define IMPLEMENT_ASN1_FUNCTIONS(stname) IMPLEMENT_ASN1_FUNCTIONS_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_FUNCTIONS_name(stname, itname) IMPLEMENT_ASN1_FUNCTIONS_fname(stname, itname, itname) + +# define IMPLEMENT_ASN1_FUNCTIONS_ENCODE_name(stname, itname) \ + IMPLEMENT_ASN1_FUNCTIONS_ENCODE_fname(stname, 
itname, itname) + +# define IMPLEMENT_STATIC_ASN1_ALLOC_FUNCTIONS(stname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_pfname(static, stname, stname, stname) + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS(stname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS_pfname(pre, stname, itname, fname) \ + pre stname *fname##_new(void) \ + { \ + return (stname *)ASN1_item_new(ASN1_ITEM_rptr(itname)); \ + } \ + pre void fname##_free(stname *a) \ + { \ + ASN1_item_free((ASN1_VALUE *)a, ASN1_ITEM_rptr(itname)); \ + } + +# define IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, itname, fname) \ + stname *fname##_new(void) \ + { \ + return (stname *)ASN1_item_new(ASN1_ITEM_rptr(itname)); \ + } \ + void fname##_free(stname *a) \ + { \ + ASN1_item_free((ASN1_VALUE *)a, ASN1_ITEM_rptr(itname)); \ + } + +# define IMPLEMENT_ASN1_FUNCTIONS_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ALLOC_FUNCTIONS_fname(stname, itname, fname) + +# define IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) \ + stname *d2i_##fname(stname **a, const unsigned char **in, long len) \ + { \ + return (stname *)ASN1_item_d2i((ASN1_VALUE **)a, in, len, ASN1_ITEM_rptr(itname));\ + } \ + int i2d_##fname(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_i2d((const ASN1_VALUE *)a, out, ASN1_ITEM_rptr(itname));\ + } + +# define IMPLEMENT_ASN1_NDEF_FUNCTION(stname) \ + int i2d_##stname##_NDEF(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_ndef_i2d((const ASN1_VALUE *)a, out, ASN1_ITEM_rptr(stname));\ + } + +# define IMPLEMENT_STATIC_ASN1_ENCODE_FUNCTIONS(stname) \ + static stname *d2i_##stname(stname **a, \ + const unsigned char **in, long len) \ + { \ + return (stname *)ASN1_item_d2i((ASN1_VALUE **)a, in, len, \ + ASN1_ITEM_rptr(stname)); \ + } \ + static int i2d_##stname(const stname *a, unsigned char **out) \ + { \ + return ASN1_item_i2d((const ASN1_VALUE *)a, out, \ + ASN1_ITEM_rptr(stname)); \ + } + +# define IMPLEMENT_ASN1_DUP_FUNCTION(stname) \ + stname * stname##_dup(const stname *x) \ + { \ + return ASN1_item_dup(ASN1_ITEM_rptr(stname), x); \ + } + +# define IMPLEMENT_ASN1_PRINT_FUNCTION(stname) \ + IMPLEMENT_ASN1_PRINT_FUNCTION_fname(stname, stname, stname) + +# define IMPLEMENT_ASN1_PRINT_FUNCTION_fname(stname, itname, fname) \ + int fname##_print_ctx(BIO *out, const stname *x, int indent, \ + const ASN1_PCTX *pctx) \ + { \ + return ASN1_item_print(out, (const ASN1_VALUE *)x, indent, \ + ASN1_ITEM_rptr(itname), pctx); \ + } + +/* external definitions for primitive types */ + +DECLARE_ASN1_ITEM(ASN1_BOOLEAN) +DECLARE_ASN1_ITEM(ASN1_TBOOLEAN) +DECLARE_ASN1_ITEM(ASN1_FBOOLEAN) +DECLARE_ASN1_ITEM(ASN1_SEQUENCE) +DECLARE_ASN1_ITEM(CBIGNUM) +DECLARE_ASN1_ITEM(BIGNUM) +DECLARE_ASN1_ITEM(INT32) +DECLARE_ASN1_ITEM(ZINT32) +DECLARE_ASN1_ITEM(UINT32) +DECLARE_ASN1_ITEM(ZUINT32) +DECLARE_ASN1_ITEM(INT64) +DECLARE_ASN1_ITEM(ZINT64) +DECLARE_ASN1_ITEM(UINT64) +DECLARE_ASN1_ITEM(ZUINT64) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* + * LONG and ZLONG are strongly discouraged for use as stored data, as the + * underlying C type (long) differs in size depending on the architecture. + * They are designed with 32-bit longs in mind. 
+ */ +DECLARE_ASN1_ITEM(LONG) +DECLARE_ASN1_ITEM(ZLONG) +# endif + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_VALUE, ASN1_VALUE, ASN1_VALUE) +#define sk_ASN1_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_value(sk, idx) ((ASN1_VALUE *)OPENSSL_sk_value(ossl_check_const_ASN1_VALUE_sk_type(sk), (idx))) +#define sk_ASN1_VALUE_new(cmp) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new(ossl_check_ASN1_VALUE_compfunc_type(cmp))) +#define sk_ASN1_VALUE_new_null() ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new_null()) +#define sk_ASN1_VALUE_new_reserve(cmp, n) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_VALUE_compfunc_type(cmp), (n))) +#define sk_ASN1_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_VALUE_sk_type(sk), (n)) +#define sk_ASN1_VALUE_free(sk) OPENSSL_sk_free(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_delete(sk, i) ((ASN1_VALUE *)OPENSSL_sk_delete(ossl_check_ASN1_VALUE_sk_type(sk), (i))) +#define sk_ASN1_VALUE_delete_ptr(sk, ptr) ((ASN1_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr))) +#define sk_ASN1_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_pop(sk) ((ASN1_VALUE *)OPENSSL_sk_pop(ossl_check_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_shift(sk) ((ASN1_VALUE *)OPENSSL_sk_shift(ossl_check_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_VALUE_sk_type(sk),ossl_check_ASN1_VALUE_freefunc_type(freefunc)) +#define sk_ASN1_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr), (idx)) +#define sk_ASN1_VALUE_set(sk, idx, ptr) ((ASN1_VALUE *)OPENSSL_sk_set(ossl_check_ASN1_VALUE_sk_type(sk), (idx), ossl_check_ASN1_VALUE_type(ptr))) +#define sk_ASN1_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr)) +#define sk_ASN1_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_type(ptr), pnum) +#define sk_ASN1_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_VALUE_sk_type(sk)) +#define sk_ASN1_VALUE_dup(sk) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_dup(ossl_check_const_ASN1_VALUE_sk_type(sk))) +#define sk_ASN1_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_copyfunc_type(copyfunc), ossl_check_ASN1_VALUE_freefunc_type(freefunc))) +#define sk_ASN1_VALUE_set_cmp_func(sk, cmp) ((sk_ASN1_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_VALUE_sk_type(sk), ossl_check_ASN1_VALUE_compfunc_type(cmp))) + + + +/* Functions used internally by the ASN1 code */ + +int ASN1_item_ex_new(ASN1_VALUE **pval, const ASN1_ITEM *it); +void ASN1_item_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it); + +int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, + const ASN1_ITEM *it, int tag, int aclass, char opt, + ASN1_TLC *ctx); + +int ASN1_item_ex_i2d(const ASN1_VALUE 
**pval, unsigned char **out, + const ASN1_ITEM *it, int tag, int aclass); + +/* Legacy compatibility */ +# define IMPLEMENT_ASN1_FUNCTIONS_const(name) IMPLEMENT_ASN1_FUNCTIONS(name) +# define IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(stname, itname, fname) \ + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_fname(stname, itname, fname) + +#ifdef __cplusplus +} +#endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/bio.h b/contrib/openssl-cmake/common/include/openssl/bio.h new file mode 100644 index 000000000000..e02f867beb0e --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/bio.h @@ -0,0 +1,1022 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/bio.h.in + * + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#ifndef OPENSSL_BIO_H +# define OPENSSL_BIO_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_BIO_H +# endif + +# include + +# ifndef OPENSSL_NO_STDIO +# include +# endif +# include + +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/* There are the classes of BIOs */ +# define BIO_TYPE_DESCRIPTOR 0x0100 /* socket, fd, connect or accept */ +# define BIO_TYPE_FILTER 0x0200 +# define BIO_TYPE_SOURCE_SINK 0x0400 + +/* These are the 'types' of BIOs */ +# define BIO_TYPE_NONE 0 +# define BIO_TYPE_MEM ( 1|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_FILE ( 2|BIO_TYPE_SOURCE_SINK) + +# define BIO_TYPE_FD ( 4|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_SOCKET ( 5|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_NULL ( 6|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_SSL ( 7|BIO_TYPE_FILTER) +# define BIO_TYPE_MD ( 8|BIO_TYPE_FILTER) +# define BIO_TYPE_BUFFER ( 9|BIO_TYPE_FILTER) +# define BIO_TYPE_CIPHER (10|BIO_TYPE_FILTER) +# define BIO_TYPE_BASE64 (11|BIO_TYPE_FILTER) +# define BIO_TYPE_CONNECT (12|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_ACCEPT (13|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) + +# define BIO_TYPE_NBIO_TEST (16|BIO_TYPE_FILTER)/* server proxy BIO */ +# define BIO_TYPE_NULL_FILTER (17|BIO_TYPE_FILTER) +# define BIO_TYPE_BIO (19|BIO_TYPE_SOURCE_SINK)/* half a BIO pair */ +# define BIO_TYPE_LINEBUFFER (20|BIO_TYPE_FILTER) +# define BIO_TYPE_DGRAM (21|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# define BIO_TYPE_ASN1 (22|BIO_TYPE_FILTER) +# define BIO_TYPE_COMP (23|BIO_TYPE_FILTER) +# ifndef OPENSSL_NO_SCTP +# define BIO_TYPE_DGRAM_SCTP (24|BIO_TYPE_SOURCE_SINK|BIO_TYPE_DESCRIPTOR) +# endif +# define BIO_TYPE_CORE_TO_PROV (25|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_DGRAM_PAIR (26|BIO_TYPE_SOURCE_SINK) +# define BIO_TYPE_DGRAM_MEM (27|BIO_TYPE_SOURCE_SINK) + +/* Custom type starting index returned by BIO_get_new_index() */ +#define BIO_TYPE_START 128 +/* Custom type maximum index that can be returned by BIO_get_new_index() */ +#define BIO_TYPE_MASK 0xFF + +/* + * BIO_FILENAME_READ|BIO_CLOSE to open or close on free. 
+ * BIO_set_fp(in,stdin,BIO_NOCLOSE); + */ +# define BIO_NOCLOSE 0x00 +# define BIO_CLOSE 0x01 + +/* + * These are used in the following macros and are passed to BIO_ctrl() + */ +# define BIO_CTRL_RESET 1/* opt - rewind/zero etc */ +# define BIO_CTRL_EOF 2/* opt - are we at the eof */ +# define BIO_CTRL_INFO 3/* opt - extra tit-bits */ +# define BIO_CTRL_SET 4/* man - set the 'IO' type */ +# define BIO_CTRL_GET 5/* man - get the 'IO' type */ +# define BIO_CTRL_PUSH 6/* opt - internal, used to signify change */ +# define BIO_CTRL_POP 7/* opt - internal, used to signify change */ +# define BIO_CTRL_GET_CLOSE 8/* man - set the 'close' on free */ +# define BIO_CTRL_SET_CLOSE 9/* man - set the 'close' on free */ +# define BIO_CTRL_PENDING 10/* opt - is their more data buffered */ +# define BIO_CTRL_FLUSH 11/* opt - 'flush' buffered output */ +# define BIO_CTRL_DUP 12/* man - extra stuff for 'duped' BIO */ +# define BIO_CTRL_WPENDING 13/* opt - number of bytes still to write */ +# define BIO_CTRL_SET_CALLBACK 14/* opt - set callback function */ +# define BIO_CTRL_GET_CALLBACK 15/* opt - set callback function */ + +# define BIO_CTRL_PEEK 29/* BIO_f_buffer special */ +# define BIO_CTRL_SET_FILENAME 30/* BIO_s_file special */ + +/* dgram BIO stuff */ +# define BIO_CTRL_DGRAM_CONNECT 31/* BIO dgram special */ +# define BIO_CTRL_DGRAM_SET_CONNECTED 32/* allow for an externally connected + * socket to be passed in */ +# define BIO_CTRL_DGRAM_SET_RECV_TIMEOUT 33/* setsockopt, essentially */ +# define BIO_CTRL_DGRAM_GET_RECV_TIMEOUT 34/* getsockopt, essentially */ +# define BIO_CTRL_DGRAM_SET_SEND_TIMEOUT 35/* setsockopt, essentially */ +# define BIO_CTRL_DGRAM_GET_SEND_TIMEOUT 36/* getsockopt, essentially */ + +# define BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP 37/* flag whether the last */ +# define BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP 38/* I/O operation timed out */ + +/* #ifdef IP_MTU_DISCOVER */ +# define BIO_CTRL_DGRAM_MTU_DISCOVER 39/* set DF bit on egress packets */ +/* #endif */ + +# define BIO_CTRL_DGRAM_QUERY_MTU 40/* as kernel for current MTU */ +# define BIO_CTRL_DGRAM_GET_FALLBACK_MTU 47 +# define BIO_CTRL_DGRAM_GET_MTU 41/* get cached value for MTU */ +# define BIO_CTRL_DGRAM_SET_MTU 42/* set cached value for MTU. 
+ * want to use this if asking + * the kernel fails */ + +# define BIO_CTRL_DGRAM_MTU_EXCEEDED 43/* check whether the MTU was + * exceed in the previous write + * operation */ + +# define BIO_CTRL_DGRAM_GET_PEER 46 +# define BIO_CTRL_DGRAM_SET_PEER 44/* Destination for the data */ + +# define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45/* Next DTLS handshake timeout + * to adjust socket timeouts */ +# define BIO_CTRL_DGRAM_SET_DONT_FRAG 48 + +# define BIO_CTRL_DGRAM_GET_MTU_OVERHEAD 49 + +/* Deliberately outside of OPENSSL_NO_SCTP - used in bss_dgram.c */ +# define BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE 50 +# ifndef OPENSSL_NO_SCTP +/* SCTP stuff */ +# define BIO_CTRL_DGRAM_SCTP_ADD_AUTH_KEY 51 +# define BIO_CTRL_DGRAM_SCTP_NEXT_AUTH_KEY 52 +# define BIO_CTRL_DGRAM_SCTP_AUTH_CCS_RCVD 53 +# define BIO_CTRL_DGRAM_SCTP_GET_SNDINFO 60 +# define BIO_CTRL_DGRAM_SCTP_SET_SNDINFO 61 +# define BIO_CTRL_DGRAM_SCTP_GET_RCVINFO 62 +# define BIO_CTRL_DGRAM_SCTP_SET_RCVINFO 63 +# define BIO_CTRL_DGRAM_SCTP_GET_PRINFO 64 +# define BIO_CTRL_DGRAM_SCTP_SET_PRINFO 65 +# define BIO_CTRL_DGRAM_SCTP_SAVE_SHUTDOWN 70 +# endif + +# define BIO_CTRL_DGRAM_SET_PEEK_MODE 71 + +/* + * internal BIO: + * # define BIO_CTRL_SET_KTLS_SEND 72 + * # define BIO_CTRL_SET_KTLS_SEND_CTRL_MSG 74 + * # define BIO_CTRL_CLEAR_KTLS_CTRL_MSG 75 + */ + +# define BIO_CTRL_GET_KTLS_SEND 73 +# define BIO_CTRL_GET_KTLS_RECV 76 + +# define BIO_CTRL_DGRAM_SCTP_WAIT_FOR_DRY 77 +# define BIO_CTRL_DGRAM_SCTP_MSG_WAITING 78 + +/* BIO_f_prefix controls */ +# define BIO_CTRL_SET_PREFIX 79 +# define BIO_CTRL_SET_INDENT 80 +# define BIO_CTRL_GET_INDENT 81 + +# define BIO_CTRL_DGRAM_GET_LOCAL_ADDR_CAP 82 +# define BIO_CTRL_DGRAM_GET_LOCAL_ADDR_ENABLE 83 +# define BIO_CTRL_DGRAM_SET_LOCAL_ADDR_ENABLE 84 +# define BIO_CTRL_DGRAM_GET_EFFECTIVE_CAPS 85 +# define BIO_CTRL_DGRAM_GET_CAPS 86 +# define BIO_CTRL_DGRAM_SET_CAPS 87 +# define BIO_CTRL_DGRAM_GET_NO_TRUNC 88 +# define BIO_CTRL_DGRAM_SET_NO_TRUNC 89 + +/* + * internal BIO: + * # define BIO_CTRL_SET_KTLS_TX_ZEROCOPY_SENDFILE 90 + */ + +# define BIO_CTRL_GET_RPOLL_DESCRIPTOR 91 +# define BIO_CTRL_GET_WPOLL_DESCRIPTOR 92 +# define BIO_CTRL_DGRAM_DETECT_PEER_ADDR 93 +# define BIO_CTRL_DGRAM_SET0_LOCAL_ADDR 94 + +# define BIO_DGRAM_CAP_NONE 0U +# define BIO_DGRAM_CAP_HANDLES_SRC_ADDR (1U << 0) +# define BIO_DGRAM_CAP_HANDLES_DST_ADDR (1U << 1) +# define BIO_DGRAM_CAP_PROVIDES_SRC_ADDR (1U << 2) +# define BIO_DGRAM_CAP_PROVIDES_DST_ADDR (1U << 3) + +# ifndef OPENSSL_NO_KTLS +# define BIO_get_ktls_send(b) \ + (BIO_ctrl(b, BIO_CTRL_GET_KTLS_SEND, 0, NULL) > 0) +# define BIO_get_ktls_recv(b) \ + (BIO_ctrl(b, BIO_CTRL_GET_KTLS_RECV, 0, NULL) > 0) +# else +# define BIO_get_ktls_send(b) (0) +# define BIO_get_ktls_recv(b) (0) +# endif + +/* modifiers */ +# define BIO_FP_READ 0x02 +# define BIO_FP_WRITE 0x04 +# define BIO_FP_APPEND 0x08 +# define BIO_FP_TEXT 0x10 + +# define BIO_FLAGS_READ 0x01 +# define BIO_FLAGS_WRITE 0x02 +# define BIO_FLAGS_IO_SPECIAL 0x04 +# define BIO_FLAGS_RWS (BIO_FLAGS_READ|BIO_FLAGS_WRITE|BIO_FLAGS_IO_SPECIAL) +# define BIO_FLAGS_SHOULD_RETRY 0x08 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* This #define was replaced by an internal constant and should not be used. */ +# define BIO_FLAGS_UPLINK 0 +# endif + +# define BIO_FLAGS_BASE64_NO_NL 0x100 + +/* + * This is used with memory BIOs: + * BIO_FLAGS_MEM_RDONLY means we shouldn't free up or change the data in any way; + * BIO_FLAGS_NONCLEAR_RST means we shouldn't clear data on reset. 
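On the memory-BIO flags described just above: application code normally gets a read-only memory BIO from BIO_new_mem_buf(), which relies on BIO_FLAGS_MEM_RDONLY internally so the caller's buffer is never modified or freed, and resetting such a BIO rewinds it to the original data rather than clearing it. A minimal sketch, with the function name demo_readonly_membio() invented for the example:

#include <openssl/bio.h>

int demo_readonly_membio(void)
{
    static const char data[] = "hello, BIO";
    char buf[32];
    int n1, n2;

    /* Read-only memory BIO over an existing buffer. */
    BIO *mem = BIO_new_mem_buf(data, sizeof(data) - 1);
    if (mem == NULL)
        return 0;

    n1 = BIO_read(mem, buf, sizeof(buf));   /* drains the buffer */
    BIO_reset(mem);                         /* rewinds to the original data */
    n2 = BIO_read(mem, buf, sizeof(buf));   /* yields the same bytes again */

    BIO_free(mem);
    return n1 > 0 && n1 == n2;
}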
+ */ +# define BIO_FLAGS_MEM_RDONLY 0x200 +# define BIO_FLAGS_NONCLEAR_RST 0x400 +# define BIO_FLAGS_IN_EOF 0x800 + +/* the BIO FLAGS values 0x1000 to 0x8000 are reserved for internal KTLS flags */ + +typedef union bio_addr_st BIO_ADDR; +typedef struct bio_addrinfo_st BIO_ADDRINFO; + +int BIO_get_new_index(void); +void BIO_set_flags(BIO *b, int flags); +int BIO_test_flags(const BIO *b, int flags); +void BIO_clear_flags(BIO *b, int flags); + +# define BIO_get_flags(b) BIO_test_flags(b, ~(0x0)) +# define BIO_set_retry_special(b) \ + BIO_set_flags(b, (BIO_FLAGS_IO_SPECIAL|BIO_FLAGS_SHOULD_RETRY)) +# define BIO_set_retry_read(b) \ + BIO_set_flags(b, (BIO_FLAGS_READ|BIO_FLAGS_SHOULD_RETRY)) +# define BIO_set_retry_write(b) \ + BIO_set_flags(b, (BIO_FLAGS_WRITE|BIO_FLAGS_SHOULD_RETRY)) + +/* These are normally used internally in BIOs */ +# define BIO_clear_retry_flags(b) \ + BIO_clear_flags(b, (BIO_FLAGS_RWS|BIO_FLAGS_SHOULD_RETRY)) +# define BIO_get_retry_flags(b) \ + BIO_test_flags(b, (BIO_FLAGS_RWS|BIO_FLAGS_SHOULD_RETRY)) + +/* These should be used by the application to tell why we should retry */ +# define BIO_should_read(a) BIO_test_flags(a, BIO_FLAGS_READ) +# define BIO_should_write(a) BIO_test_flags(a, BIO_FLAGS_WRITE) +# define BIO_should_io_special(a) BIO_test_flags(a, BIO_FLAGS_IO_SPECIAL) +# define BIO_retry_type(a) BIO_test_flags(a, BIO_FLAGS_RWS) +# define BIO_should_retry(a) BIO_test_flags(a, BIO_FLAGS_SHOULD_RETRY) + +/* + * The next three are used in conjunction with the BIO_should_io_special() + * condition. After this returns true, BIO *BIO_get_retry_BIO(BIO *bio, int + * *reason); will walk the BIO stack and return the 'reason' for the special + * and the offending BIO. Given a BIO, BIO_get_retry_reason(bio) will return + * the code. + */ +/* + * Returned from the SSL bio when the certificate retrieval code had an error + */ +# define BIO_RR_SSL_X509_LOOKUP 0x01 +/* Returned from the connect BIO when a connect would have blocked */ +# define BIO_RR_CONNECT 0x02 +/* Returned from the accept BIO when an accept would have blocked */ +# define BIO_RR_ACCEPT 0x03 + +/* These are passed by the BIO callback */ +# define BIO_CB_FREE 0x01 +# define BIO_CB_READ 0x02 +# define BIO_CB_WRITE 0x03 +# define BIO_CB_PUTS 0x04 +# define BIO_CB_GETS 0x05 +# define BIO_CB_CTRL 0x06 +# define BIO_CB_RECVMMSG 0x07 +# define BIO_CB_SENDMMSG 0x08 + +/* + * The callback is called before and after the underling operation, The + * BIO_CB_RETURN flag indicates if it is after the call + */ +# define BIO_CB_RETURN 0x80 +# define BIO_CB_return(a) ((a)|BIO_CB_RETURN) +# define BIO_cb_pre(a) (!((a)&BIO_CB_RETURN)) +# define BIO_cb_post(a) ((a)&BIO_CB_RETURN) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +typedef long (*BIO_callback_fn)(BIO *b, int oper, const char *argp, int argi, + long argl, long ret); +OSSL_DEPRECATEDIN_3_0 BIO_callback_fn BIO_get_callback(const BIO *b); +OSSL_DEPRECATEDIN_3_0 void BIO_set_callback(BIO *b, BIO_callback_fn callback); +OSSL_DEPRECATEDIN_3_0 long BIO_debug_callback(BIO *bio, int cmd, + const char *argp, int argi, + long argl, long ret); +# endif + +typedef long (*BIO_callback_fn_ex)(BIO *b, int oper, const char *argp, + size_t len, int argi, + long argl, int ret, size_t *processed); +BIO_callback_fn_ex BIO_get_callback_ex(const BIO *b); +void BIO_set_callback_ex(BIO *b, BIO_callback_fn_ex callback); +long BIO_debug_callback_ex(BIO *bio, int oper, const char *argp, size_t len, + int argi, long argl, int ret, size_t *processed); + +char *BIO_get_callback_arg(const BIO *b); +void 
BIO_set_callback_arg(BIO *b, char *arg); + +typedef struct bio_method_st BIO_METHOD; + +const char *BIO_method_name(const BIO *b); +int BIO_method_type(const BIO *b); + +typedef int BIO_info_cb(BIO *, int, int); +typedef BIO_info_cb bio_info_cb; /* backward compatibility */ + +SKM_DEFINE_STACK_OF_INTERNAL(BIO, BIO, BIO) +#define sk_BIO_num(sk) OPENSSL_sk_num(ossl_check_const_BIO_sk_type(sk)) +#define sk_BIO_value(sk, idx) ((BIO *)OPENSSL_sk_value(ossl_check_const_BIO_sk_type(sk), (idx))) +#define sk_BIO_new(cmp) ((STACK_OF(BIO) *)OPENSSL_sk_new(ossl_check_BIO_compfunc_type(cmp))) +#define sk_BIO_new_null() ((STACK_OF(BIO) *)OPENSSL_sk_new_null()) +#define sk_BIO_new_reserve(cmp, n) ((STACK_OF(BIO) *)OPENSSL_sk_new_reserve(ossl_check_BIO_compfunc_type(cmp), (n))) +#define sk_BIO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_BIO_sk_type(sk), (n)) +#define sk_BIO_free(sk) OPENSSL_sk_free(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_zero(sk) OPENSSL_sk_zero(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_delete(sk, i) ((BIO *)OPENSSL_sk_delete(ossl_check_BIO_sk_type(sk), (i))) +#define sk_BIO_delete_ptr(sk, ptr) ((BIO *)OPENSSL_sk_delete_ptr(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr))) +#define sk_BIO_push(sk, ptr) OPENSSL_sk_push(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_pop(sk) ((BIO *)OPENSSL_sk_pop(ossl_check_BIO_sk_type(sk))) +#define sk_BIO_shift(sk) ((BIO *)OPENSSL_sk_shift(ossl_check_BIO_sk_type(sk))) +#define sk_BIO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_BIO_sk_type(sk),ossl_check_BIO_freefunc_type(freefunc)) +#define sk_BIO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr), (idx)) +#define sk_BIO_set(sk, idx, ptr) ((BIO *)OPENSSL_sk_set(ossl_check_BIO_sk_type(sk), (idx), ossl_check_BIO_type(ptr))) +#define sk_BIO_find(sk, ptr) OPENSSL_sk_find(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr)) +#define sk_BIO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_BIO_sk_type(sk), ossl_check_BIO_type(ptr), pnum) +#define sk_BIO_sort(sk) OPENSSL_sk_sort(ossl_check_BIO_sk_type(sk)) +#define sk_BIO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_BIO_sk_type(sk)) +#define sk_BIO_dup(sk) ((STACK_OF(BIO) *)OPENSSL_sk_dup(ossl_check_const_BIO_sk_type(sk))) +#define sk_BIO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(BIO) *)OPENSSL_sk_deep_copy(ossl_check_const_BIO_sk_type(sk), ossl_check_BIO_copyfunc_type(copyfunc), ossl_check_BIO_freefunc_type(freefunc))) +#define sk_BIO_set_cmp_func(sk, cmp) ((sk_BIO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_BIO_sk_type(sk), ossl_check_BIO_compfunc_type(cmp))) + + + +/* Prefix and suffix callback in ASN1 BIO */ +typedef int asn1_ps_func (BIO *b, unsigned char **pbuf, int *plen, + void *parg); + +typedef void (*BIO_dgram_sctp_notification_handler_fn) (BIO *b, + void *context, + void *buf); +# ifndef OPENSSL_NO_SCTP +/* SCTP parameter structs */ +struct bio_dgram_sctp_sndinfo { + uint16_t snd_sid; + uint16_t snd_flags; + uint32_t snd_ppid; + uint32_t snd_context; +}; + +struct bio_dgram_sctp_rcvinfo { + uint16_t rcv_sid; + uint16_t rcv_ssn; + uint16_t rcv_flags; + uint32_t rcv_ppid; + uint32_t rcv_tsn; + uint32_t rcv_cumtsn; + uint32_t rcv_context; +}; + +struct bio_dgram_sctp_prinfo { + uint16_t pr_policy; + uint32_t pr_value; +}; +# endif + +/* 
BIO_sendmmsg/BIO_recvmmsg-related definitions */ +typedef struct bio_msg_st { + void *data; + size_t data_len; + BIO_ADDR *peer, *local; + uint64_t flags; +} BIO_MSG; + +typedef struct bio_mmsg_cb_args_st { + BIO_MSG *msg; + size_t stride, num_msg; + uint64_t flags; + size_t *msgs_processed; +} BIO_MMSG_CB_ARGS; + +#define BIO_POLL_DESCRIPTOR_TYPE_NONE 0 +#define BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD 1 +#define BIO_POLL_DESCRIPTOR_TYPE_SSL 2 +#define BIO_POLL_DESCRIPTOR_CUSTOM_START 8192 + +typedef struct bio_poll_descriptor_st { + uint32_t type; + union { + int fd; + void *custom; + uintptr_t custom_ui; + SSL *ssl; + } value; +} BIO_POLL_DESCRIPTOR; + +/* + * #define BIO_CONN_get_param_hostname BIO_ctrl + */ + +# define BIO_C_SET_CONNECT 100 +# define BIO_C_DO_STATE_MACHINE 101 +# define BIO_C_SET_NBIO 102 +/* # define BIO_C_SET_PROXY_PARAM 103 */ +# define BIO_C_SET_FD 104 +# define BIO_C_GET_FD 105 +# define BIO_C_SET_FILE_PTR 106 +# define BIO_C_GET_FILE_PTR 107 +# define BIO_C_SET_FILENAME 108 +# define BIO_C_SET_SSL 109 +# define BIO_C_GET_SSL 110 +# define BIO_C_SET_MD 111 +# define BIO_C_GET_MD 112 +# define BIO_C_GET_CIPHER_STATUS 113 +# define BIO_C_SET_BUF_MEM 114 +# define BIO_C_GET_BUF_MEM_PTR 115 +# define BIO_C_GET_BUFF_NUM_LINES 116 +# define BIO_C_SET_BUFF_SIZE 117 +# define BIO_C_SET_ACCEPT 118 +# define BIO_C_SSL_MODE 119 +# define BIO_C_GET_MD_CTX 120 +/* # define BIO_C_GET_PROXY_PARAM 121 */ +# define BIO_C_SET_BUFF_READ_DATA 122/* data to read first */ +# define BIO_C_GET_CONNECT 123 +# define BIO_C_GET_ACCEPT 124 +# define BIO_C_SET_SSL_RENEGOTIATE_BYTES 125 +# define BIO_C_GET_SSL_NUM_RENEGOTIATES 126 +# define BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT 127 +# define BIO_C_FILE_SEEK 128 +# define BIO_C_GET_CIPHER_CTX 129 +# define BIO_C_SET_BUF_MEM_EOF_RETURN 130/* return end of input + * value */ +# define BIO_C_SET_BIND_MODE 131 +# define BIO_C_GET_BIND_MODE 132 +# define BIO_C_FILE_TELL 133 +# define BIO_C_GET_SOCKS 134 +# define BIO_C_SET_SOCKS 135 + +# define BIO_C_SET_WRITE_BUF_SIZE 136/* for BIO_s_bio */ +# define BIO_C_GET_WRITE_BUF_SIZE 137 +# define BIO_C_MAKE_BIO_PAIR 138 +# define BIO_C_DESTROY_BIO_PAIR 139 +# define BIO_C_GET_WRITE_GUARANTEE 140 +# define BIO_C_GET_READ_REQUEST 141 +# define BIO_C_SHUTDOWN_WR 142 +# define BIO_C_NREAD0 143 +# define BIO_C_NREAD 144 +# define BIO_C_NWRITE0 145 +# define BIO_C_NWRITE 146 +# define BIO_C_RESET_READ_REQUEST 147 +# define BIO_C_SET_MD_CTX 148 + +# define BIO_C_SET_PREFIX 149 +# define BIO_C_GET_PREFIX 150 +# define BIO_C_SET_SUFFIX 151 +# define BIO_C_GET_SUFFIX 152 + +# define BIO_C_SET_EX_ARG 153 +# define BIO_C_GET_EX_ARG 154 + +# define BIO_C_SET_CONNECT_MODE 155 + +# define BIO_C_SET_TFO 156 /* like BIO_C_SET_NBIO */ + +# define BIO_C_SET_SOCK_TYPE 157 +# define BIO_C_GET_SOCK_TYPE 158 +# define BIO_C_GET_DGRAM_BIO 159 + +# define BIO_set_app_data(s,arg) BIO_set_ex_data(s,0,arg) +# define BIO_get_app_data(s) BIO_get_ex_data(s,0) + +# define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) +# define BIO_set_tfo(b,n) BIO_ctrl(b,BIO_C_SET_TFO,(n),NULL) + +# ifndef OPENSSL_NO_SOCK +/* IP families we support, for BIO_s_connect() and BIO_s_accept() */ +/* Note: the underlying operating system may not support some of them */ +# define BIO_FAMILY_IPV4 4 +# define BIO_FAMILY_IPV6 6 +# define BIO_FAMILY_IPANY 256 + +/* BIO_s_connect() */ +# define BIO_set_conn_hostname(b,name) BIO_ctrl(b,BIO_C_SET_CONNECT,0, \ + (char *)(name)) +# define BIO_set_conn_port(b,port) BIO_ctrl(b,BIO_C_SET_CONNECT,1, \ + (char *)(port)) +# 
define BIO_set_conn_address(b,addr) BIO_ctrl(b,BIO_C_SET_CONNECT,2, \ + (char *)(addr)) +# define BIO_set_conn_ip_family(b,f) BIO_int_ctrl(b,BIO_C_SET_CONNECT,3,f) +# define BIO_get_conn_hostname(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,0)) +# define BIO_get_conn_port(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,1)) +# define BIO_get_conn_address(b) ((const BIO_ADDR *)BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,2)) +# define BIO_get_conn_ip_family(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,NULL) +# define BIO_get_conn_mode(b) BIO_ctrl(b,BIO_C_GET_CONNECT,4,NULL) +# define BIO_set_conn_mode(b,n) BIO_ctrl(b,BIO_C_SET_CONNECT_MODE,(n),NULL) +# define BIO_set_sock_type(b,t) BIO_ctrl(b,BIO_C_SET_SOCK_TYPE,(t),NULL) +# define BIO_get_sock_type(b) BIO_ctrl(b,BIO_C_GET_SOCK_TYPE,0,NULL) +# define BIO_get0_dgram_bio(b, p) BIO_ctrl(b,BIO_C_GET_DGRAM_BIO,0,(void *)(BIO **)(p)) + +/* BIO_s_accept() */ +# define BIO_set_accept_name(b,name) BIO_ctrl(b,BIO_C_SET_ACCEPT,0, \ + (char *)(name)) +# define BIO_set_accept_port(b,port) BIO_ctrl(b,BIO_C_SET_ACCEPT,1, \ + (char *)(port)) +# define BIO_get_accept_name(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,0)) +# define BIO_get_accept_port(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,1)) +# define BIO_get_peer_name(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,2)) +# define BIO_get_peer_port(b) ((const char *)BIO_ptr_ctrl(b,BIO_C_GET_ACCEPT,3)) +/* #define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL) */ +# define BIO_set_nbio_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,2,(n)?(void *)"a":NULL) +# define BIO_set_accept_bios(b,bio) BIO_ctrl(b,BIO_C_SET_ACCEPT,3, \ + (char *)(bio)) +# define BIO_set_accept_ip_family(b,f) BIO_int_ctrl(b,BIO_C_SET_ACCEPT,4,f) +# define BIO_get_accept_ip_family(b) BIO_ctrl(b,BIO_C_GET_ACCEPT,4,NULL) +# define BIO_set_tfo_accept(b,n) BIO_ctrl(b,BIO_C_SET_ACCEPT,5,(n)?(void *)"a":NULL) + +/* Aliases kept for backward compatibility */ +# define BIO_BIND_NORMAL 0 +# define BIO_BIND_REUSEADDR BIO_SOCK_REUSEADDR +# define BIO_BIND_REUSEADDR_IF_UNUSED BIO_SOCK_REUSEADDR +# define BIO_set_bind_mode(b,mode) BIO_ctrl(b,BIO_C_SET_BIND_MODE,mode,NULL) +# define BIO_get_bind_mode(b) BIO_ctrl(b,BIO_C_GET_BIND_MODE,0,NULL) +# endif /* OPENSSL_NO_SOCK */ + +# define BIO_do_connect(b) BIO_do_handshake(b) +# define BIO_do_accept(b) BIO_do_handshake(b) + +# define BIO_do_handshake(b) BIO_ctrl(b,BIO_C_DO_STATE_MACHINE,0,NULL) + +/* BIO_s_datagram(), BIO_s_fd(), BIO_s_socket(), BIO_s_accept() and BIO_s_connect() */ +# define BIO_set_fd(b,fd,c) BIO_int_ctrl(b,BIO_C_SET_FD,c,fd) +# define BIO_get_fd(b,c) BIO_ctrl(b,BIO_C_GET_FD,0,(char *)(c)) + +/* BIO_s_file() */ +# define BIO_set_fp(b,fp,c) BIO_ctrl(b,BIO_C_SET_FILE_PTR,c,(char *)(fp)) +# define BIO_get_fp(b,fpp) BIO_ctrl(b,BIO_C_GET_FILE_PTR,0,(char *)(fpp)) + +/* BIO_s_fd() and BIO_s_file() */ +# define BIO_seek(b,ofs) (int)BIO_ctrl(b,BIO_C_FILE_SEEK,ofs,NULL) +# define BIO_tell(b) (int)BIO_ctrl(b,BIO_C_FILE_TELL,0,NULL) + +/* + * name is cast to lose const, but might be better to route through a + * function so we can do it safely + */ +# ifdef CONST_STRICT +/* + * If you are wondering why this isn't defined, its because CONST_STRICT is + * purely a compile-time kludge to allow const to be checked. 
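The connect-BIO macros above all route through BIO_ctrl(); in particular, BIO_do_connect() is the BIO_C_DO_STATE_MACHINE control, so name resolution and the TCP connect happen when it is called. A hedged sketch of the usual calling pattern, where the function name fetch_banner() and the "example.com:80" target are placeholders:

#include <openssl/bio.h>

int fetch_banner(const char *host_port)   /* e.g. "example.com:80" */
{
    char buf[256];
    int ok = 0;

    BIO *conn = BIO_new_connect(host_port);   /* a BIO_s_connect() BIO */
    if (conn == NULL)
        return 0;

    /* BIO_do_connect() == BIO_ctrl(conn, BIO_C_DO_STATE_MACHINE, 0, NULL);
     * it resolves the host, connects, and reports <= 0 on failure. */
    if (BIO_do_connect(conn) <= 0)
        goto done;

    if (BIO_puts(conn, "HEAD / HTTP/1.0\r\n\r\n") <= 0)
        goto done;

    ok = BIO_read(conn, buf, sizeof(buf) - 1) > 0;

done:
    BIO_free_all(conn);
    return ok;
}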
+ */ +int BIO_read_filename(BIO *b, const char *name); +# else +# define BIO_read_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_READ,(char *)(name)) +# endif +# define BIO_write_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_WRITE,name) +# define BIO_append_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_APPEND,name) +# define BIO_rw_filename(b,name) (int)BIO_ctrl(b,BIO_C_SET_FILENAME, \ + BIO_CLOSE|BIO_FP_READ|BIO_FP_WRITE,name) + +/* + * WARNING WARNING, this ups the reference count on the read bio of the SSL + * structure. This is because the ssl read BIO is now pointed to by the + * next_bio field in the bio. So when you free the BIO, make sure you are + * doing a BIO_free_all() to catch the underlying BIO. + */ +# define BIO_set_ssl(b,ssl,c) BIO_ctrl(b,BIO_C_SET_SSL,c,(char *)(ssl)) +# define BIO_get_ssl(b,sslp) BIO_ctrl(b,BIO_C_GET_SSL,0,(char *)(sslp)) +# define BIO_set_ssl_mode(b,client) BIO_ctrl(b,BIO_C_SSL_MODE,client,NULL) +# define BIO_set_ssl_renegotiate_bytes(b,num) \ + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_BYTES,num,NULL) +# define BIO_get_num_renegotiates(b) \ + BIO_ctrl(b,BIO_C_GET_SSL_NUM_RENEGOTIATES,0,NULL) +# define BIO_set_ssl_renegotiate_timeout(b,seconds) \ + BIO_ctrl(b,BIO_C_SET_SSL_RENEGOTIATE_TIMEOUT,seconds,NULL) + +/* defined in evp.h */ +/* #define BIO_set_md(b,md) BIO_ctrl(b,BIO_C_SET_MD,1,(char *)(md)) */ + +# define BIO_get_mem_data(b,pp) BIO_ctrl(b,BIO_CTRL_INFO,0,(char *)(pp)) +# define BIO_set_mem_buf(b,bm,c) BIO_ctrl(b,BIO_C_SET_BUF_MEM,c,(char *)(bm)) +# define BIO_get_mem_ptr(b,pp) BIO_ctrl(b,BIO_C_GET_BUF_MEM_PTR,0, \ + (char *)(pp)) +# define BIO_set_mem_eof_return(b,v) \ + BIO_ctrl(b,BIO_C_SET_BUF_MEM_EOF_RETURN,v,NULL) + +/* For the BIO_f_buffer() type */ +# define BIO_get_buffer_num_lines(b) BIO_ctrl(b,BIO_C_GET_BUFF_NUM_LINES,0,NULL) +# define BIO_set_buffer_size(b,size) BIO_ctrl(b,BIO_C_SET_BUFF_SIZE,size,NULL) +# define BIO_set_read_buffer_size(b,size) BIO_int_ctrl(b,BIO_C_SET_BUFF_SIZE,size,0) +# define BIO_set_write_buffer_size(b,size) BIO_int_ctrl(b,BIO_C_SET_BUFF_SIZE,size,1) +# define BIO_set_buffer_read_data(b,buf,num) BIO_ctrl(b,BIO_C_SET_BUFF_READ_DATA,num,buf) + +/* Don't use the next one unless you know what you are doing :-) */ +# define BIO_dup_state(b,ret) BIO_ctrl(b,BIO_CTRL_DUP,0,(char *)(ret)) + +# define BIO_reset(b) (int)BIO_ctrl(b,BIO_CTRL_RESET,0,NULL) +# define BIO_eof(b) (int)BIO_ctrl(b,BIO_CTRL_EOF,0,NULL) +# define BIO_set_close(b,c) (int)BIO_ctrl(b,BIO_CTRL_SET_CLOSE,(c),NULL) +# define BIO_get_close(b) (int)BIO_ctrl(b,BIO_CTRL_GET_CLOSE,0,NULL) +# define BIO_pending(b) (int)BIO_ctrl(b,BIO_CTRL_PENDING,0,NULL) +# define BIO_wpending(b) (int)BIO_ctrl(b,BIO_CTRL_WPENDING,0,NULL) +/* ...pending macros have inappropriate return type */ +size_t BIO_ctrl_pending(BIO *b); +size_t BIO_ctrl_wpending(BIO *b); +# define BIO_flush(b) (int)BIO_ctrl(b,BIO_CTRL_FLUSH,0,NULL) +# define BIO_get_info_callback(b,cbp) (int)BIO_ctrl(b,BIO_CTRL_GET_CALLBACK,0, \ + cbp) +# define BIO_set_info_callback(b,cb) (int)BIO_callback_ctrl(b,BIO_CTRL_SET_CALLBACK,cb) + +/* For the BIO_f_buffer() type */ +# define BIO_buffer_get_num_lines(b) BIO_ctrl(b,BIO_CTRL_GET,0,NULL) +# define BIO_buffer_peek(b,s,l) BIO_ctrl(b,BIO_CTRL_PEEK,(l),(s)) + +/* For BIO_s_bio() */ +# define BIO_set_write_buf_size(b,size) (int)BIO_ctrl(b,BIO_C_SET_WRITE_BUF_SIZE,size,NULL) +# define BIO_get_write_buf_size(b,size) (size_t)BIO_ctrl(b,BIO_C_GET_WRITE_BUF_SIZE,size,NULL) +# define 
BIO_make_bio_pair(b1,b2) (int)BIO_ctrl(b1,BIO_C_MAKE_BIO_PAIR,0,b2) +# define BIO_destroy_bio_pair(b) (int)BIO_ctrl(b,BIO_C_DESTROY_BIO_PAIR,0,NULL) +# define BIO_shutdown_wr(b) (int)BIO_ctrl(b, BIO_C_SHUTDOWN_WR, 0, NULL) +/* macros with inappropriate type -- but ...pending macros use int too: */ +# define BIO_get_write_guarantee(b) (int)BIO_ctrl(b,BIO_C_GET_WRITE_GUARANTEE,0,NULL) +# define BIO_get_read_request(b) (int)BIO_ctrl(b,BIO_C_GET_READ_REQUEST,0,NULL) +size_t BIO_ctrl_get_write_guarantee(BIO *b); +size_t BIO_ctrl_get_read_request(BIO *b); +int BIO_ctrl_reset_read_request(BIO *b); + +/* ctrl macros for dgram */ +# define BIO_ctrl_dgram_connect(b,peer) \ + (int)BIO_ctrl(b,BIO_CTRL_DGRAM_CONNECT,0, (char *)(peer)) +# define BIO_ctrl_set_connected(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_CONNECTED, 0, (char *)(peer)) +# define BIO_dgram_recv_timedout(b) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_RECV_TIMER_EXP, 0, NULL) +# define BIO_dgram_send_timedout(b) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_SEND_TIMER_EXP, 0, NULL) +# define BIO_dgram_get_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_PEER, 0, (char *)(peer)) +# define BIO_dgram_set_peer(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_PEER, 0, (char *)(peer)) +# define BIO_dgram_detect_peer_addr(b,peer) \ + (int)BIO_ctrl(b, BIO_CTRL_DGRAM_DETECT_PEER_ADDR, 0, (char *)(peer)) +# define BIO_dgram_get_mtu_overhead(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU_OVERHEAD, 0, NULL) +# define BIO_dgram_get_local_addr_cap(b) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_LOCAL_ADDR_CAP, 0, NULL) +# define BIO_dgram_get_local_addr_enable(b, penable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_LOCAL_ADDR_ENABLE, 0, (char *)(penable)) +# define BIO_dgram_set_local_addr_enable(b, enable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_LOCAL_ADDR_ENABLE, (enable), NULL) +# define BIO_dgram_get_effective_caps(b) \ + (uint32_t)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_EFFECTIVE_CAPS, 0, NULL) +# define BIO_dgram_get_caps(b) \ + (uint32_t)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_CAPS, 0, NULL) +# define BIO_dgram_set_caps(b, caps) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_CAPS, (long)(caps), NULL) +# define BIO_dgram_get_no_trunc(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_NO_TRUNC, 0, NULL) +# define BIO_dgram_set_no_trunc(b, enable) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_NO_TRUNC, (enable), NULL) +# define BIO_dgram_get_mtu(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU, 0, NULL) +# define BIO_dgram_set_mtu(b, mtu) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET_MTU, (mtu), NULL) +# define BIO_dgram_set0_local_addr(b, addr) \ + (int)BIO_ctrl((b), BIO_CTRL_DGRAM_SET0_LOCAL_ADDR, 0, (addr)) + +/* ctrl macros for BIO_f_prefix */ +# define BIO_set_prefix(b,p) BIO_ctrl((b), BIO_CTRL_SET_PREFIX, 0, (void *)(p)) +# define BIO_set_indent(b,i) BIO_ctrl((b), BIO_CTRL_SET_INDENT, (i), NULL) +# define BIO_get_indent(b) BIO_ctrl((b), BIO_CTRL_GET_INDENT, 0, NULL) + +#define BIO_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_BIO, l, p, newf, dupf, freef) +int BIO_set_ex_data(BIO *bio, int idx, void *data); +void *BIO_get_ex_data(const BIO *bio, int idx); +uint64_t BIO_number_read(BIO *bio); +uint64_t BIO_number_written(BIO *bio); + +/* For BIO_f_asn1() */ +int BIO_asn1_set_prefix(BIO *b, asn1_ps_func *prefix, + asn1_ps_func *prefix_free); +int BIO_asn1_get_prefix(BIO *b, asn1_ps_func **pprefix, + asn1_ps_func **pprefix_free); +int BIO_asn1_set_suffix(BIO *b, asn1_ps_func *suffix, + asn1_ps_func 
*suffix_free); +int BIO_asn1_get_suffix(BIO *b, asn1_ps_func **psuffix, + asn1_ps_func **psuffix_free); + +const BIO_METHOD *BIO_s_file(void); +BIO *BIO_new_file(const char *filename, const char *mode); +BIO *BIO_new_from_core_bio(OSSL_LIB_CTX *libctx, OSSL_CORE_BIO *corebio); +# ifndef OPENSSL_NO_STDIO +BIO *BIO_new_fp(FILE *stream, int close_flag); +# endif +BIO *BIO_new_ex(OSSL_LIB_CTX *libctx, const BIO_METHOD *method); +BIO *BIO_new(const BIO_METHOD *type); +int BIO_free(BIO *a); +void BIO_set_data(BIO *a, void *ptr); +void *BIO_get_data(BIO *a); +void BIO_set_init(BIO *a, int init); +int BIO_get_init(BIO *a); +void BIO_set_shutdown(BIO *a, int shut); +int BIO_get_shutdown(BIO *a); +void BIO_vfree(BIO *a); +int BIO_up_ref(BIO *a); +int BIO_read(BIO *b, void *data, int dlen); +int BIO_read_ex(BIO *b, void *data, size_t dlen, size_t *readbytes); +__owur int BIO_recvmmsg(BIO *b, BIO_MSG *msg, + size_t stride, size_t num_msg, uint64_t flags, + size_t *msgs_processed); +int BIO_gets(BIO *bp, char *buf, int size); +int BIO_get_line(BIO *bio, char *buf, int size); +int BIO_write(BIO *b, const void *data, int dlen); +int BIO_write_ex(BIO *b, const void *data, size_t dlen, size_t *written); +__owur int BIO_sendmmsg(BIO *b, BIO_MSG *msg, + size_t stride, size_t num_msg, uint64_t flags, + size_t *msgs_processed); +__owur int BIO_get_rpoll_descriptor(BIO *b, BIO_POLL_DESCRIPTOR *desc); +__owur int BIO_get_wpoll_descriptor(BIO *b, BIO_POLL_DESCRIPTOR *desc); +int BIO_puts(BIO *bp, const char *buf); +int BIO_indent(BIO *b, int indent, int max); +long BIO_ctrl(BIO *bp, int cmd, long larg, void *parg); +long BIO_callback_ctrl(BIO *b, int cmd, BIO_info_cb *fp); +void *BIO_ptr_ctrl(BIO *bp, int cmd, long larg); +long BIO_int_ctrl(BIO *bp, int cmd, long larg, int iarg); +BIO *BIO_push(BIO *b, BIO *append); +BIO *BIO_pop(BIO *b); +void BIO_free_all(BIO *a); +BIO *BIO_find_type(BIO *b, int bio_type); +BIO *BIO_next(BIO *b); +void BIO_set_next(BIO *b, BIO *next); +BIO *BIO_get_retry_BIO(BIO *bio, int *reason); +int BIO_get_retry_reason(BIO *bio); +void BIO_set_retry_reason(BIO *bio, int reason); +BIO *BIO_dup_chain(BIO *in); + +int BIO_nread0(BIO *bio, char **buf); +int BIO_nread(BIO *bio, char **buf, int num); +int BIO_nwrite0(BIO *bio, char **buf); +int BIO_nwrite(BIO *bio, char **buf, int num); + +const BIO_METHOD *BIO_s_mem(void); +# ifndef OPENSSL_NO_DGRAM +const BIO_METHOD *BIO_s_dgram_mem(void); +# endif +const BIO_METHOD *BIO_s_secmem(void); +BIO *BIO_new_mem_buf(const void *buf, int len); +# ifndef OPENSSL_NO_SOCK +const BIO_METHOD *BIO_s_socket(void); +const BIO_METHOD *BIO_s_connect(void); +const BIO_METHOD *BIO_s_accept(void); +# endif +const BIO_METHOD *BIO_s_fd(void); +const BIO_METHOD *BIO_s_log(void); +const BIO_METHOD *BIO_s_bio(void); +const BIO_METHOD *BIO_s_null(void); +const BIO_METHOD *BIO_f_null(void); +const BIO_METHOD *BIO_f_buffer(void); +const BIO_METHOD *BIO_f_readbuffer(void); +const BIO_METHOD *BIO_f_linebuffer(void); +const BIO_METHOD *BIO_f_nbio_test(void); +const BIO_METHOD *BIO_f_prefix(void); +const BIO_METHOD *BIO_s_core(void); +# ifndef OPENSSL_NO_DGRAM +const BIO_METHOD *BIO_s_dgram_pair(void); +const BIO_METHOD *BIO_s_datagram(void); +int BIO_dgram_non_fatal_error(int error); +BIO *BIO_new_dgram(int fd, int close_flag); +# ifndef OPENSSL_NO_SCTP +const BIO_METHOD *BIO_s_datagram_sctp(void); +BIO *BIO_new_dgram_sctp(int fd, int close_flag); +int BIO_dgram_is_sctp(BIO *bio); +int BIO_dgram_sctp_notification_cb(BIO *b, + BIO_dgram_sctp_notification_handler_fn 
handle_notifications, + void *context); +int BIO_dgram_sctp_wait_for_dry(BIO *b); +int BIO_dgram_sctp_msg_waiting(BIO *b); +# endif +# endif + +# ifndef OPENSSL_NO_SOCK +int BIO_sock_should_retry(int i); +int BIO_sock_non_fatal_error(int error); +int BIO_err_is_non_fatal(unsigned int errcode); +int BIO_socket_wait(int fd, int for_read, time_t max_time); +# endif +int BIO_wait(BIO *bio, time_t max_time, unsigned int nap_milliseconds); +int BIO_do_connect_retry(BIO *bio, int timeout, int nap_milliseconds); + +int BIO_fd_should_retry(int i); +int BIO_fd_non_fatal_error(int error); +int BIO_dump_cb(int (*cb) (const void *data, size_t len, void *u), + void *u, const void *s, int len); +int BIO_dump_indent_cb(int (*cb) (const void *data, size_t len, void *u), + void *u, const void *s, int len, int indent); +int BIO_dump(BIO *b, const void *bytes, int len); +int BIO_dump_indent(BIO *b, const void *bytes, int len, int indent); +# ifndef OPENSSL_NO_STDIO +int BIO_dump_fp(FILE *fp, const void *s, int len); +int BIO_dump_indent_fp(FILE *fp, const void *s, int len, int indent); +# endif +int BIO_hex_string(BIO *out, int indent, int width, const void *data, + int datalen); + +# ifndef OPENSSL_NO_SOCK +BIO_ADDR *BIO_ADDR_new(void); +int BIO_ADDR_copy(BIO_ADDR *dst, const BIO_ADDR *src); +BIO_ADDR *BIO_ADDR_dup(const BIO_ADDR *ap); +int BIO_ADDR_rawmake(BIO_ADDR *ap, int family, + const void *where, size_t wherelen, unsigned short port); +void BIO_ADDR_free(BIO_ADDR *); +void BIO_ADDR_clear(BIO_ADDR *ap); +int BIO_ADDR_family(const BIO_ADDR *ap); +int BIO_ADDR_rawaddress(const BIO_ADDR *ap, void *p, size_t *l); +unsigned short BIO_ADDR_rawport(const BIO_ADDR *ap); +char *BIO_ADDR_hostname_string(const BIO_ADDR *ap, int numeric); +char *BIO_ADDR_service_string(const BIO_ADDR *ap, int numeric); +char *BIO_ADDR_path_string(const BIO_ADDR *ap); + +const BIO_ADDRINFO *BIO_ADDRINFO_next(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_family(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_socktype(const BIO_ADDRINFO *bai); +int BIO_ADDRINFO_protocol(const BIO_ADDRINFO *bai); +const BIO_ADDR *BIO_ADDRINFO_address(const BIO_ADDRINFO *bai); +void BIO_ADDRINFO_free(BIO_ADDRINFO *bai); + +enum BIO_hostserv_priorities { + BIO_PARSE_PRIO_HOST, BIO_PARSE_PRIO_SERV +}; +int BIO_parse_hostserv(const char *hostserv, char **host, char **service, + enum BIO_hostserv_priorities hostserv_prio); +enum BIO_lookup_type { + BIO_LOOKUP_CLIENT, BIO_LOOKUP_SERVER +}; +int BIO_lookup(const char *host, const char *service, + enum BIO_lookup_type lookup_type, + int family, int socktype, BIO_ADDRINFO **res); +int BIO_lookup_ex(const char *host, const char *service, + int lookup_type, int family, int socktype, int protocol, + BIO_ADDRINFO **res); +int BIO_sock_error(int sock); +int BIO_socket_ioctl(int fd, long type, void *arg); +int BIO_socket_nbio(int fd, int mode); +int BIO_sock_init(void); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define BIO_sock_cleanup() while(0) continue +# endif +int BIO_set_tcp_ndelay(int sock, int turn_on); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 struct hostent *BIO_gethostbyname(const char *name); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_port(const char *str, unsigned short *port_ptr); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_host_ip(const char *str, unsigned char *ip); +OSSL_DEPRECATEDIN_1_1_0 int BIO_get_accept_socket(char *host_port, int mode); +OSSL_DEPRECATEDIN_1_1_0 int BIO_accept(int sock, char **ip_port); +# endif + +union BIO_sock_info_u { + BIO_ADDR *addr; +}; +enum BIO_sock_info_type { + 
BIO_SOCK_INFO_ADDRESS +}; +int BIO_sock_info(int sock, + enum BIO_sock_info_type type, union BIO_sock_info_u *info); + +# define BIO_SOCK_REUSEADDR 0x01 +# define BIO_SOCK_V6_ONLY 0x02 +# define BIO_SOCK_KEEPALIVE 0x04 +# define BIO_SOCK_NONBLOCK 0x08 +# define BIO_SOCK_NODELAY 0x10 +# define BIO_SOCK_TFO 0x20 + +int BIO_socket(int domain, int socktype, int protocol, int options); +int BIO_connect(int sock, const BIO_ADDR *addr, int options); +int BIO_bind(int sock, const BIO_ADDR *addr, int options); +int BIO_listen(int sock, const BIO_ADDR *addr, int options); +int BIO_accept_ex(int accept_sock, BIO_ADDR *addr, int options); +int BIO_closesocket(int sock); + +BIO *BIO_new_socket(int sock, int close_flag); +BIO *BIO_new_connect(const char *host_port); +BIO *BIO_new_accept(const char *host_port); +# endif /* OPENSSL_NO_SOCK*/ + +BIO *BIO_new_fd(int fd, int close_flag); + +int BIO_new_bio_pair(BIO **bio1, size_t writebuf1, + BIO **bio2, size_t writebuf2); +# ifndef OPENSSL_NO_DGRAM +int BIO_new_bio_dgram_pair(BIO **bio1, size_t writebuf1, + BIO **bio2, size_t writebuf2); +# endif + +/* + * If successful, returns 1 and in *bio1, *bio2 two BIO pair endpoints. + * Otherwise returns 0 and sets *bio1 and *bio2 to NULL. Size 0 uses default + * value. + */ + +void BIO_copy_next_retry(BIO *b); + +/* + * long BIO_ghbn_ctrl(int cmd,int iarg,char *parg); + */ + +# define ossl_bio__attr__(x) +# if defined(__GNUC__) && defined(__STDC_VERSION__) \ + && !defined(__MINGW32__) && !defined(__MINGW64__) \ + && !defined(__APPLE__) + /* + * Because we support the 'z' modifier, which made its appearance in C99, + * we can't use __attribute__ with pre C99 dialects. + */ +# if __STDC_VERSION__ >= 199901L +# undef ossl_bio__attr__ +# define ossl_bio__attr__ __attribute__ +# if __GNUC__*10 + __GNUC_MINOR__ >= 44 +# define ossl_bio__printf__ __gnu_printf__ +# else +# define ossl_bio__printf__ __printf__ +# endif +# endif +# endif +int BIO_printf(BIO *bio, const char *format, ...) +ossl_bio__attr__((__format__(ossl_bio__printf__, 2, 3))); +int BIO_vprintf(BIO *bio, const char *format, va_list args) +ossl_bio__attr__((__format__(ossl_bio__printf__, 2, 0))); +int BIO_snprintf(char *buf, size_t n, const char *format, ...) 
+ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 4))); +int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args) +ossl_bio__attr__((__format__(ossl_bio__printf__, 3, 0))); +# undef ossl_bio__attr__ +# undef ossl_bio__printf__ + + +BIO_METHOD *BIO_meth_new(int type, const char *name); +void BIO_meth_free(BIO_METHOD *biom); +int BIO_meth_set_write(BIO_METHOD *biom, + int (*write) (BIO *, const char *, int)); +int BIO_meth_set_write_ex(BIO_METHOD *biom, + int (*bwrite) (BIO *, const char *, size_t, size_t *)); +int BIO_meth_set_sendmmsg(BIO_METHOD *biom, + int (*f) (BIO *, BIO_MSG *, size_t, size_t, + uint64_t, size_t *)); +int BIO_meth_set_read(BIO_METHOD *biom, + int (*read) (BIO *, char *, int)); +int BIO_meth_set_read_ex(BIO_METHOD *biom, + int (*bread) (BIO *, char *, size_t, size_t *)); +int BIO_meth_set_recvmmsg(BIO_METHOD *biom, + int (*f) (BIO *, BIO_MSG *, size_t, size_t, + uint64_t, size_t *)); +int BIO_meth_set_puts(BIO_METHOD *biom, + int (*puts) (BIO *, const char *)); +int BIO_meth_set_gets(BIO_METHOD *biom, + int (*ossl_gets) (BIO *, char *, int)); +int BIO_meth_set_ctrl(BIO_METHOD *biom, + long (*ctrl) (BIO *, int, long, void *)); +int BIO_meth_set_create(BIO_METHOD *biom, int (*create) (BIO *)); +int BIO_meth_set_destroy(BIO_METHOD *biom, int (*destroy) (BIO *)); +int BIO_meth_set_callback_ctrl(BIO_METHOD *biom, + long (*callback_ctrl) (BIO *, int, + BIO_info_cb *)); +# ifndef OPENSSL_NO_DEPRECATED_3_5 +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write(const BIO_METHOD *biom)) (BIO *, const char *, + int); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_write_ex(const BIO_METHOD *biom)) (BIO *, const char *, + size_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_sendmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read(const BIO_METHOD *biom)) (BIO *, char *, int); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_read_ex(const BIO_METHOD *biom)) (BIO *, char *, + size_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_recvmmsg(const BIO_METHOD *biom))(BIO *, BIO_MSG *, + size_t, size_t, + uint64_t, size_t *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_puts(const BIO_METHOD *biom)) (BIO *, const char *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_gets(const BIO_METHOD *biom)) (BIO *, char *, int); +OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_ctrl(const BIO_METHOD *biom)) (BIO *, int, + long, void *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_create(const BIO_METHOD *bion)) (BIO *); +OSSL_DEPRECATEDIN_3_5 int (*BIO_meth_get_destroy(const BIO_METHOD *biom)) (BIO *); +OSSL_DEPRECATEDIN_3_5 long (*BIO_meth_get_callback_ctrl(const BIO_METHOD *biom)) (BIO *, int, + BIO_info_cb *); +# endif +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/cmp.h b/contrib/openssl-cmake/common/include/openssl/cmp.h new file mode 100644 index 000000000000..fff7ea754c17 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/cmp.h @@ -0,0 +1,729 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/cmp.h.in + * + * Copyright 2007-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright Nokia 2007-2019 + * Copyright Siemens AG 2015-2019 + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
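Back to the BIO_METHOD setters near the end of bio.h above (BIO_meth_new(), BIO_meth_set_write_ex() and friends): together with BIO_get_new_index() they let an application register its own BIO type. A minimal sketch of a byte-counting sink, where the counter_* helpers and demo_custom_bio() are names invented for the example:

#include <openssl/bio.h>
#include <openssl/crypto.h>

/* Write-only sink that counts bytes and discards the data. */
static int counter_write_ex(BIO *b, const char *data, size_t len, size_t *written)
{
    size_t *count = BIO_get_data(b);

    (void)data;
    *count += len;
    *written = len;
    return 1;
}

static int counter_create(BIO *b)
{
    size_t *count = OPENSSL_zalloc(sizeof(*count));

    if (count == NULL)
        return 0;
    BIO_set_data(b, count);
    BIO_set_init(b, 1);
    return 1;
}

static int counter_destroy(BIO *b)
{
    OPENSSL_free(BIO_get_data(b));
    return 1;
}

int demo_custom_bio(void)
{
    int ok = 0;
    /* BIO_get_new_index() hands out a fresh type id (>= BIO_TYPE_START),
     * OR'ed with the class of BIO being defined. */
    BIO_METHOD *meth = BIO_meth_new(BIO_get_new_index() | BIO_TYPE_SOURCE_SINK,
                                    "byte counter");

    if (meth == NULL)
        return 0;
    BIO_meth_set_write_ex(meth, counter_write_ex);
    BIO_meth_set_create(meth, counter_create);
    BIO_meth_set_destroy(meth, counter_destroy);

    BIO *b = BIO_new(meth);
    if (b != NULL) {
        ok = BIO_write(b, "abc", 3) == 3
             && *(size_t *)BIO_get_data(b) == 3;
        BIO_free(b);
    }
    BIO_meth_free(meth);
    return ok;
}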
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CMP_H +# define OPENSSL_CMP_H + +# include +# ifndef OPENSSL_NO_CMP + +# include +# include +# include +# include + +/* explicit #includes not strictly needed since implied by the above: */ +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# define OSSL_CMP_PVNO_2 2 +# define OSSL_CMP_PVNO_3 3 +# define OSSL_CMP_PVNO OSSL_CMP_PVNO_2 /* v2 is the default */ + +/*- + * PKIFailureInfo ::= BIT STRING { + * -- since we can fail in more than one way! + * -- More codes may be added in the future if/when required. + * badAlg (0), + * -- unrecognized or unsupported Algorithm Identifier + * badMessageCheck (1), + * -- integrity check failed (e.g., signature did not verify) + * badRequest (2), + * -- transaction not permitted or supported + * badTime (3), + * -- messageTime was not sufficiently close to the system time, + * -- as defined by local policy + * badCertId (4), + * -- no certificate could be found matching the provided criteria + * badDataFormat (5), + * -- the data submitted has the wrong format + * wrongAuthority (6), + * -- the authority indicated in the request is different from the + * -- one creating the response token + * incorrectData (7), + * -- the requester's data is incorrect (for notary services) + * missingTimeStamp (8), + * -- when the timestamp is missing but should be there + * -- (by policy) + * badPOP (9), + * -- the proof-of-possession failed + * certRevoked (10), + * -- the certificate has already been revoked + * certConfirmed (11), + * -- the certificate has already been confirmed + * wrongIntegrity (12), + * -- invalid integrity, password based instead of signature or + * -- vice versa + * badRecipientNonce (13), + * -- invalid recipient nonce, either missing or wrong value + * timeNotAvailable (14), + * -- the TSA's time source is not available + * unacceptedPolicy (15), + * -- the requested TSA policy is not supported by the TSA. + * unacceptedExtension (16), + * -- the requested extension is not supported by the TSA. + * addInfoNotAvailable (17), + * -- the additional information requested could not be + * -- understood or is not available + * badSenderNonce (18), + * -- invalid sender nonce, either missing or wrong size + * badCertTemplate (19), + * -- invalid cert. 
template or missing mandatory information + * signerNotTrusted (20), + * -- signer of the message unknown or not trusted + * transactionIdInUse (21), + * -- the transaction identifier is already in use + * unsupportedVersion (22), + * -- the version of the message is not supported + * notAuthorized (23), + * -- the sender was not authorized to make the preceding + * -- request or perform the preceding action + * systemUnavail (24), + * -- the request cannot be handled due to system unavailability + * systemFailure (25), + * -- the request cannot be handled due to system failure + * duplicateCertReq (26) + * -- certificate cannot be issued because a duplicate + * -- certificate already exists + * } + */ +# define OSSL_CMP_PKIFAILUREINFO_badAlg 0 +# define OSSL_CMP_PKIFAILUREINFO_badMessageCheck 1 +# define OSSL_CMP_PKIFAILUREINFO_badRequest 2 +# define OSSL_CMP_PKIFAILUREINFO_badTime 3 +# define OSSL_CMP_PKIFAILUREINFO_badCertId 4 +# define OSSL_CMP_PKIFAILUREINFO_badDataFormat 5 +# define OSSL_CMP_PKIFAILUREINFO_wrongAuthority 6 +# define OSSL_CMP_PKIFAILUREINFO_incorrectData 7 +# define OSSL_CMP_PKIFAILUREINFO_missingTimeStamp 8 +# define OSSL_CMP_PKIFAILUREINFO_badPOP 9 +# define OSSL_CMP_PKIFAILUREINFO_certRevoked 10 +# define OSSL_CMP_PKIFAILUREINFO_certConfirmed 11 +# define OSSL_CMP_PKIFAILUREINFO_wrongIntegrity 12 +# define OSSL_CMP_PKIFAILUREINFO_badRecipientNonce 13 +# define OSSL_CMP_PKIFAILUREINFO_timeNotAvailable 14 +# define OSSL_CMP_PKIFAILUREINFO_unacceptedPolicy 15 +# define OSSL_CMP_PKIFAILUREINFO_unacceptedExtension 16 +# define OSSL_CMP_PKIFAILUREINFO_addInfoNotAvailable 17 +# define OSSL_CMP_PKIFAILUREINFO_badSenderNonce 18 +# define OSSL_CMP_PKIFAILUREINFO_badCertTemplate 19 +# define OSSL_CMP_PKIFAILUREINFO_signerNotTrusted 20 +# define OSSL_CMP_PKIFAILUREINFO_transactionIdInUse 21 +# define OSSL_CMP_PKIFAILUREINFO_unsupportedVersion 22 +# define OSSL_CMP_PKIFAILUREINFO_notAuthorized 23 +# define OSSL_CMP_PKIFAILUREINFO_systemUnavail 24 +# define OSSL_CMP_PKIFAILUREINFO_systemFailure 25 +# define OSSL_CMP_PKIFAILUREINFO_duplicateCertReq 26 +# define OSSL_CMP_PKIFAILUREINFO_MAX 26 +# define OSSL_CMP_PKIFAILUREINFO_MAX_BIT_PATTERN \ + ((1 << (OSSL_CMP_PKIFAILUREINFO_MAX + 1)) - 1) +# if OSSL_CMP_PKIFAILUREINFO_MAX_BIT_PATTERN > INT_MAX +# error CMP_PKIFAILUREINFO_MAX bit pattern does not fit in type int +# endif +typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; + +# define OSSL_CMP_CTX_FAILINFO_badAlg (1 << 0) +# define OSSL_CMP_CTX_FAILINFO_badMessageCheck (1 << 1) +# define OSSL_CMP_CTX_FAILINFO_badRequest (1 << 2) +# define OSSL_CMP_CTX_FAILINFO_badTime (1 << 3) +# define OSSL_CMP_CTX_FAILINFO_badCertId (1 << 4) +# define OSSL_CMP_CTX_FAILINFO_badDataFormat (1 << 5) +# define OSSL_CMP_CTX_FAILINFO_wrongAuthority (1 << 6) +# define OSSL_CMP_CTX_FAILINFO_incorrectData (1 << 7) +# define OSSL_CMP_CTX_FAILINFO_missingTimeStamp (1 << 8) +# define OSSL_CMP_CTX_FAILINFO_badPOP (1 << 9) +# define OSSL_CMP_CTX_FAILINFO_certRevoked (1 << 10) +# define OSSL_CMP_CTX_FAILINFO_certConfirmed (1 << 11) +# define OSSL_CMP_CTX_FAILINFO_wrongIntegrity (1 << 12) +# define OSSL_CMP_CTX_FAILINFO_badRecipientNonce (1 << 13) +# define OSSL_CMP_CTX_FAILINFO_timeNotAvailable (1 << 14) +# define OSSL_CMP_CTX_FAILINFO_unacceptedPolicy (1 << 15) +# define OSSL_CMP_CTX_FAILINFO_unacceptedExtension (1 << 16) +# define OSSL_CMP_CTX_FAILINFO_addInfoNotAvailable (1 << 17) +# define OSSL_CMP_CTX_FAILINFO_badSenderNonce (1 << 18) +# define OSSL_CMP_CTX_FAILINFO_badCertTemplate (1 << 19) +# define 
OSSL_CMP_CTX_FAILINFO_signerNotTrusted (1 << 20) +# define OSSL_CMP_CTX_FAILINFO_transactionIdInUse (1 << 21) +# define OSSL_CMP_CTX_FAILINFO_unsupportedVersion (1 << 22) +# define OSSL_CMP_CTX_FAILINFO_notAuthorized (1 << 23) +# define OSSL_CMP_CTX_FAILINFO_systemUnavail (1 << 24) +# define OSSL_CMP_CTX_FAILINFO_systemFailure (1 << 25) +# define OSSL_CMP_CTX_FAILINFO_duplicateCertReq (1 << 26) + +/*- + * PKIStatus ::= INTEGER { + * accepted (0), + * -- you got exactly what you asked for + * grantedWithMods (1), + * -- you got something like what you asked for; the + * -- requester is responsible for ascertaining the differences + * rejection (2), + * -- you don't get it, more information elsewhere in the message + * waiting (3), + * -- the request body part has not yet been processed; expect to + * -- hear more later (note: proper handling of this status + * -- response MAY use the polling req/rep PKIMessages specified + * -- in Section 5.3.22; alternatively, polling in the underlying + * -- transport layer MAY have some utility in this regard) + * revocationWarning (4), + * -- this message contains a warning that a revocation is + * -- imminent + * revocationNotification (5), + * -- notification that a revocation has occurred + * keyUpdateWarning (6) + * -- update already done for the oldCertId specified in + * -- CertReqMsg + * } + */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_revocationNotification 5 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; + +DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) + +# define OSSL_CMP_CERTORENCCERT_CERTIFICATE 0 +# define OSSL_CMP_CERTORENCCERT_ENCRYPTEDCERT 1 + +/* data type declarations */ +typedef struct ossl_cmp_ctx_st OSSL_CMP_CTX; +typedef struct ossl_cmp_pkiheader_st OSSL_CMP_PKIHEADER; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_PKIHEADER) +typedef struct ossl_cmp_msg_st OSSL_CMP_MSG; +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_MSG) +DECLARE_ASN1_ENCODE_FUNCTIONS(OSSL_CMP_MSG, OSSL_CMP_MSG, OSSL_CMP_MSG) +typedef struct ossl_cmp_certstatus_st OSSL_CMP_CERTSTATUS; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTSTATUS, OSSL_CMP_CERTSTATUS, OSSL_CMP_CERTSTATUS) +#define sk_OSSL_CMP_CERTSTATUS_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_value(sk, idx) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTSTATUS_new(cmp) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTSTATUS_new_null() ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTSTATUS_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTSTATUS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTSTATUS_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) 
+#define sk_OSSL_CMP_CERTSTATUS_delete(sk, i) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTSTATUS_delete_ptr(sk, ptr) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr))) +#define sk_OSSL_CMP_CERTSTATUS_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_pop(sk) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_shift(sk) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk),ossl_check_OSSL_CMP_CERTSTATUS_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTSTATUS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTSTATUS_set(sk, idx, ptr) ((OSSL_CMP_CERTSTATUS *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr))) +#define sk_OSSL_CMP_CERTSTATUS_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr)) +#define sk_OSSL_CMP_CERTSTATUS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_type(ptr), pnum) +#define sk_OSSL_CMP_CERTSTATUS_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CERTSTATUS_dup(sk) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CERTSTATUS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTSTATUS) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTSTATUS_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTSTATUS_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTSTATUS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CERTSTATUS_compfunc_type(cmp))) + +typedef struct ossl_cmp_itav_st OSSL_CMP_ITAV; +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_ITAV) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_ITAV, OSSL_CMP_ITAV, OSSL_CMP_ITAV) +#define sk_OSSL_CMP_ITAV_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_value(sk, idx) ((OSSL_CMP_ITAV *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk), (idx))) +#define sk_OSSL_CMP_ITAV_new(cmp) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp))) +#define sk_OSSL_CMP_ITAV_new_null() ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_ITAV_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_ITAV_reserve(sk, n) 
OPENSSL_sk_reserve(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (n)) +#define sk_OSSL_CMP_ITAV_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_delete(sk, i) ((OSSL_CMP_ITAV *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (i))) +#define sk_OSSL_CMP_ITAV_delete_ptr(sk, ptr) ((OSSL_CMP_ITAV *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr))) +#define sk_OSSL_CMP_ITAV_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_pop(sk) ((OSSL_CMP_ITAV *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_shift(sk) ((OSSL_CMP_ITAV *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_ITAV_sk_type(sk),ossl_check_OSSL_CMP_ITAV_freefunc_type(freefunc)) +#define sk_OSSL_CMP_ITAV_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr), (idx)) +#define sk_OSSL_CMP_ITAV_set(sk, idx, ptr) ((OSSL_CMP_ITAV *)OPENSSL_sk_set(ossl_check_OSSL_CMP_ITAV_sk_type(sk), (idx), ossl_check_OSSL_CMP_ITAV_type(ptr))) +#define sk_OSSL_CMP_ITAV_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr)) +#define sk_OSSL_CMP_ITAV_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_type(ptr), pnum) +#define sk_OSSL_CMP_ITAV_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk)) +#define sk_OSSL_CMP_ITAV_dup(sk) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk))) +#define sk_OSSL_CMP_ITAV_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_ITAV) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_ITAV_freefunc_type(freefunc))) +#define sk_OSSL_CMP_ITAV_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_ITAV_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_ITAV_sk_type(sk), ossl_check_OSSL_CMP_ITAV_compfunc_type(cmp))) + + +typedef struct ossl_cmp_crlstatus_st OSSL_CMP_CRLSTATUS; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CRLSTATUS, OSSL_CMP_CRLSTATUS, OSSL_CMP_CRLSTATUS) +#define sk_OSSL_CMP_CRLSTATUS_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CRLSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CRLSTATUS_value(sk, idx) ((OSSL_CMP_CRLSTATUS *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CRLSTATUS_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CRLSTATUS_new(cmp) ((STACK_OF(OSSL_CMP_CRLSTATUS) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CRLSTATUS_compfunc_type(cmp))) +#define sk_OSSL_CMP_CRLSTATUS_new_null() ((STACK_OF(OSSL_CMP_CRLSTATUS) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CRLSTATUS_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CRLSTATUS) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CRLSTATUS_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CRLSTATUS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), (n)) +#define 
sk_OSSL_CMP_CRLSTATUS_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CRLSTATUS_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CRLSTATUS_delete(sk, i) ((OSSL_CMP_CRLSTATUS *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), (i))) +#define sk_OSSL_CMP_CRLSTATUS_delete_ptr(sk, ptr) ((OSSL_CMP_CRLSTATUS *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr))) +#define sk_OSSL_CMP_CRLSTATUS_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr)) +#define sk_OSSL_CMP_CRLSTATUS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr)) +#define sk_OSSL_CMP_CRLSTATUS_pop(sk) ((OSSL_CMP_CRLSTATUS *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CRLSTATUS_shift(sk) ((OSSL_CMP_CRLSTATUS *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CRLSTATUS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk),ossl_check_OSSL_CMP_CRLSTATUS_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CRLSTATUS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr), (idx)) +#define sk_OSSL_CMP_CRLSTATUS_set(sk, idx, ptr) ((OSSL_CMP_CRLSTATUS *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), (idx), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr))) +#define sk_OSSL_CMP_CRLSTATUS_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr)) +#define sk_OSSL_CMP_CRLSTATUS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr)) +#define sk_OSSL_CMP_CRLSTATUS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_type(ptr), pnum) +#define sk_OSSL_CMP_CRLSTATUS_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CRLSTATUS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CRLSTATUS_sk_type(sk)) +#define sk_OSSL_CMP_CRLSTATUS_dup(sk) ((STACK_OF(OSSL_CMP_CRLSTATUS) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CRLSTATUS_sk_type(sk))) +#define sk_OSSL_CMP_CRLSTATUS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CRLSTATUS) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CRLSTATUS_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CRLSTATUS_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CRLSTATUS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CRLSTATUS_sk_type(sk), ossl_check_OSSL_CMP_CRLSTATUS_compfunc_type(cmp))) + + +typedef OSSL_CRMF_ATTRIBUTETYPEANDVALUE OSSL_CMP_ATAV; +# define OSSL_CMP_ATAV_free OSSL_CRMF_ATTRIBUTETYPEANDVALUE_free +typedef STACK_OF(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) OSSL_CMP_ATAVS; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_ATAVS) +# define stack_st_OSSL_CMP_ATAV stack_st_OSSL_CRMF_ATTRIBUTETYPEANDVALUE +# define sk_OSSL_CMP_ATAV_num sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_num +# define sk_OSSL_CMP_ATAV_value sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_value +# define sk_OSSL_CMP_ATAV_push sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_push +# define sk_OSSL_CMP_ATAV_pop_free sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_pop_free + +typedef struct ossl_cmp_revrepcontent_st OSSL_CMP_REVREPCONTENT; +typedef struct 
ossl_cmp_pkisi_st OSSL_CMP_PKISI; +DECLARE_ASN1_FUNCTIONS(OSSL_CMP_PKISI) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CMP_PKISI) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_PKISI, OSSL_CMP_PKISI, OSSL_CMP_PKISI) +#define sk_OSSL_CMP_PKISI_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_value(sk, idx) ((OSSL_CMP_PKISI *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk), (idx))) +#define sk_OSSL_CMP_PKISI_new(cmp) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp))) +#define sk_OSSL_CMP_PKISI_new_null() ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_PKISI_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_PKISI_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (n)) +#define sk_OSSL_CMP_PKISI_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_delete(sk, i) ((OSSL_CMP_PKISI *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (i))) +#define sk_OSSL_CMP_PKISI_delete_ptr(sk, ptr) ((OSSL_CMP_PKISI *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr))) +#define sk_OSSL_CMP_PKISI_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_pop(sk) ((OSSL_CMP_PKISI *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_shift(sk) ((OSSL_CMP_PKISI *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_PKISI_sk_type(sk),ossl_check_OSSL_CMP_PKISI_freefunc_type(freefunc)) +#define sk_OSSL_CMP_PKISI_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr), (idx)) +#define sk_OSSL_CMP_PKISI_set(sk, idx, ptr) ((OSSL_CMP_PKISI *)OPENSSL_sk_set(ossl_check_OSSL_CMP_PKISI_sk_type(sk), (idx), ossl_check_OSSL_CMP_PKISI_type(ptr))) +#define sk_OSSL_CMP_PKISI_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr)) +#define sk_OSSL_CMP_PKISI_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_type(ptr), pnum) +#define sk_OSSL_CMP_PKISI_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk)) +#define sk_OSSL_CMP_PKISI_dup(sk) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk))) +#define sk_OSSL_CMP_PKISI_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_PKISI) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_PKISI_freefunc_type(freefunc))) +#define sk_OSSL_CMP_PKISI_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_PKISI_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_PKISI_sk_type(sk), ossl_check_OSSL_CMP_PKISI_compfunc_type(cmp))) + +typedef struct 
ossl_cmp_certrepmessage_st OSSL_CMP_CERTREPMESSAGE; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTREPMESSAGE, OSSL_CMP_CERTREPMESSAGE, OSSL_CMP_CERTREPMESSAGE) +#define sk_OSSL_CMP_CERTREPMESSAGE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_value(sk, idx) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTREPMESSAGE_new(cmp) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTREPMESSAGE_new_null() ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTREPMESSAGE_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTREPMESSAGE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTREPMESSAGE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_delete(sk, i) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTREPMESSAGE_delete_ptr(sk, ptr) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr))) +#define sk_OSSL_CMP_CERTREPMESSAGE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_pop(sk) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_shift(sk) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk),ossl_check_OSSL_CMP_CERTREPMESSAGE_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTREPMESSAGE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTREPMESSAGE_set(sk, idx, ptr) ((OSSL_CMP_CERTREPMESSAGE *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr))) +#define sk_OSSL_CMP_CERTREPMESSAGE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr)) +#define sk_OSSL_CMP_CERTREPMESSAGE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_type(ptr), pnum) +#define sk_OSSL_CMP_CERTREPMESSAGE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk)) +#define sk_OSSL_CMP_CERTREPMESSAGE_dup(sk) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) 
*)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk))) +#define sk_OSSL_CMP_CERTREPMESSAGE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTREPMESSAGE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTREPMESSAGE_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTREPMESSAGE_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTREPMESSAGE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTREPMESSAGE_sk_type(sk), ossl_check_OSSL_CMP_CERTREPMESSAGE_compfunc_type(cmp))) + +typedef struct ossl_cmp_pollrep_st OSSL_CMP_POLLREP; +typedef STACK_OF(OSSL_CMP_POLLREP) OSSL_CMP_POLLREPCONTENT; +typedef struct ossl_cmp_certresponse_st OSSL_CMP_CERTRESPONSE; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CMP_CERTRESPONSE, OSSL_CMP_CERTRESPONSE, OSSL_CMP_CERTRESPONSE) +#define sk_OSSL_CMP_CERTRESPONSE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_value(sk, idx) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_value(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk), (idx))) +#define sk_OSSL_CMP_CERTRESPONSE_new(cmp) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new(ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp))) +#define sk_OSSL_CMP_CERTRESPONSE_new_null() ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CMP_CERTRESPONSE_new_reserve(cmp, n) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp), (n))) +#define sk_OSSL_CMP_CERTRESPONSE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (n)) +#define sk_OSSL_CMP_CERTRESPONSE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_delete(sk, i) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_delete(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (i))) +#define sk_OSSL_CMP_CERTRESPONSE_delete_ptr(sk, ptr) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr))) +#define sk_OSSL_CMP_CERTRESPONSE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_pop(sk) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_pop(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_shift(sk) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_shift(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk),ossl_check_OSSL_CMP_CERTRESPONSE_freefunc_type(freefunc)) +#define sk_OSSL_CMP_CERTRESPONSE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr), (idx)) +#define sk_OSSL_CMP_CERTRESPONSE_set(sk, idx, ptr) ((OSSL_CMP_CERTRESPONSE *)OPENSSL_sk_set(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), (idx), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr))) +#define sk_OSSL_CMP_CERTRESPONSE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_find_ex(sk, ptr) 
OPENSSL_sk_find_ex(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr)) +#define sk_OSSL_CMP_CERTRESPONSE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_type(ptr), pnum) +#define sk_OSSL_CMP_CERTRESPONSE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk)) +#define sk_OSSL_CMP_CERTRESPONSE_dup(sk) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk))) +#define sk_OSSL_CMP_CERTRESPONSE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CMP_CERTRESPONSE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_copyfunc_type(copyfunc), ossl_check_OSSL_CMP_CERTRESPONSE_freefunc_type(freefunc))) +#define sk_OSSL_CMP_CERTRESPONSE_set_cmp_func(sk, cmp) ((sk_OSSL_CMP_CERTRESPONSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CMP_CERTRESPONSE_sk_type(sk), ossl_check_OSSL_CMP_CERTRESPONSE_compfunc_type(cmp))) + +typedef STACK_OF(ASN1_UTF8STRING) OSSL_CMP_PKIFREETEXT; + +/* + * function DECLARATIONS + */ + +/* from cmp_asn.c */ +OSSL_CMP_ITAV *OSSL_CMP_ITAV_create(ASN1_OBJECT *type, ASN1_TYPE *value); +void OSSL_CMP_ITAV_set0(OSSL_CMP_ITAV *itav, ASN1_OBJECT *type, + ASN1_TYPE *value); +ASN1_OBJECT *OSSL_CMP_ITAV_get0_type(const OSSL_CMP_ITAV *itav); +ASN1_TYPE *OSSL_CMP_ITAV_get0_value(const OSSL_CMP_ITAV *itav); +int OSSL_CMP_ITAV_push0_stack_item(STACK_OF(OSSL_CMP_ITAV) **sk_p, + OSSL_CMP_ITAV *itav); +void OSSL_CMP_ITAV_free(OSSL_CMP_ITAV *itav); + +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new0_certProfile(STACK_OF(ASN1_UTF8STRING) + *certProfile); +int OSSL_CMP_ITAV_get0_certProfile(const OSSL_CMP_ITAV *itav, + STACK_OF(ASN1_UTF8STRING) **out); +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_caCerts(const STACK_OF(X509) *caCerts); +int OSSL_CMP_ITAV_get0_caCerts(const OSSL_CMP_ITAV *itav, STACK_OF(X509) **out); + +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_rootCaCert(const X509 *rootCaCert); +int OSSL_CMP_ITAV_get0_rootCaCert(const OSSL_CMP_ITAV *itav, X509 **out); +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_rootCaKeyUpdate(const X509 *newWithNew, + const X509 *newWithOld, + const X509 *oldWithNew); +int OSSL_CMP_ITAV_get0_rootCaKeyUpdate(const OSSL_CMP_ITAV *itav, + X509 **newWithNew, + X509 **newWithOld, + X509 **oldWithNew); + +OSSL_CMP_CRLSTATUS *OSSL_CMP_CRLSTATUS_create(const X509_CRL *crl, + const X509 *cert, int only_DN); +OSSL_CMP_CRLSTATUS *OSSL_CMP_CRLSTATUS_new1(const DIST_POINT_NAME *dpn, + const GENERAL_NAMES *issuer, + const ASN1_TIME *thisUpdate); +int OSSL_CMP_CRLSTATUS_get0(const OSSL_CMP_CRLSTATUS *crlstatus, + DIST_POINT_NAME **dpn, GENERAL_NAMES **issuer, + ASN1_TIME **thisUpdate); +void OSSL_CMP_CRLSTATUS_free(OSSL_CMP_CRLSTATUS *crlstatus); +OSSL_CMP_ITAV +*OSSL_CMP_ITAV_new0_crlStatusList(STACK_OF(OSSL_CMP_CRLSTATUS) *crlStatusList); +int OSSL_CMP_ITAV_get0_crlStatusList(const OSSL_CMP_ITAV *itav, + STACK_OF(OSSL_CMP_CRLSTATUS) **out); +OSSL_CMP_ITAV *OSSL_CMP_ITAV_new_crls(const X509_CRL *crls); +int OSSL_CMP_ITAV_get0_crls(const OSSL_CMP_ITAV *it, STACK_OF(X509_CRL) **out); +OSSL_CMP_ITAV +*OSSL_CMP_ITAV_new0_certReqTemplate(OSSL_CRMF_CERTTEMPLATE *certTemplate, + OSSL_CMP_ATAVS *keySpec); +int OSSL_CMP_ITAV_get1_certReqTemplate(const OSSL_CMP_ITAV *itav, + OSSL_CRMF_CERTTEMPLATE **certTemplate, + OSSL_CMP_ATAVS **keySpec); + +OSSL_CMP_ATAV 
*OSSL_CMP_ATAV_create(ASN1_OBJECT *type, ASN1_TYPE *value); +void OSSL_CMP_ATAV_set0(OSSL_CMP_ATAV *itav, ASN1_OBJECT *type, + ASN1_TYPE *value); +ASN1_OBJECT *OSSL_CMP_ATAV_get0_type(const OSSL_CMP_ATAV *itav); +ASN1_TYPE *OSSL_CMP_ATAV_get0_value(const OSSL_CMP_ATAV *itav); +OSSL_CMP_ATAV *OSSL_CMP_ATAV_new_algId(const X509_ALGOR *alg); +X509_ALGOR *OSSL_CMP_ATAV_get0_algId(const OSSL_CMP_ATAV *atav); +OSSL_CMP_ATAV *OSSL_CMP_ATAV_new_rsaKeyLen(int len); +int OSSL_CMP_ATAV_get_rsaKeyLen(const OSSL_CMP_ATAV *atav); +int OSSL_CMP_ATAV_push1(OSSL_CMP_ATAVS **sk_p, const OSSL_CMP_ATAV *atav); + +void OSSL_CMP_MSG_free(OSSL_CMP_MSG *msg); + +/* from cmp_ctx.c */ +OSSL_CMP_CTX *OSSL_CMP_CTX_new(OSSL_LIB_CTX *libctx, const char *propq); +void OSSL_CMP_CTX_free(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_reinit(OSSL_CMP_CTX *ctx); +OSSL_LIB_CTX *OSSL_CMP_CTX_get0_libctx(const OSSL_CMP_CTX *ctx); +const char *OSSL_CMP_CTX_get0_propq(const OSSL_CMP_CTX *ctx); +/* CMP general options: */ +# define OSSL_CMP_OPT_LOG_VERBOSITY 0 +/* CMP transfer options: */ +# define OSSL_CMP_OPT_KEEP_ALIVE 10 +# define OSSL_CMP_OPT_MSG_TIMEOUT 11 +# define OSSL_CMP_OPT_TOTAL_TIMEOUT 12 +# define OSSL_CMP_OPT_USE_TLS 13 +/* CMP request options: */ +# define OSSL_CMP_OPT_VALIDITY_DAYS 20 +# define OSSL_CMP_OPT_SUBJECTALTNAME_NODEFAULT 21 +# define OSSL_CMP_OPT_SUBJECTALTNAME_CRITICAL 22 +# define OSSL_CMP_OPT_POLICIES_CRITICAL 23 +# define OSSL_CMP_OPT_POPO_METHOD 24 +# define OSSL_CMP_OPT_IMPLICIT_CONFIRM 25 +# define OSSL_CMP_OPT_DISABLE_CONFIRM 26 +# define OSSL_CMP_OPT_REVOCATION_REASON 27 +/* CMP protection options: */ +# define OSSL_CMP_OPT_UNPROTECTED_SEND 30 +# define OSSL_CMP_OPT_UNPROTECTED_ERRORS 31 +# define OSSL_CMP_OPT_OWF_ALGNID 32 +# define OSSL_CMP_OPT_MAC_ALGNID 33 +# define OSSL_CMP_OPT_DIGEST_ALGNID 34 +# define OSSL_CMP_OPT_IGNORE_KEYUSAGE 35 +# define OSSL_CMP_OPT_PERMIT_TA_IN_EXTRACERTS_FOR_IR 36 +# define OSSL_CMP_OPT_NO_CACHE_EXTRACERTS 37 +int OSSL_CMP_CTX_set_option(OSSL_CMP_CTX *ctx, int opt, int val); +int OSSL_CMP_CTX_get_option(const OSSL_CMP_CTX *ctx, int opt); +/* CMP-specific callback for logging and outputting the error queue: */ +int OSSL_CMP_CTX_set_log_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_log_cb_t cb); +# define OSSL_CMP_CTX_set_log_verbosity(ctx, level) \ + OSSL_CMP_CTX_set_option(ctx, OSSL_CMP_OPT_LOG_VERBOSITY, level) +void OSSL_CMP_CTX_print_errors(const OSSL_CMP_CTX *ctx); +/* message transfer: */ +int OSSL_CMP_CTX_set1_serverPath(OSSL_CMP_CTX *ctx, const char *path); +int OSSL_CMP_CTX_set1_server(OSSL_CMP_CTX *ctx, const char *address); +int OSSL_CMP_CTX_set_serverPort(OSSL_CMP_CTX *ctx, int port); +int OSSL_CMP_CTX_set1_proxy(OSSL_CMP_CTX *ctx, const char *name); +int OSSL_CMP_CTX_set1_no_proxy(OSSL_CMP_CTX *ctx, const char *names); +# ifndef OPENSSL_NO_HTTP +int OSSL_CMP_CTX_set_http_cb(OSSL_CMP_CTX *ctx, OSSL_HTTP_bio_cb_t cb); +int OSSL_CMP_CTX_set_http_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_http_cb_arg(const OSSL_CMP_CTX *ctx); +# endif +typedef OSSL_CMP_MSG *(*OSSL_CMP_transfer_cb_t) (OSSL_CMP_CTX *ctx, + const OSSL_CMP_MSG *req); +int OSSL_CMP_CTX_set_transfer_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_transfer_cb_t cb); +int OSSL_CMP_CTX_set_transfer_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_transfer_cb_arg(const OSSL_CMP_CTX *ctx); +/* server authentication: */ +int OSSL_CMP_CTX_set1_srvCert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_set1_expected_sender(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int 
OSSL_CMP_CTX_set0_trustedStore(OSSL_CMP_CTX *ctx, X509_STORE *store); +# define OSSL_CMP_CTX_set0_trusted OSSL_CMP_CTX_set0_trustedStore +X509_STORE *OSSL_CMP_CTX_get0_trustedStore(const OSSL_CMP_CTX *ctx); +# define OSSL_CMP_CTX_get0_trusted OSSL_CMP_CTX_get0_trustedStore +int OSSL_CMP_CTX_set1_untrusted(OSSL_CMP_CTX *ctx, STACK_OF(X509) *certs); +STACK_OF(X509) *OSSL_CMP_CTX_get0_untrusted(const OSSL_CMP_CTX *ctx); +/* client authentication: */ +int OSSL_CMP_CTX_set1_cert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, + STACK_OF(X509) *candidates); +int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); +int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, + const unsigned char *ref, int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); +/* CMP message header and extra certificates: */ +int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); +STACK_OF(OSSL_CMP_ITAV) + *OSSL_CMP_CTX_get0_geninfo_ITAVs(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, + STACK_OF(X509) *extraCertsOut); +/* certificate template: */ +int OSSL_CMP_CTX_set0_newPkey(OSSL_CMP_CTX *ctx, int priv, EVP_PKEY *pkey); +EVP_PKEY *OSSL_CMP_CTX_get0_newPkey(const OSSL_CMP_CTX *ctx, int priv); +int OSSL_CMP_CTX_set1_issuer(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_set1_serialNumber(OSSL_CMP_CTX *ctx, const ASN1_INTEGER *sn); +int OSSL_CMP_CTX_set1_subjectName(OSSL_CMP_CTX *ctx, const X509_NAME *name); +int OSSL_CMP_CTX_push1_subjectAltName(OSSL_CMP_CTX *ctx, + const GENERAL_NAME *name); +int OSSL_CMP_CTX_set0_reqExtensions(OSSL_CMP_CTX *ctx, X509_EXTENSIONS *exts); +int OSSL_CMP_CTX_reqExtensions_have_SAN(OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_push0_policy(OSSL_CMP_CTX *ctx, POLICYINFO *pinfo); +int OSSL_CMP_CTX_set1_oldCert(OSSL_CMP_CTX *ctx, X509 *cert); +int OSSL_CMP_CTX_set1_p10CSR(OSSL_CMP_CTX *ctx, const X509_REQ *csr); +/* misc body contents: */ +int OSSL_CMP_CTX_push0_genm_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +/* certificate confirmation: */ +typedef int (*OSSL_CMP_certConf_cb_t) (OSSL_CMP_CTX *ctx, X509 *cert, + int fail_info, const char **txt); +int OSSL_CMP_certConf_cb(OSSL_CMP_CTX *ctx, X509 *cert, int fail_info, + const char **text); +int OSSL_CMP_CTX_set_certConf_cb(OSSL_CMP_CTX *ctx, OSSL_CMP_certConf_cb_t cb); +int OSSL_CMP_CTX_set_certConf_cb_arg(OSSL_CMP_CTX *ctx, void *arg); +void *OSSL_CMP_CTX_get_certConf_cb_arg(const OSSL_CMP_CTX *ctx); +/* result fetching: */ +int OSSL_CMP_CTX_get_status(const OSSL_CMP_CTX *ctx); +OSSL_CMP_PKIFREETEXT *OSSL_CMP_CTX_get0_statusString(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_get_failInfoCode(const OSSL_CMP_CTX *ctx); +# define OSSL_CMP_PKISI_BUFLEN 1024 +X509 *OSSL_CMP_CTX_get0_validatedSrvCert(const OSSL_CMP_CTX *ctx); +X509 *OSSL_CMP_CTX_get0_newCert(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_newChain(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_caPubs(const OSSL_CMP_CTX *ctx); +STACK_OF(X509) *OSSL_CMP_CTX_get1_extraCertsIn(const OSSL_CMP_CTX *ctx); +int OSSL_CMP_CTX_set1_transactionID(OSSL_CMP_CTX *ctx, + const ASN1_OCTET_STRING *id); +int OSSL_CMP_CTX_set1_senderNonce(OSSL_CMP_CTX *ctx, + const ASN1_OCTET_STRING *nonce); + +/* from cmp_status.c */ +char *OSSL_CMP_CTX_snprint_PKIStatus(const 
OSSL_CMP_CTX *ctx, char *buf, + size_t bufsize); +char *OSSL_CMP_snprint_PKIStatusInfo(const OSSL_CMP_PKISI *statusInfo, + char *buf, size_t bufsize); +OSSL_CMP_PKISI * +OSSL_CMP_STATUSINFO_new(int status, int fail_info, const char *text); + +/* from cmp_hdr.c */ +ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_transactionID(const + OSSL_CMP_PKIHEADER *hdr); +ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); +STACK_OF(OSSL_CMP_ITAV) + *OSSL_CMP_HDR_get0_geninfo_ITAVs(const OSSL_CMP_PKIHEADER *hdr); + +/* from cmp_msg.c */ +OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); +X509_PUBKEY *OSSL_CMP_MSG_get0_certreq_publickey(const OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); +OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, + const char *propq); +int OSSL_CMP_MSG_write(const char *file, const OSSL_CMP_MSG *msg); +OSSL_CMP_MSG *d2i_OSSL_CMP_MSG_bio(BIO *bio, OSSL_CMP_MSG **msg); +int i2d_OSSL_CMP_MSG_bio(BIO *bio, const OSSL_CMP_MSG *msg); + +/* from cmp_vfy.c */ +int OSSL_CMP_validate_msg(OSSL_CMP_CTX *ctx, const OSSL_CMP_MSG *msg); +int OSSL_CMP_validate_cert_path(const OSSL_CMP_CTX *ctx, + X509_STORE *trusted_store, X509 *cert); + +/* from cmp_http.c */ +# ifndef OPENSSL_NO_HTTP +OSSL_CMP_MSG *OSSL_CMP_MSG_http_perform(OSSL_CMP_CTX *ctx, + const OSSL_CMP_MSG *req); +# endif + +/* from cmp_server.c */ +typedef struct ossl_cmp_srv_ctx_st OSSL_CMP_SRV_CTX; +OSSL_CMP_MSG *OSSL_CMP_SRV_process_request(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req); +OSSL_CMP_MSG * OSSL_CMP_CTX_server_perform(OSSL_CMP_CTX *client_ctx, + const OSSL_CMP_MSG *req); +OSSL_CMP_SRV_CTX *OSSL_CMP_SRV_CTX_new(OSSL_LIB_CTX *libctx, const char *propq); +void OSSL_CMP_SRV_CTX_free(OSSL_CMP_SRV_CTX *srv_ctx); +typedef OSSL_CMP_PKISI *(*OSSL_CMP_SRV_cert_request_cb_t) + (OSSL_CMP_SRV_CTX *srv_ctx, const OSSL_CMP_MSG *req, int certReqId, + const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr, + X509 **certOut, STACK_OF(X509) **chainOut, STACK_OF(X509) **caPubs); +typedef OSSL_CMP_PKISI *(*OSSL_CMP_SRV_rr_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const X509_NAME *issuer, + const ASN1_INTEGER *serial); +typedef int (*OSSL_CMP_SRV_genm_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const STACK_OF(OSSL_CMP_ITAV) *in, + STACK_OF(OSSL_CMP_ITAV) **out); +typedef void (*OSSL_CMP_SRV_error_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + const OSSL_CMP_PKISI *statusInfo, + const ASN1_INTEGER *errorCode, + const OSSL_CMP_PKIFREETEXT *errDetails); +typedef int (*OSSL_CMP_SRV_certConf_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, + int certReqId, + const ASN1_OCTET_STRING *certHash, + const OSSL_CMP_PKISI *si); +typedef int (*OSSL_CMP_SRV_pollReq_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req, int certReqId, + OSSL_CMP_MSG **certReq, + int64_t *check_after); +int OSSL_CMP_SRV_CTX_init(OSSL_CMP_SRV_CTX *srv_ctx, void *custom_ctx, + OSSL_CMP_SRV_cert_request_cb_t process_cert_request, + OSSL_CMP_SRV_rr_cb_t process_rr, + OSSL_CMP_SRV_genm_cb_t process_genm, + OSSL_CMP_SRV_error_cb_t process_error, + OSSL_CMP_SRV_certConf_cb_t process_certConf, + OSSL_CMP_SRV_pollReq_cb_t process_pollReq); +typedef int 
(*OSSL_CMP_SRV_delayed_delivery_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const OSSL_CMP_MSG *req); +typedef int (*OSSL_CMP_SRV_clean_transaction_cb_t)(OSSL_CMP_SRV_CTX *srv_ctx, + const ASN1_OCTET_STRING *id); +int OSSL_CMP_SRV_CTX_init_trans(OSSL_CMP_SRV_CTX *srv_ctx, + OSSL_CMP_SRV_delayed_delivery_cb_t delay, + OSSL_CMP_SRV_clean_transaction_cb_t clean); +OSSL_CMP_CTX *OSSL_CMP_SRV_CTX_get0_cmp_ctx(const OSSL_CMP_SRV_CTX *srv_ctx); +void *OSSL_CMP_SRV_CTX_get0_custom_ctx(const OSSL_CMP_SRV_CTX *srv_ctx); +int OSSL_CMP_SRV_CTX_set_send_unprotected_errors(OSSL_CMP_SRV_CTX *srv_ctx, + int val); +int OSSL_CMP_SRV_CTX_set_accept_unprotected(OSSL_CMP_SRV_CTX *srv_ctx, int val); +int OSSL_CMP_SRV_CTX_set_accept_raverified(OSSL_CMP_SRV_CTX *srv_ctx, int val); +int OSSL_CMP_SRV_CTX_set_grant_implicit_confirm(OSSL_CMP_SRV_CTX *srv_ctx, + int val); + +/* from cmp_client.c */ +X509 *OSSL_CMP_exec_certreq(OSSL_CMP_CTX *ctx, int req_type, + const OSSL_CRMF_MSG *crm); +# define OSSL_CMP_IR 0 +# define OSSL_CMP_CR 2 +# define OSSL_CMP_P10CR 4 +# define OSSL_CMP_KUR 7 +# define OSSL_CMP_GENM 21 +# define OSSL_CMP_ERROR 23 +# define OSSL_CMP_exec_IR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_IR, NULL) +# define OSSL_CMP_exec_CR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_CR, NULL) +# define OSSL_CMP_exec_P10CR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_P10CR, NULL) +# define OSSL_CMP_exec_KUR_ses(ctx) \ + OSSL_CMP_exec_certreq(ctx, OSSL_CMP_KUR, NULL) +int OSSL_CMP_try_certreq(OSSL_CMP_CTX *ctx, int req_type, + const OSSL_CRMF_MSG *crm, int *checkAfter); +int OSSL_CMP_exec_RR_ses(OSSL_CMP_CTX *ctx); +STACK_OF(OSSL_CMP_ITAV) *OSSL_CMP_exec_GENM_ses(OSSL_CMP_CTX *ctx); + +/* from cmp_genm.c */ +int OSSL_CMP_get1_caCerts(OSSL_CMP_CTX *ctx, STACK_OF(X509) **out); +int OSSL_CMP_get1_rootCaKeyUpdate(OSSL_CMP_CTX *ctx, + const X509 *oldWithOld, X509 **newWithNew, + X509 **newWithOld, X509 **oldWithNew); +int OSSL_CMP_get1_crlUpdate(OSSL_CMP_CTX *ctx, const X509 *crlcert, + const X509_CRL *last_crl, + X509_CRL **crl); +int OSSL_CMP_get1_certReqTemplate(OSSL_CMP_CTX *ctx, + OSSL_CRMF_CERTTEMPLATE **certTemplate, + OSSL_CMP_ATAVS **keySpec); + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_CMP) */ +#endif /* !defined(OPENSSL_CMP_H) */ diff --git a/contrib/openssl-cmake/common/include/openssl/cms.h b/contrib/openssl-cmake/common/include/openssl/cms.h new file mode 100644 index 000000000000..63afab563557 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/cms.h @@ -0,0 +1,511 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/cms.h.in + * + * Copyright 2008-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CMS_H +# define OPENSSL_CMS_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CMS_H +# endif + +# include + +# ifndef OPENSSL_NO_CMS +# include +# include +# include +# ifdef __cplusplus +extern "C" { +# endif + +typedef struct CMS_EnvelopedData_st CMS_EnvelopedData; +typedef struct CMS_ContentInfo_st CMS_ContentInfo; +typedef struct CMS_SignerInfo_st CMS_SignerInfo; +typedef struct CMS_SignedData_st CMS_SignedData; +typedef struct CMS_CertificateChoices CMS_CertificateChoices; +typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice; +typedef struct CMS_RecipientInfo_st CMS_RecipientInfo; +typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest; +typedef struct CMS_Receipt_st CMS_Receipt; +typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey; +typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute; + +SKM_DEFINE_STACK_OF_INTERNAL(CMS_SignerInfo, CMS_SignerInfo, CMS_SignerInfo) +#define sk_CMS_SignerInfo_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_value(sk, idx) ((CMS_SignerInfo *)OPENSSL_sk_value(ossl_check_const_CMS_SignerInfo_sk_type(sk), (idx))) +#define sk_CMS_SignerInfo_new(cmp) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new(ossl_check_CMS_SignerInfo_compfunc_type(cmp))) +#define sk_CMS_SignerInfo_new_null() ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new_null()) +#define sk_CMS_SignerInfo_new_reserve(cmp, n) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_new_reserve(ossl_check_CMS_SignerInfo_compfunc_type(cmp), (n))) +#define sk_CMS_SignerInfo_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_SignerInfo_sk_type(sk), (n)) +#define sk_CMS_SignerInfo_free(sk) OPENSSL_sk_free(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_delete(sk, i) ((CMS_SignerInfo *)OPENSSL_sk_delete(ossl_check_CMS_SignerInfo_sk_type(sk), (i))) +#define sk_CMS_SignerInfo_delete_ptr(sk, ptr) ((CMS_SignerInfo *)OPENSSL_sk_delete_ptr(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr))) +#define sk_CMS_SignerInfo_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define sk_CMS_SignerInfo_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define sk_CMS_SignerInfo_pop(sk) ((CMS_SignerInfo *)OPENSSL_sk_pop(ossl_check_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_shift(sk) ((CMS_SignerInfo *)OPENSSL_sk_shift(ossl_check_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_SignerInfo_sk_type(sk),ossl_check_CMS_SignerInfo_freefunc_type(freefunc)) +#define sk_CMS_SignerInfo_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr), (idx)) +#define sk_CMS_SignerInfo_set(sk, idx, ptr) ((CMS_SignerInfo *)OPENSSL_sk_set(ossl_check_CMS_SignerInfo_sk_type(sk), (idx), ossl_check_CMS_SignerInfo_type(ptr))) +#define sk_CMS_SignerInfo_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define sk_CMS_SignerInfo_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr)) +#define 
sk_CMS_SignerInfo_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_type(ptr), pnum) +#define sk_CMS_SignerInfo_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_SignerInfo_sk_type(sk)) +#define sk_CMS_SignerInfo_dup(sk) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_dup(ossl_check_const_CMS_SignerInfo_sk_type(sk))) +#define sk_CMS_SignerInfo_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_SignerInfo) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_copyfunc_type(copyfunc), ossl_check_CMS_SignerInfo_freefunc_type(freefunc))) +#define sk_CMS_SignerInfo_set_cmp_func(sk, cmp) ((sk_CMS_SignerInfo_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_SignerInfo_sk_type(sk), ossl_check_CMS_SignerInfo_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey) +#define sk_CMS_RecipientEncryptedKey_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_value(sk, idx) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_value(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk), (idx))) +#define sk_CMS_RecipientEncryptedKey_new(cmp) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new(ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp))) +#define sk_CMS_RecipientEncryptedKey_new_null() ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new_null()) +#define sk_CMS_RecipientEncryptedKey_new_reserve(cmp, n) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp), (n))) +#define sk_CMS_RecipientEncryptedKey_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (n)) +#define sk_CMS_RecipientEncryptedKey_free(sk) OPENSSL_sk_free(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_delete(sk, i) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_delete(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (i))) +#define sk_CMS_RecipientEncryptedKey_delete_ptr(sk, ptr) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr))) +#define sk_CMS_RecipientEncryptedKey_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_pop(sk) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_pop(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_shift(sk) ((CMS_RecipientEncryptedKey *)OPENSSL_sk_shift(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk),ossl_check_CMS_RecipientEncryptedKey_freefunc_type(freefunc)) +#define sk_CMS_RecipientEncryptedKey_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr), (idx)) +#define sk_CMS_RecipientEncryptedKey_set(sk, idx, ptr) 
((CMS_RecipientEncryptedKey *)OPENSSL_sk_set(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), (idx), ossl_check_CMS_RecipientEncryptedKey_type(ptr))) +#define sk_CMS_RecipientEncryptedKey_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr)) +#define sk_CMS_RecipientEncryptedKey_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_type(ptr), pnum) +#define sk_CMS_RecipientEncryptedKey_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk)) +#define sk_CMS_RecipientEncryptedKey_dup(sk) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_dup(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk))) +#define sk_CMS_RecipientEncryptedKey_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RecipientEncryptedKey) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_copyfunc_type(copyfunc), ossl_check_CMS_RecipientEncryptedKey_freefunc_type(freefunc))) +#define sk_CMS_RecipientEncryptedKey_set_cmp_func(sk, cmp) ((sk_CMS_RecipientEncryptedKey_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RecipientEncryptedKey_sk_type(sk), ossl_check_CMS_RecipientEncryptedKey_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RecipientInfo, CMS_RecipientInfo, CMS_RecipientInfo) +#define sk_CMS_RecipientInfo_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_value(sk, idx) ((CMS_RecipientInfo *)OPENSSL_sk_value(ossl_check_const_CMS_RecipientInfo_sk_type(sk), (idx))) +#define sk_CMS_RecipientInfo_new(cmp) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new(ossl_check_CMS_RecipientInfo_compfunc_type(cmp))) +#define sk_CMS_RecipientInfo_new_null() ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new_null()) +#define sk_CMS_RecipientInfo_new_reserve(cmp, n) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RecipientInfo_compfunc_type(cmp), (n))) +#define sk_CMS_RecipientInfo_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RecipientInfo_sk_type(sk), (n)) +#define sk_CMS_RecipientInfo_free(sk) OPENSSL_sk_free(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_delete(sk, i) ((CMS_RecipientInfo *)OPENSSL_sk_delete(ossl_check_CMS_RecipientInfo_sk_type(sk), (i))) +#define sk_CMS_RecipientInfo_delete_ptr(sk, ptr) ((CMS_RecipientInfo *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr))) +#define sk_CMS_RecipientInfo_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_pop(sk) ((CMS_RecipientInfo *)OPENSSL_sk_pop(ossl_check_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_shift(sk) ((CMS_RecipientInfo *)OPENSSL_sk_shift(ossl_check_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_CMS_RecipientInfo_sk_type(sk),ossl_check_CMS_RecipientInfo_freefunc_type(freefunc)) +#define sk_CMS_RecipientInfo_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr), (idx)) +#define sk_CMS_RecipientInfo_set(sk, idx, ptr) ((CMS_RecipientInfo *)OPENSSL_sk_set(ossl_check_CMS_RecipientInfo_sk_type(sk), (idx), ossl_check_CMS_RecipientInfo_type(ptr))) +#define sk_CMS_RecipientInfo_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr)) +#define sk_CMS_RecipientInfo_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_type(ptr), pnum) +#define sk_CMS_RecipientInfo_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RecipientInfo_sk_type(sk)) +#define sk_CMS_RecipientInfo_dup(sk) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_dup(ossl_check_const_CMS_RecipientInfo_sk_type(sk))) +#define sk_CMS_RecipientInfo_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RecipientInfo) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_copyfunc_type(copyfunc), ossl_check_CMS_RecipientInfo_freefunc_type(freefunc))) +#define sk_CMS_RecipientInfo_set_cmp_func(sk, cmp) ((sk_CMS_RecipientInfo_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RecipientInfo_sk_type(sk), ossl_check_CMS_RecipientInfo_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CMS_RevocationInfoChoice, CMS_RevocationInfoChoice, CMS_RevocationInfoChoice) +#define sk_CMS_RevocationInfoChoice_num(sk) OPENSSL_sk_num(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_value(sk, idx) ((CMS_RevocationInfoChoice *)OPENSSL_sk_value(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk), (idx))) +#define sk_CMS_RevocationInfoChoice_new(cmp) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new(ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp))) +#define sk_CMS_RevocationInfoChoice_new_null() ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new_null()) +#define sk_CMS_RevocationInfoChoice_new_reserve(cmp, n) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_new_reserve(ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp), (n))) +#define sk_CMS_RevocationInfoChoice_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (n)) +#define sk_CMS_RevocationInfoChoice_free(sk) OPENSSL_sk_free(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_zero(sk) OPENSSL_sk_zero(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_delete(sk, i) ((CMS_RevocationInfoChoice *)OPENSSL_sk_delete(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (i))) +#define sk_CMS_RevocationInfoChoice_delete_ptr(sk, ptr) ((CMS_RevocationInfoChoice *)OPENSSL_sk_delete_ptr(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr))) +#define sk_CMS_RevocationInfoChoice_push(sk, ptr) OPENSSL_sk_push(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), 
ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_pop(sk) ((CMS_RevocationInfoChoice *)OPENSSL_sk_pop(ossl_check_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_shift(sk) ((CMS_RevocationInfoChoice *)OPENSSL_sk_shift(ossl_check_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CMS_RevocationInfoChoice_sk_type(sk),ossl_check_CMS_RevocationInfoChoice_freefunc_type(freefunc)) +#define sk_CMS_RevocationInfoChoice_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr), (idx)) +#define sk_CMS_RevocationInfoChoice_set(sk, idx, ptr) ((CMS_RevocationInfoChoice *)OPENSSL_sk_set(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), (idx), ossl_check_CMS_RevocationInfoChoice_type(ptr))) +#define sk_CMS_RevocationInfoChoice_find(sk, ptr) OPENSSL_sk_find(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr)) +#define sk_CMS_RevocationInfoChoice_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_type(ptr), pnum) +#define sk_CMS_RevocationInfoChoice_sort(sk) OPENSSL_sk_sort(ossl_check_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk)) +#define sk_CMS_RevocationInfoChoice_dup(sk) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_dup(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk))) +#define sk_CMS_RevocationInfoChoice_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CMS_RevocationInfoChoice) *)OPENSSL_sk_deep_copy(ossl_check_const_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_copyfunc_type(copyfunc), ossl_check_CMS_RevocationInfoChoice_freefunc_type(freefunc))) +#define sk_CMS_RevocationInfoChoice_set_cmp_func(sk, cmp) ((sk_CMS_RevocationInfoChoice_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CMS_RevocationInfoChoice_sk_type(sk), ossl_check_CMS_RevocationInfoChoice_compfunc_type(cmp))) + + +DECLARE_ASN1_ITEM(CMS_EnvelopedData) +DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_SignedData) +DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) +DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) +DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) + +DECLARE_ASN1_DUP_FUNCTION(CMS_EnvelopedData) + +CMS_ContentInfo *CMS_ContentInfo_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +# define CMS_SIGNERINFO_ISSUER_SERIAL 0 +# define CMS_SIGNERINFO_KEYIDENTIFIER 1 + +# define CMS_RECIPINFO_NONE -1 +# define CMS_RECIPINFO_TRANS 0 +# define CMS_RECIPINFO_AGREE 1 +# define CMS_RECIPINFO_KEK 2 +# define CMS_RECIPINFO_PASS 3 +# define CMS_RECIPINFO_OTHER 4 + +/* S/MIME related flags */ + +# define CMS_TEXT 0x1 +# define CMS_NOCERTS 0x2 +# define CMS_NO_CONTENT_VERIFY 0x4 +# define CMS_NO_ATTR_VERIFY 0x8 +# define CMS_NOSIGS \ + (CMS_NO_CONTENT_VERIFY|CMS_NO_ATTR_VERIFY) +# define CMS_NOINTERN 0x10 +# define CMS_NO_SIGNER_CERT_VERIFY 0x20 +# define CMS_NOVERIFY 0x20 +# define CMS_DETACHED 0x40 +# define CMS_BINARY 0x80 +# define CMS_NOATTR 0x100 +# define CMS_NOSMIMECAP 0x200 +# define CMS_NOOLDMIMETYPE 0x400 +# define CMS_CRLFEOL 0x800 +# define CMS_STREAM 0x1000 +# define CMS_NOCRL 0x2000 +# define CMS_PARTIAL 0x4000 +# define 
CMS_REUSE_DIGEST 0x8000 +# define CMS_USE_KEYID 0x10000 +# define CMS_DEBUG_DECRYPT 0x20000 +# define CMS_KEY_PARAM 0x40000 +# define CMS_ASCIICRLF 0x80000 +# define CMS_CADES 0x100000 +# define CMS_USE_ORIGINATOR_KEYID 0x200000 +# define CMS_NO_SIGNING_TIME 0x400000 + +const ASN1_OBJECT *CMS_get0_type(const CMS_ContentInfo *cms); + +BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont); +int CMS_dataFinal(CMS_ContentInfo *cms, BIO *bio); + +ASN1_OCTET_STRING **CMS_get0_content(CMS_ContentInfo *cms); +int CMS_is_detached(CMS_ContentInfo *cms); +int CMS_set_detached(CMS_ContentInfo *cms, int detached); + +# ifdef OPENSSL_PEM_H +DECLARE_PEM_rw(CMS, CMS_ContentInfo) +# endif +int CMS_stream(unsigned char ***boundary, CMS_ContentInfo *cms); +CMS_ContentInfo *d2i_CMS_bio(BIO *bp, CMS_ContentInfo **cms); +int i2d_CMS_bio(BIO *bp, CMS_ContentInfo *cms); + +BIO *BIO_new_CMS(BIO *out, CMS_ContentInfo *cms); +int i2d_CMS_bio_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, int flags); +int PEM_write_bio_CMS_stream(BIO *out, CMS_ContentInfo *cms, BIO *in, + int flags); +CMS_ContentInfo *SMIME_read_CMS(BIO *bio, BIO **bcont); +CMS_ContentInfo *SMIME_read_CMS_ex(BIO *bio, int flags, BIO **bcont, CMS_ContentInfo **ci); +int SMIME_write_CMS(BIO *bio, CMS_ContentInfo *cms, BIO *data, int flags); + +int CMS_final(CMS_ContentInfo *cms, BIO *data, BIO *dcont, + unsigned int flags); +int CMS_final_digest(CMS_ContentInfo *cms, + const unsigned char *md, unsigned int mdlen, BIO *dcont, + unsigned int flags); + +CMS_ContentInfo *CMS_sign(X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, BIO *data, + unsigned int flags); +CMS_ContentInfo *CMS_sign_ex(X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, BIO *data, + unsigned int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +CMS_ContentInfo *CMS_sign_receipt(CMS_SignerInfo *si, + X509 *signcert, EVP_PKEY *pkey, + STACK_OF(X509) *certs, unsigned int flags); + +int CMS_data(CMS_ContentInfo *cms, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_data_create(BIO *in, unsigned int flags); +CMS_ContentInfo *CMS_data_create_ex(BIO *in, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_digest_verify(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create(BIO *in, const EVP_MD *md, + unsigned int flags); +CMS_ContentInfo *CMS_digest_create_ex(BIO *in, const EVP_MD *md, + unsigned int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +int CMS_EncryptedData_decrypt(CMS_ContentInfo *cms, + const unsigned char *key, size_t keylen, + BIO *dcont, BIO *out, unsigned int flags); +CMS_ContentInfo *CMS_EncryptedData_encrypt(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, + size_t keylen, unsigned int flags); +CMS_ContentInfo *CMS_EncryptedData_encrypt_ex(BIO *in, const EVP_CIPHER *cipher, + const unsigned char *key, + size_t keylen, unsigned int flags, + OSSL_LIB_CTX *libctx, + const char *propq); + +int CMS_EncryptedData_set1_key(CMS_ContentInfo *cms, const EVP_CIPHER *ciph, + const unsigned char *key, size_t keylen); + +int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + X509_STORE *store, BIO *dcont, BIO *out, unsigned int flags); + +int CMS_verify_receipt(CMS_ContentInfo *rcms, CMS_ContentInfo *ocms, + STACK_OF(X509) *certs, + X509_STORE *store, unsigned int flags); + +STACK_OF(X509) *CMS_get0_signers(CMS_ContentInfo *cms); + +CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags); +CMS_ContentInfo 
*CMS_encrypt_ex(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_decrypt(CMS_ContentInfo *cms, EVP_PKEY *pkey, X509 *cert, + BIO *dcont, BIO *out, unsigned int flags); + +int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert); +int CMS_decrypt_set1_pkey_and_peer(CMS_ContentInfo *cms, EVP_PKEY *pk, + X509 *cert, X509 *peer); +int CMS_decrypt_set1_key(CMS_ContentInfo *cms, + unsigned char *key, size_t keylen, + const unsigned char *id, size_t idlen); +int CMS_decrypt_set1_password(CMS_ContentInfo *cms, + unsigned char *pass, ossl_ssize_t passlen); + +STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); +int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); +EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri); +CMS_ContentInfo *CMS_AuthEnvelopedData_create(const EVP_CIPHER *cipher); +CMS_ContentInfo * +CMS_AuthEnvelopedData_create_ex(const EVP_CIPHER *cipher, OSSL_LIB_CTX *libctx, + const char *propq); +CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher); +CMS_ContentInfo *CMS_EnvelopedData_create_ex(const EVP_CIPHER *cipher, + OSSL_LIB_CTX *libctx, + const char *propq); +BIO *CMS_EnvelopedData_decrypt(CMS_EnvelopedData *env, BIO *detached_data, + EVP_PKEY *pkey, X509 *cert, + ASN1_OCTET_STRING *secret, unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, + X509 *recip, unsigned int flags); +CMS_RecipientInfo *CMS_add1_recipient(CMS_ContentInfo *cms, X509 *recip, + EVP_PKEY *originatorPrivKey, X509 * originator, unsigned int flags); +int CMS_RecipientInfo_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pkey); +int CMS_RecipientInfo_ktri_cert_cmp(CMS_RecipientInfo *ri, X509 *cert); +int CMS_RecipientInfo_ktri_get0_algs(CMS_RecipientInfo *ri, + EVP_PKEY **pk, X509 **recip, + X509_ALGOR **palg); +int CMS_RecipientInfo_ktri_get0_signer_id(CMS_RecipientInfo *ri, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +CMS_RecipientInfo *CMS_add0_recipient_key(CMS_ContentInfo *cms, int nid, + unsigned char *key, size_t keylen, + unsigned char *id, size_t idlen, + ASN1_GENERALIZEDTIME *date, + ASN1_OBJECT *otherTypeId, + ASN1_TYPE *otherType); + +int CMS_RecipientInfo_kekri_get0_id(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pid, + ASN1_GENERALIZEDTIME **pdate, + ASN1_OBJECT **potherid, + ASN1_TYPE **pothertype); + +int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, + unsigned char *key, size_t keylen); + +int CMS_RecipientInfo_kekri_id_cmp(CMS_RecipientInfo *ri, + const unsigned char *id, size_t idlen); + +int CMS_RecipientInfo_set0_password(CMS_RecipientInfo *ri, + unsigned char *pass, + ossl_ssize_t passlen); + +CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, + int iter, int wrap_nid, + int pbe_nid, + unsigned char *pass, + ossl_ssize_t passlen, + const EVP_CIPHER *kekciph); + +int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); +int CMS_RecipientInfo_encrypt(const CMS_ContentInfo *cms, CMS_RecipientInfo *ri); + +int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, + unsigned int flags); +CMS_ContentInfo *CMS_compress(BIO *in, int comp_nid, unsigned int flags); + +int CMS_set1_eContentType(CMS_ContentInfo *cms, const ASN1_OBJECT *oid); +const ASN1_OBJECT *CMS_get0_eContentType(CMS_ContentInfo *cms); + +CMS_CertificateChoices *CMS_add0_CertificateChoices(CMS_ContentInfo 
*cms); +int CMS_add0_cert(CMS_ContentInfo *cms, X509 *cert); +int CMS_add1_cert(CMS_ContentInfo *cms, X509 *cert); +STACK_OF(X509) *CMS_get1_certs(CMS_ContentInfo *cms); + +CMS_RevocationInfoChoice *CMS_add0_RevocationInfoChoice(CMS_ContentInfo *cms); +int CMS_add0_crl(CMS_ContentInfo *cms, X509_CRL *crl); +int CMS_add1_crl(CMS_ContentInfo *cms, X509_CRL *crl); +STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms); + +int CMS_SignedData_init(CMS_ContentInfo *cms); +CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, + X509 *signer, EVP_PKEY *pk, const EVP_MD *md, + unsigned int flags); +EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si); +EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si); +STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); + +void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); +int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert); +int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs, + unsigned int flags); +void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, + X509 **signer, X509_ALGOR **pdig, + X509_ALGOR **psig); +ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si); +int CMS_SignerInfo_sign(CMS_SignerInfo *si); +int CMS_SignerInfo_verify(CMS_SignerInfo *si); +int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain); +BIO *CMS_SignedData_verify(CMS_SignedData *sd, BIO *detached_data, + STACK_OF(X509) *scerts, X509_STORE *store, + STACK_OF(X509) *extra, STACK_OF(X509_CRL) *crls, + unsigned int flags, + OSSL_LIB_CTX *libctx, const char *propq); + +int CMS_add_smimecap(CMS_SignerInfo *si, STACK_OF(X509_ALGOR) *algs); +int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, + int algnid, int keysize); +int CMS_add_standard_smimecap(STACK_OF(X509_ALGOR) **smcap); + +int CMS_signed_get_attr_count(const CMS_SignerInfo *si); +int CMS_signed_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_signed_get_attr_by_OBJ(const CMS_SignerInfo *si, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *CMS_signed_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_signed_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_signed_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_signed_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void *bytes, int len); +int CMS_signed_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_signed_get0_data_by_OBJ(const CMS_SignerInfo *si, + const ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_unsigned_get_attr_count(const CMS_SignerInfo *si); +int CMS_unsigned_get_attr_by_NID(const CMS_SignerInfo *si, int nid, + int lastpos); +int CMS_unsigned_get_attr_by_OBJ(const CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int lastpos); +X509_ATTRIBUTE *CMS_unsigned_get_attr(const CMS_SignerInfo *si, int loc); +X509_ATTRIBUTE *CMS_unsigned_delete_attr(CMS_SignerInfo *si, int loc); +int CMS_unsigned_add1_attr(CMS_SignerInfo *si, X509_ATTRIBUTE *attr); +int CMS_unsigned_add1_attr_by_OBJ(CMS_SignerInfo *si, + const ASN1_OBJECT *obj, int type, + const void *bytes, int len); +int CMS_unsigned_add1_attr_by_NID(CMS_SignerInfo *si, + int nid, int type, + const void 
*bytes, int len); +int CMS_unsigned_add1_attr_by_txt(CMS_SignerInfo *si, + const char *attrname, int type, + const void *bytes, int len); +void *CMS_unsigned_get0_data_by_OBJ(CMS_SignerInfo *si, ASN1_OBJECT *oid, + int lastpos, int type); + +int CMS_get1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest **prr); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0( + unsigned char *id, int idlen, int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo); +CMS_ReceiptRequest *CMS_ReceiptRequest_create0_ex( + unsigned char *id, int idlen, int allorfirst, + STACK_OF(GENERAL_NAMES) *receiptList, + STACK_OF(GENERAL_NAMES) *receiptsTo, + OSSL_LIB_CTX *libctx); + +int CMS_add1_ReceiptRequest(CMS_SignerInfo *si, CMS_ReceiptRequest *rr); +void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, + ASN1_STRING **pcid, + int *pallorfirst, + STACK_OF(GENERAL_NAMES) **plist, + STACK_OF(GENERAL_NAMES) **prto); +int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pukm); +STACK_OF(CMS_RecipientEncryptedKey) +*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri); + +int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri, + X509_ALGOR **pubalg, + ASN1_BIT_STRING **pubkey, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert); + +int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek, + ASN1_OCTET_STRING **keyid, + ASN1_GENERALIZEDTIME **tm, + CMS_OtherKeyAttribute **other, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek, + X509 *cert); +int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk); +int CMS_RecipientInfo_kari_set0_pkey_and_peer(CMS_RecipientInfo *ri, EVP_PKEY *pk, X509 *peer); +EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri); +int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri, + CMS_RecipientEncryptedKey *rek); + +int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg, + ASN1_OCTET_STRING *ukm, int keylen); + +/* Backward compatibility for spelling errors. */ +# define CMS_R_UNKNOWN_DIGEST_ALGORITM CMS_R_UNKNOWN_DIGEST_ALGORITHM +# define CMS_R_UNSUPPORTED_RECPIENTINFO_TYPE \ + CMS_R_UNSUPPORTED_RECIPIENTINFO_TYPE + +# ifdef __cplusplus +} +# endif +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/comp.h b/contrib/openssl-cmake/common/include/openssl/comp.h new file mode 100644 index 000000000000..90e39511fe8d --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/comp.h @@ -0,0 +1,98 @@ +/* + * Copyright 2015-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_COMP_H +# define OPENSSL_COMP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_COMP_H +# endif + +# include + +# include +# include +# ifdef __cplusplus +extern "C" { +# endif + + + +# ifndef OPENSSL_NO_COMP + +COMP_CTX *COMP_CTX_new(COMP_METHOD *meth); +const COMP_METHOD *COMP_CTX_get_method(const COMP_CTX *ctx); +int COMP_CTX_get_type(const COMP_CTX* comp); +int COMP_get_type(const COMP_METHOD *meth); +const char *COMP_get_name(const COMP_METHOD *meth); +void COMP_CTX_free(COMP_CTX *ctx); + +int COMP_compress_block(COMP_CTX *ctx, unsigned char *out, int olen, + unsigned char *in, int ilen); +int COMP_expand_block(COMP_CTX *ctx, unsigned char *out, int olen, + unsigned char *in, int ilen); + +COMP_METHOD *COMP_zlib(void); +COMP_METHOD *COMP_zlib_oneshot(void); +COMP_METHOD *COMP_brotli(void); +COMP_METHOD *COMP_brotli_oneshot(void); +COMP_METHOD *COMP_zstd(void); +COMP_METHOD *COMP_zstd_oneshot(void); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define COMP_zlib_cleanup() while(0) continue +# endif + +# ifdef OPENSSL_BIO_H +const BIO_METHOD *BIO_f_zlib(void); +const BIO_METHOD *BIO_f_brotli(void); +const BIO_METHOD *BIO_f_zstd(void); +# endif + +# endif + +typedef struct ssl_comp_st SSL_COMP; + +SKM_DEFINE_STACK_OF_INTERNAL(SSL_COMP, SSL_COMP, SSL_COMP) +#define sk_SSL_COMP_num(sk) OPENSSL_sk_num(ossl_check_const_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_value(sk, idx) ((SSL_COMP *)OPENSSL_sk_value(ossl_check_const_SSL_COMP_sk_type(sk), (idx))) +#define sk_SSL_COMP_new(cmp) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new(ossl_check_SSL_COMP_compfunc_type(cmp))) +#define sk_SSL_COMP_new_null() ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new_null()) +#define sk_SSL_COMP_new_reserve(cmp, n) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_new_reserve(ossl_check_SSL_COMP_compfunc_type(cmp), (n))) +#define sk_SSL_COMP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SSL_COMP_sk_type(sk), (n)) +#define sk_SSL_COMP_free(sk) OPENSSL_sk_free(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_zero(sk) OPENSSL_sk_zero(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_delete(sk, i) ((SSL_COMP *)OPENSSL_sk_delete(ossl_check_SSL_COMP_sk_type(sk), (i))) +#define sk_SSL_COMP_delete_ptr(sk, ptr) ((SSL_COMP *)OPENSSL_sk_delete_ptr(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr))) +#define sk_SSL_COMP_push(sk, ptr) OPENSSL_sk_push(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_pop(sk) ((SSL_COMP *)OPENSSL_sk_pop(ossl_check_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_shift(sk) ((SSL_COMP *)OPENSSL_sk_shift(ossl_check_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SSL_COMP_sk_type(sk),ossl_check_SSL_COMP_freefunc_type(freefunc)) +#define sk_SSL_COMP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr), (idx)) +#define sk_SSL_COMP_set(sk, idx, ptr) ((SSL_COMP *)OPENSSL_sk_set(ossl_check_SSL_COMP_sk_type(sk), (idx), ossl_check_SSL_COMP_type(ptr))) +#define sk_SSL_COMP_find(sk, ptr) OPENSSL_sk_find(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define sk_SSL_COMP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr)) +#define 
sk_SSL_COMP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_type(ptr), pnum) +#define sk_SSL_COMP_sort(sk) OPENSSL_sk_sort(ossl_check_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SSL_COMP_sk_type(sk)) +#define sk_SSL_COMP_dup(sk) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_dup(ossl_check_const_SSL_COMP_sk_type(sk))) +#define sk_SSL_COMP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SSL_COMP) *)OPENSSL_sk_deep_copy(ossl_check_const_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_copyfunc_type(copyfunc), ossl_check_SSL_COMP_freefunc_type(freefunc))) +#define sk_SSL_COMP_set_cmp_func(sk, cmp) ((sk_SSL_COMP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SSL_COMP_sk_type(sk), ossl_check_SSL_COMP_compfunc_type(cmp))) + + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/conf.h b/contrib/openssl-cmake/common/include/openssl/conf.h new file mode 100644 index 000000000000..38576290bf64 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/conf.h @@ -0,0 +1,214 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/conf.h.in + * + * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CONF_H +# define OPENSSL_CONF_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CONF_H +# endif + +# include +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + char *section; + char *name; + char *value; +} CONF_VALUE; + +SKM_DEFINE_STACK_OF_INTERNAL(CONF_VALUE, CONF_VALUE, CONF_VALUE) +#define sk_CONF_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_value(sk, idx) ((CONF_VALUE *)OPENSSL_sk_value(ossl_check_const_CONF_VALUE_sk_type(sk), (idx))) +#define sk_CONF_VALUE_new(cmp) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new(ossl_check_CONF_VALUE_compfunc_type(cmp))) +#define sk_CONF_VALUE_new_null() ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new_null()) +#define sk_CONF_VALUE_new_reserve(cmp, n) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_CONF_VALUE_compfunc_type(cmp), (n))) +#define sk_CONF_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CONF_VALUE_sk_type(sk), (n)) +#define sk_CONF_VALUE_free(sk) OPENSSL_sk_free(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_delete(sk, i) ((CONF_VALUE *)OPENSSL_sk_delete(ossl_check_CONF_VALUE_sk_type(sk), (i))) +#define sk_CONF_VALUE_delete_ptr(sk, ptr) ((CONF_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr))) +#define sk_CONF_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_pop(sk) ((CONF_VALUE *)OPENSSL_sk_pop(ossl_check_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_shift(sk) ((CONF_VALUE *)OPENSSL_sk_shift(ossl_check_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_pop_free(sk, 
freefunc) OPENSSL_sk_pop_free(ossl_check_CONF_VALUE_sk_type(sk),ossl_check_CONF_VALUE_freefunc_type(freefunc)) +#define sk_CONF_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr), (idx)) +#define sk_CONF_VALUE_set(sk, idx, ptr) ((CONF_VALUE *)OPENSSL_sk_set(ossl_check_CONF_VALUE_sk_type(sk), (idx), ossl_check_CONF_VALUE_type(ptr))) +#define sk_CONF_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr)) +#define sk_CONF_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_type(ptr), pnum) +#define sk_CONF_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CONF_VALUE_sk_type(sk)) +#define sk_CONF_VALUE_dup(sk) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_dup(ossl_check_const_CONF_VALUE_sk_type(sk))) +#define sk_CONF_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CONF_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_copyfunc_type(copyfunc), ossl_check_CONF_VALUE_freefunc_type(freefunc))) +#define sk_CONF_VALUE_set_cmp_func(sk, cmp) ((sk_CONF_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CONF_VALUE_sk_type(sk), ossl_check_CONF_VALUE_compfunc_type(cmp))) +DEFINE_LHASH_OF_INTERNAL(CONF_VALUE); +#define lh_CONF_VALUE_new(hfn, cmp) ((LHASH_OF(CONF_VALUE) *)OPENSSL_LH_set_thunks(OPENSSL_LH_new(ossl_check_CONF_VALUE_lh_hashfunc_type(hfn), ossl_check_CONF_VALUE_lh_compfunc_type(cmp)), lh_CONF_VALUE_hash_thunk, lh_CONF_VALUE_comp_thunk, lh_CONF_VALUE_doall_thunk, lh_CONF_VALUE_doall_arg_thunk)) +#define lh_CONF_VALUE_free(lh) OPENSSL_LH_free(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_flush(lh) OPENSSL_LH_flush(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_insert(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_insert(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_delete(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_delete(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_const_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_retrieve(lh, ptr) ((CONF_VALUE *)OPENSSL_LH_retrieve(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_const_CONF_VALUE_lh_plain_type(ptr))) +#define lh_CONF_VALUE_error(lh) OPENSSL_LH_error(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_num_items(lh) OPENSSL_LH_num_items(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_CONF_VALUE_lh_type(lh), out) +#define lh_CONF_VALUE_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_CONF_VALUE_lh_type(lh)) +#define lh_CONF_VALUE_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_CONF_VALUE_lh_type(lh), dl) +#define lh_CONF_VALUE_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_CONF_VALUE_lh_type(lh), ossl_check_CONF_VALUE_lh_doallfunc_type(dfn)) + + +struct conf_st; +struct conf_method_st; +typedef struct conf_method_st CONF_METHOD; + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# include +# endif + +/* Module definitions */ +typedef struct conf_imodule_st 
CONF_IMODULE; +typedef struct conf_module_st CONF_MODULE; + +STACK_OF(CONF_MODULE); +STACK_OF(CONF_IMODULE); + +/* DSO module function typedefs */ +typedef int conf_init_func (CONF_IMODULE *md, const CONF *cnf); +typedef void conf_finish_func (CONF_IMODULE *md); + +# define CONF_MFLAGS_IGNORE_ERRORS 0x1 +# define CONF_MFLAGS_IGNORE_RETURN_CODES 0x2 +# define CONF_MFLAGS_SILENT 0x4 +# define CONF_MFLAGS_NO_DSO 0x8 +# define CONF_MFLAGS_IGNORE_MISSING_FILE 0x10 +# define CONF_MFLAGS_DEFAULT_SECTION 0x20 + +int CONF_set_default_method(CONF_METHOD *meth); +void CONF_set_nconf(CONF *conf, LHASH_OF(CONF_VALUE) *hash); +LHASH_OF(CONF_VALUE) *CONF_load(LHASH_OF(CONF_VALUE) *conf, const char *file, + long *eline); +# ifndef OPENSSL_NO_STDIO +LHASH_OF(CONF_VALUE) *CONF_load_fp(LHASH_OF(CONF_VALUE) *conf, FILE *fp, + long *eline); +# endif +LHASH_OF(CONF_VALUE) *CONF_load_bio(LHASH_OF(CONF_VALUE) *conf, BIO *bp, + long *eline); +STACK_OF(CONF_VALUE) *CONF_get_section(LHASH_OF(CONF_VALUE) *conf, + const char *section); +char *CONF_get_string(LHASH_OF(CONF_VALUE) *conf, const char *group, + const char *name); +long CONF_get_number(LHASH_OF(CONF_VALUE) *conf, const char *group, + const char *name); +void CONF_free(LHASH_OF(CONF_VALUE) *conf); +#ifndef OPENSSL_NO_STDIO +int CONF_dump_fp(LHASH_OF(CONF_VALUE) *conf, FILE *out); +#endif +int CONF_dump_bio(LHASH_OF(CONF_VALUE) *conf, BIO *out); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void OPENSSL_config(const char *config_name); +#endif + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OPENSSL_no_config() \ + OPENSSL_init_crypto(OPENSSL_INIT_NO_LOAD_CONFIG, NULL) +#endif + +/* + * New conf code. The semantics are different from the functions above. If + * that wasn't the case, the above functions would have been replaced + */ + +CONF *NCONF_new_ex(OSSL_LIB_CTX *libctx, CONF_METHOD *meth); +OSSL_LIB_CTX *NCONF_get0_libctx(const CONF *conf); +CONF *NCONF_new(CONF_METHOD *meth); +CONF_METHOD *NCONF_default(void); +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 CONF_METHOD *NCONF_WIN32(void); +#endif +void NCONF_free(CONF *conf); +void NCONF_free_data(CONF *conf); + +int NCONF_load(CONF *conf, const char *file, long *eline); +# ifndef OPENSSL_NO_STDIO +int NCONF_load_fp(CONF *conf, FILE *fp, long *eline); +# endif +int NCONF_load_bio(CONF *conf, BIO *bp, long *eline); +STACK_OF(OPENSSL_CSTRING) *NCONF_get_section_names(const CONF *conf); +STACK_OF(CONF_VALUE) *NCONF_get_section(const CONF *conf, + const char *section); +char *NCONF_get_string(const CONF *conf, const char *group, const char *name); +int NCONF_get_number_e(const CONF *conf, const char *group, const char *name, + long *result); +#ifndef OPENSSL_NO_STDIO +int NCONF_dump_fp(const CONF *conf, FILE *out); +#endif +int NCONF_dump_bio(const CONF *conf, BIO *out); + +#define NCONF_get_number(c,g,n,r) NCONF_get_number_e(c,g,n,r) + +/* Module functions */ + +int CONF_modules_load(const CONF *cnf, const char *appname, + unsigned long flags); +int CONF_modules_load_file_ex(OSSL_LIB_CTX *libctx, const char *filename, + const char *appname, unsigned long flags); +int CONF_modules_load_file(const char *filename, const char *appname, + unsigned long flags); +void CONF_modules_unload(int all); +void CONF_modules_finish(void); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define CONF_modules_free() while(0) continue +#endif +int CONF_module_add(const char *name, conf_init_func *ifunc, + conf_finish_func *ffunc); + +const char *CONF_imodule_get_name(const CONF_IMODULE *md); +const char 
*CONF_imodule_get_value(const CONF_IMODULE *md); +void *CONF_imodule_get_usr_data(const CONF_IMODULE *md); +void CONF_imodule_set_usr_data(CONF_IMODULE *md, void *usr_data); +CONF_MODULE *CONF_imodule_get_module(const CONF_IMODULE *md); +unsigned long CONF_imodule_get_flags(const CONF_IMODULE *md); +void CONF_imodule_set_flags(CONF_IMODULE *md, unsigned long flags); +void *CONF_module_get_usr_data(CONF_MODULE *pmod); +void CONF_module_set_usr_data(CONF_MODULE *pmod, void *usr_data); + +char *CONF_get1_default_config_file(void); + +int CONF_parse_list(const char *list, int sep, int nospc, + int (*list_cb) (const char *elem, int len, void *usr), + void *arg); + +void OPENSSL_load_builtin_modules(void); + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/configuration.h b/contrib/openssl-cmake/common/include/openssl/configuration.h new file mode 100644 index 000000000000..30076cd6b66f --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/configuration.h @@ -0,0 +1,185 @@ +/* + * WARNING: do not edit! + * Generated by configdata.pm from Configurations/common0.tmpl, Configurations/unix-Makefile.tmpl + * via Makefile.in + * + * Copyright 2016-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_CONFIGURATION_H +# define OPENSSL_CONFIGURATION_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +# ifdef OPENSSL_ALGORITHM_DEFINES +# error OPENSSL_ALGORITHM_DEFINES no longer supported +# endif + +/* + * OpenSSL was configured with the following options: + */ + +# define OPENSSL_CONFIGURED_API 30500 +# ifndef OPENSSL_RAND_SEED_OS +# define OPENSSL_RAND_SEED_OS +# endif +# ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +# endif +# ifndef OPENSSL_NO_ACVP_TESTS +# define OPENSSL_NO_ACVP_TESTS +# endif +# ifndef OPENSSL_NO_ASAN +# define OPENSSL_NO_ASAN +# endif +# ifndef OPENSSL_NO_BROTLI +# define OPENSSL_NO_BROTLI +# endif +# ifndef OPENSSL_NO_BROTLI_DYNAMIC +# define OPENSSL_NO_BROTLI_DYNAMIC +# endif +# ifndef OPENSSL_NO_CRYPTO_MDEBUG +# define OPENSSL_NO_CRYPTO_MDEBUG +# endif +# ifndef OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE +# define OPENSSL_NO_CRYPTO_MDEBUG_BACKTRACE +# endif +# ifndef OPENSSL_NO_DEMOS +# define OPENSSL_NO_DEMOS +# endif +# ifndef OPENSSL_NO_DEVCRYPTOENG +# define OPENSSL_NO_DEVCRYPTOENG +# endif +# ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +# endif +# ifndef OPENSSL_NO_EGD +# define OPENSSL_NO_EGD +# endif +# ifndef OPENSSL_NO_EXTERNAL_TESTS +# define OPENSSL_NO_EXTERNAL_TESTS +# endif +# ifndef OPENSSL_NO_FIPS_JITTER +# define OPENSSL_NO_FIPS_JITTER +# endif +# ifndef OPENSSL_NO_FIPS_POST +# define OPENSSL_NO_FIPS_POST +# endif +# ifndef OPENSSL_NO_FIPS_SECURITYCHECKS +# define OPENSSL_NO_FIPS_SECURITYCHECKS +# endif +# ifndef OPENSSL_NO_FUZZ_AFL +# define OPENSSL_NO_FUZZ_AFL +# endif +# ifndef OPENSSL_NO_FUZZ_LIBFUZZER +# define OPENSSL_NO_FUZZ_LIBFUZZER +# endif +# ifndef OPENSSL_NO_H3DEMO +# define OPENSSL_NO_H3DEMO +# endif +# ifndef OPENSSL_NO_HQINTEROP +# define OPENSSL_NO_HQINTEROP +# endif +# ifndef OPENSSL_NO_JITTER +# define OPENSSL_NO_JITTER +# endif +# ifndef OPENSSL_NO_KTLS +# define OPENSSL_NO_KTLS +# endif +# ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +# endif +# ifndef 
OPENSSL_NO_MSAN +# define OPENSSL_NO_MSAN +# endif +# ifndef OPENSSL_NO_PIE +# define OPENSSL_NO_PIE +# endif +# ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +# endif +# ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +# endif +# ifndef OPENSSL_NO_SSL3 +# define OPENSSL_NO_SSL3 +# endif +# ifndef OPENSSL_NO_SSL3_METHOD +# define OPENSSL_NO_SSL3_METHOD +# endif +# ifndef OPENSSL_NO_SSLKEYLOG +# define OPENSSL_NO_SSLKEYLOG +# endif +# ifndef OPENSSL_NO_TFO +# define OPENSSL_NO_TFO +# endif +# ifndef OPENSSL_NO_TRACE +# define OPENSSL_NO_TRACE +# endif +# ifndef OPENSSL_NO_UBSAN +# define OPENSSL_NO_UBSAN +# endif +# ifndef OPENSSL_NO_UNIT_TEST +# define OPENSSL_NO_UNIT_TEST +# endif +# ifndef OPENSSL_NO_UPLINK +# define OPENSSL_NO_UPLINK +# endif +# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS +# define OPENSSL_NO_WEAK_SSL_CIPHERS +# endif +# ifndef OPENSSL_NO_WINSTORE +# define OPENSSL_NO_WINSTORE +# endif +# ifndef OPENSSL_NO_ZLIB +# define OPENSSL_NO_ZLIB +# endif +# ifndef OPENSSL_NO_ZLIB_DYNAMIC +# define OPENSSL_NO_ZLIB_DYNAMIC +# endif +# ifndef OPENSSL_NO_ZSTD +# define OPENSSL_NO_ZSTD +# endif +# ifndef OPENSSL_NO_ZSTD_DYNAMIC +# define OPENSSL_NO_ZSTD_DYNAMIC +# endif +# ifndef OPENSSL_NO_STATIC_ENGINE +# define OPENSSL_NO_STATIC_ENGINE +# endif + + +/* Generate 80386 code? */ +# undef I386_ONLY + +/* + * The following are cipher-specific, but are part of the public API. + */ +# if !defined(OPENSSL_SYS_UEFI) +# undef BN_LLONG +/* Only one for the following should be defined */ +# define SIXTY_FOUR_BIT_LONG +# undef SIXTY_FOUR_BIT +# undef THIRTY_TWO_BIT +# endif + +# define RC4_INT unsigned int + +# if defined(OPENSSL_NO_COMP) || (defined(OPENSSL_NO_BROTLI) && defined(OPENSSL_NO_ZSTD) && defined(OPENSSL_NO_ZLIB)) +# define OPENSSL_NO_COMP_ALG +# else +# undef OPENSSL_NO_COMP_ALG +# endif + +# ifdef __cplusplus +} +# endif + +#endif /* OPENSSL_CONFIGURATION_H */ diff --git a/contrib/openssl-cmake/common/include/openssl/core_names.h b/contrib/openssl-cmake/common/include/openssl/core_names.h new file mode 100644 index 000000000000..e93e79a52bc9 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/core_names.h @@ -0,0 +1,575 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/core_names.h.in + * + * Copyright 2019-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#ifndef OPENSSL_CORE_NAMES_H +# define OPENSSL_CORE_NAMES_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* OSSL_CIPHER_PARAM_CTS_MODE Values */ +# define OSSL_CIPHER_CTS_MODE_CS1 "CS1" +# define OSSL_CIPHER_CTS_MODE_CS2 "CS2" +# define OSSL_CIPHER_CTS_MODE_CS3 "CS3" + +/* Known CIPHER names (not a complete list) */ +# define OSSL_CIPHER_NAME_AES_128_GCM_SIV "AES-128-GCM-SIV" +# define OSSL_CIPHER_NAME_AES_192_GCM_SIV "AES-192-GCM-SIV" +# define OSSL_CIPHER_NAME_AES_256_GCM_SIV "AES-256-GCM-SIV" + +/* Known DIGEST names (not a complete list) */ +# define OSSL_DIGEST_NAME_MD5 "MD5" +# define OSSL_DIGEST_NAME_MD5_SHA1 "MD5-SHA1" +# define OSSL_DIGEST_NAME_SHA1 "SHA1" +# define OSSL_DIGEST_NAME_SHA2_224 "SHA2-224" +# define OSSL_DIGEST_NAME_SHA2_256 "SHA2-256" +# define OSSL_DIGEST_NAME_SHA2_256_192 "SHA2-256/192" +# define OSSL_DIGEST_NAME_SHA2_384 "SHA2-384" +# define OSSL_DIGEST_NAME_SHA2_512 "SHA2-512" +# define OSSL_DIGEST_NAME_SHA2_512_224 "SHA2-512/224" +# define OSSL_DIGEST_NAME_SHA2_512_256 "SHA2-512/256" +# define OSSL_DIGEST_NAME_MD2 "MD2" +# define OSSL_DIGEST_NAME_MD4 "MD4" +# define OSSL_DIGEST_NAME_MDC2 "MDC2" +# define OSSL_DIGEST_NAME_RIPEMD160 "RIPEMD160" +# define OSSL_DIGEST_NAME_SHA3_224 "SHA3-224" +# define OSSL_DIGEST_NAME_SHA3_256 "SHA3-256" +# define OSSL_DIGEST_NAME_SHA3_384 "SHA3-384" +# define OSSL_DIGEST_NAME_SHA3_512 "SHA3-512" +# define OSSL_DIGEST_NAME_KECCAK_KMAC128 "KECCAK-KMAC-128" +# define OSSL_DIGEST_NAME_KECCAK_KMAC256 "KECCAK-KMAC-256" +# define OSSL_DIGEST_NAME_SM3 "SM3" + +/* Known MAC names */ +# define OSSL_MAC_NAME_BLAKE2BMAC "BLAKE2BMAC" +# define OSSL_MAC_NAME_BLAKE2SMAC "BLAKE2SMAC" +# define OSSL_MAC_NAME_CMAC "CMAC" +# define OSSL_MAC_NAME_GMAC "GMAC" +# define OSSL_MAC_NAME_HMAC "HMAC" +# define OSSL_MAC_NAME_KMAC128 "KMAC128" +# define OSSL_MAC_NAME_KMAC256 "KMAC256" +# define OSSL_MAC_NAME_POLY1305 "POLY1305" +# define OSSL_MAC_NAME_SIPHASH "SIPHASH" + +/* Known KDF names */ +# define OSSL_KDF_NAME_HKDF "HKDF" +# define OSSL_KDF_NAME_TLS1_3_KDF "TLS13-KDF" +# define OSSL_KDF_NAME_PBKDF1 "PBKDF1" +# define OSSL_KDF_NAME_PBKDF2 "PBKDF2" +# define OSSL_KDF_NAME_SCRYPT "SCRYPT" +# define OSSL_KDF_NAME_SSHKDF "SSHKDF" +# define OSSL_KDF_NAME_SSKDF "SSKDF" +# define OSSL_KDF_NAME_TLS1_PRF "TLS1-PRF" +# define OSSL_KDF_NAME_X942KDF_ASN1 "X942KDF-ASN1" +# define OSSL_KDF_NAME_X942KDF_CONCAT "X942KDF-CONCAT" +# define OSSL_KDF_NAME_X963KDF "X963KDF" +# define OSSL_KDF_NAME_KBKDF "KBKDF" +# define OSSL_KDF_NAME_KRB5KDF "KRB5KDF" +# define OSSL_KDF_NAME_HMACDRBGKDF "HMAC-DRBG-KDF" + +/* RSA padding modes */ +# define OSSL_PKEY_RSA_PAD_MODE_NONE "none" +# define OSSL_PKEY_RSA_PAD_MODE_PKCSV15 "pkcs1" +# define OSSL_PKEY_RSA_PAD_MODE_OAEP "oaep" +# define OSSL_PKEY_RSA_PAD_MODE_X931 "x931" +# define OSSL_PKEY_RSA_PAD_MODE_PSS "pss" + +/* RSA pss padding salt length */ +# define OSSL_PKEY_RSA_PSS_SALT_LEN_DIGEST "digest" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_MAX "max" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_AUTO "auto" +# define OSSL_PKEY_RSA_PSS_SALT_LEN_AUTO_DIGEST_MAX "auto-digestmax" + +/* OSSL_PKEY_PARAM_EC_ENCODING values */ +# define OSSL_PKEY_EC_ENCODING_EXPLICIT "explicit" +# define OSSL_PKEY_EC_ENCODING_GROUP "named_curve" + +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_UNCOMPRESSED "uncompressed" +# define OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_COMPRESSED "compressed" +# define 
OSSL_PKEY_EC_POINT_CONVERSION_FORMAT_HYBRID "hybrid" + +# define OSSL_PKEY_EC_GROUP_CHECK_DEFAULT "default" +# define OSSL_PKEY_EC_GROUP_CHECK_NAMED "named" +# define OSSL_PKEY_EC_GROUP_CHECK_NAMED_NIST "named-nist" + +/* PROV_SKEY well known key types */ +# define OSSL_SKEY_TYPE_GENERIC "GENERIC-SECRET" +# define OSSL_SKEY_TYPE_AES "AES" + +/* OSSL_KEM_PARAM_OPERATION values */ +#define OSSL_KEM_PARAM_OPERATION_RSASVE "RSASVE" +#define OSSL_KEM_PARAM_OPERATION_DHKEM "DHKEM" + +/* Provider configuration variables */ +#define OSSL_PKEY_RETAIN_SEED "pkey_retain_seed" + +/* Parameter name definitions - generated by util/perl/OpenSSL/paramnames.pm */ +# define OSSL_ALG_PARAM_ALGORITHM_ID "algorithm-id" +# define OSSL_ALG_PARAM_ALGORITHM_ID_PARAMS "algorithm-id-params" +# define OSSL_ALG_PARAM_CIPHER "cipher" +# define OSSL_ALG_PARAM_DIGEST "digest" +# define OSSL_ALG_PARAM_ENGINE "engine" +# define OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR "fips-indicator" +# define OSSL_ALG_PARAM_MAC "mac" +# define OSSL_ALG_PARAM_PROPERTIES "properties" +# define OSSL_ASYM_CIPHER_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_ENGINE OSSL_PKEY_PARAM_ENGINE +# define OSSL_ASYM_CIPHER_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_ASYM_CIPHER_PARAM_FIPS_KEY_CHECK OSSL_PKEY_PARAM_FIPS_KEY_CHECK +# define OSSL_ASYM_CIPHER_PARAM_FIPS_RSA_PKCS15_PAD_DISABLED OSSL_PROV_PARAM_RSA_PKCS15_PAD_DISABLED +# define OSSL_ASYM_CIPHER_PARAM_IMPLICIT_REJECTION "implicit-rejection" +# define OSSL_ASYM_CIPHER_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_MGF1_DIGEST_PROPS OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_ASYM_CIPHER_PARAM_OAEP_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS "digest-props" +# define OSSL_ASYM_CIPHER_PARAM_OAEP_LABEL "oaep-label" +# define OSSL_ASYM_CIPHER_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE +# define OSSL_ASYM_CIPHER_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION "tls-client-version" +# define OSSL_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION "tls-negotiated-version" +# define OSSL_CAPABILITY_TLS_GROUP_ALG "tls-group-alg" +# define OSSL_CAPABILITY_TLS_GROUP_ID "tls-group-id" +# define OSSL_CAPABILITY_TLS_GROUP_IS_KEM "tls-group-is-kem" +# define OSSL_CAPABILITY_TLS_GROUP_MAX_DTLS "tls-max-dtls" +# define OSSL_CAPABILITY_TLS_GROUP_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_GROUP_MIN_DTLS "tls-min-dtls" +# define OSSL_CAPABILITY_TLS_GROUP_MIN_TLS "tls-min-tls" +# define OSSL_CAPABILITY_TLS_GROUP_NAME "tls-group-name" +# define OSSL_CAPABILITY_TLS_GROUP_NAME_INTERNAL "tls-group-name-internal" +# define OSSL_CAPABILITY_TLS_GROUP_SECURITY_BITS "tls-group-sec-bits" +# define OSSL_CAPABILITY_TLS_SIGALG_CODE_POINT "tls-sigalg-code-point" +# define OSSL_CAPABILITY_TLS_SIGALG_HASH_NAME "tls-sigalg-hash-name" +# define OSSL_CAPABILITY_TLS_SIGALG_HASH_OID "tls-sigalg-hash-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_IANA_NAME "tls-sigalg-iana-name" +# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE "tls-sigalg-keytype" +# define OSSL_CAPABILITY_TLS_SIGALG_KEYTYPE_OID "tls-sigalg-keytype-oid" +# define OSSL_CAPABILITY_TLS_SIGALG_MAX_DTLS "tls-max-dtls" +# define OSSL_CAPABILITY_TLS_SIGALG_MAX_TLS "tls-max-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_MIN_DTLS "tls-min-dtls" +# define OSSL_CAPABILITY_TLS_SIGALG_MIN_TLS "tls-min-tls" +# define OSSL_CAPABILITY_TLS_SIGALG_NAME "tls-sigalg-name" +# define OSSL_CAPABILITY_TLS_SIGALG_OID "tls-sigalg-oid" 
+# define OSSL_CAPABILITY_TLS_SIGALG_SECURITY_BITS "tls-sigalg-sec-bits" +# define OSSL_CAPABILITY_TLS_SIGALG_SIG_NAME "tls-sigalg-sig-name" +# define OSSL_CAPABILITY_TLS_SIGALG_SIG_OID "tls-sigalg-sig-oid" +# define OSSL_CIPHER_PARAM_AEAD "aead" +# define OSSL_CIPHER_PARAM_AEAD_IVLEN OSSL_CIPHER_PARAM_IVLEN +# define OSSL_CIPHER_PARAM_AEAD_IV_GENERATED "iv-generated" +# define OSSL_CIPHER_PARAM_AEAD_MAC_KEY "mackey" +# define OSSL_CIPHER_PARAM_AEAD_TAG "tag" +# define OSSL_CIPHER_PARAM_AEAD_TAGLEN "taglen" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_AAD "tlsaad" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_AAD_PAD "tlsaadpad" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN "tlsivgen" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_IV_FIXED "tlsivfixed" +# define OSSL_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV "tlsivinv" +# define OSSL_CIPHER_PARAM_ALGORITHM_ID OSSL_ALG_PARAM_ALGORITHM_ID +# define OSSL_CIPHER_PARAM_ALGORITHM_ID_PARAMS OSSL_ALG_PARAM_ALGORITHM_ID_PARAMS +# define OSSL_CIPHER_PARAM_ALGORITHM_ID_PARAMS_OLD "alg_id_param" +# define OSSL_CIPHER_PARAM_BLOCK_SIZE "blocksize" +# define OSSL_CIPHER_PARAM_CTS "cts" +# define OSSL_CIPHER_PARAM_CTS_MODE "cts_mode" +# define OSSL_CIPHER_PARAM_CUSTOM_IV "custom-iv" +# define OSSL_CIPHER_PARAM_DECRYPT_ONLY "decrypt-only" +# define OSSL_CIPHER_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_CIPHER_PARAM_FIPS_ENCRYPT_CHECK "encrypt-check" +# define OSSL_CIPHER_PARAM_HAS_RAND_KEY "has-randkey" +# define OSSL_CIPHER_PARAM_IV "iv" +# define OSSL_CIPHER_PARAM_IVLEN "ivlen" +# define OSSL_CIPHER_PARAM_KEYLEN "keylen" +# define OSSL_CIPHER_PARAM_MODE "mode" +# define OSSL_CIPHER_PARAM_NUM "num" +# define OSSL_CIPHER_PARAM_PADDING "padding" +# define OSSL_CIPHER_PARAM_PIPELINE_AEAD_TAG "pipeline-tag" +# define OSSL_CIPHER_PARAM_RANDOM_KEY "randkey" +# define OSSL_CIPHER_PARAM_RC2_KEYBITS "keybits" +# define OSSL_CIPHER_PARAM_ROUNDS "rounds" +# define OSSL_CIPHER_PARAM_SPEED "speed" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK "tls-multi" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD "tls1multi_aad" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN "tls1multi_aadpacklen" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC "tls1multi_enc" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN "tls1multi_encin" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN "tls1multi_enclen" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE "tls1multi_interleave" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE "tls1multi_maxbufsz" +# define OSSL_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT "tls1multi_maxsndfrag" +# define OSSL_CIPHER_PARAM_TLS_MAC "tls-mac" +# define OSSL_CIPHER_PARAM_TLS_MAC_SIZE "tls-mac-size" +# define OSSL_CIPHER_PARAM_TLS_VERSION "tls-version" +# define OSSL_CIPHER_PARAM_UPDATED_IV "updated-iv" +# define OSSL_CIPHER_PARAM_USE_BITS "use-bits" +# define OSSL_CIPHER_PARAM_XTS_STANDARD "xts_standard" +# define OSSL_DECODER_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_DIGEST_PARAM_ALGID_ABSENT "algid-absent" +# define OSSL_DIGEST_PARAM_BLOCK_SIZE "blocksize" +# define OSSL_DIGEST_PARAM_MICALG "micalg" +# define OSSL_DIGEST_PARAM_PAD_TYPE "pad-type" +# define OSSL_DIGEST_PARAM_SIZE "size" +# define OSSL_DIGEST_PARAM_SSL3_MS "ssl3-ms" +# define OSSL_DIGEST_PARAM_XOF "xof" +# define OSSL_DIGEST_PARAM_XOFLEN "xoflen" +# define OSSL_DRBG_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_DRBG_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_DRBG_PARAM_ENTROPY_REQUIRED "entropy_required" +# define 
OSSL_DRBG_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_DRBG_PARAM_FIPS_DIGEST_CHECK OSSL_PKEY_PARAM_FIPS_DIGEST_CHECK +# define OSSL_DRBG_PARAM_MAC OSSL_ALG_PARAM_MAC +# define OSSL_DRBG_PARAM_MAX_ADINLEN "max_adinlen" +# define OSSL_DRBG_PARAM_MAX_ENTROPYLEN "max_entropylen" +# define OSSL_DRBG_PARAM_MAX_LENGTH "maxium_length" +# define OSSL_DRBG_PARAM_MAX_NONCELEN "max_noncelen" +# define OSSL_DRBG_PARAM_MAX_PERSLEN "max_perslen" +# define OSSL_DRBG_PARAM_MIN_ENTROPYLEN "min_entropylen" +# define OSSL_DRBG_PARAM_MIN_LENGTH "minium_length" +# define OSSL_DRBG_PARAM_MIN_NONCELEN "min_noncelen" +# define OSSL_DRBG_PARAM_PREDICTION_RESISTANCE "prediction_resistance" +# define OSSL_DRBG_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_DRBG_PARAM_RANDOM_DATA "random_data" +# define OSSL_DRBG_PARAM_RESEED_COUNTER "reseed_counter" +# define OSSL_DRBG_PARAM_RESEED_REQUESTS "reseed_requests" +# define OSSL_DRBG_PARAM_RESEED_TIME "reseed_time" +# define OSSL_DRBG_PARAM_RESEED_TIME_INTERVAL "reseed_time_interval" +# define OSSL_DRBG_PARAM_SIZE "size" +# define OSSL_DRBG_PARAM_USE_DF "use_derivation_function" +# define OSSL_ENCODER_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_ENCODER_PARAM_ENCRYPT_LEVEL "encrypt-level" +# define OSSL_ENCODER_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_ENCODER_PARAM_SAVE_PARAMETERS "save-parameters" +# define OSSL_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE "ecdh-cofactor-mode" +# define OSSL_EXCHANGE_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_EXCHANGE_PARAM_FIPS_DIGEST_CHECK OSSL_PKEY_PARAM_FIPS_DIGEST_CHECK +# define OSSL_EXCHANGE_PARAM_FIPS_ECDH_COFACTOR_CHECK OSSL_PROV_PARAM_ECDH_COFACTOR_CHECK +# define OSSL_EXCHANGE_PARAM_FIPS_KEY_CHECK OSSL_PKEY_PARAM_FIPS_KEY_CHECK +# define OSSL_EXCHANGE_PARAM_KDF_DIGEST "kdf-digest" +# define OSSL_EXCHANGE_PARAM_KDF_DIGEST_PROPS "kdf-digest-props" +# define OSSL_EXCHANGE_PARAM_KDF_OUTLEN "kdf-outlen" +# define OSSL_EXCHANGE_PARAM_KDF_TYPE "kdf-type" +# define OSSL_EXCHANGE_PARAM_KDF_UKM "kdf-ukm" +# define OSSL_EXCHANGE_PARAM_PAD "pad" +# define OSSL_GEN_PARAM_ITERATION "iteration" +# define OSSL_GEN_PARAM_POTENTIAL "potential" +# define OSSL_KDF_PARAM_ARGON2_AD "ad" +# define OSSL_KDF_PARAM_ARGON2_LANES "lanes" +# define OSSL_KDF_PARAM_ARGON2_MEMCOST "memcost" +# define OSSL_KDF_PARAM_ARGON2_VERSION "version" +# define OSSL_KDF_PARAM_CEK_ALG "cekalg" +# define OSSL_KDF_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_KDF_PARAM_CONSTANT "constant" +# define OSSL_KDF_PARAM_DATA "data" +# define OSSL_KDF_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_KDF_PARAM_EARLY_CLEAN "early_clean" +# define OSSL_KDF_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_KDF_PARAM_FIPS_DIGEST_CHECK OSSL_PKEY_PARAM_FIPS_DIGEST_CHECK +# define OSSL_KDF_PARAM_FIPS_EMS_CHECK "ems_check" +# define OSSL_KDF_PARAM_FIPS_KEY_CHECK OSSL_PKEY_PARAM_FIPS_KEY_CHECK +# define OSSL_KDF_PARAM_HMACDRBG_ENTROPY "entropy" +# define OSSL_KDF_PARAM_HMACDRBG_NONCE "nonce" +# define OSSL_KDF_PARAM_INFO "info" +# define OSSL_KDF_PARAM_ITER "iter" +# define OSSL_KDF_PARAM_KBKDF_R "r" +# define OSSL_KDF_PARAM_KBKDF_USE_L "use-l" +# define OSSL_KDF_PARAM_KBKDF_USE_SEPARATOR "use-separator" +# define OSSL_KDF_PARAM_KEY "key" +# define OSSL_KDF_PARAM_LABEL "label" +# define OSSL_KDF_PARAM_MAC OSSL_ALG_PARAM_MAC +# define OSSL_KDF_PARAM_MAC_SIZE "maclen" +# define OSSL_KDF_PARAM_MODE "mode" +# define OSSL_KDF_PARAM_PASSWORD "pass" +# 
define OSSL_KDF_PARAM_PKCS12_ID "id" +# define OSSL_KDF_PARAM_PKCS5 "pkcs5" +# define OSSL_KDF_PARAM_PREFIX "prefix" +# define OSSL_KDF_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_KDF_PARAM_SALT "salt" +# define OSSL_KDF_PARAM_SCRYPT_MAXMEM "maxmem_bytes" +# define OSSL_KDF_PARAM_SCRYPT_N "n" +# define OSSL_KDF_PARAM_SCRYPT_P "p" +# define OSSL_KDF_PARAM_SCRYPT_R "r" +# define OSSL_KDF_PARAM_SECRET "secret" +# define OSSL_KDF_PARAM_SEED "seed" +# define OSSL_KDF_PARAM_SIZE "size" +# define OSSL_KDF_PARAM_SSHKDF_SESSION_ID "session_id" +# define OSSL_KDF_PARAM_SSHKDF_TYPE "type" +# define OSSL_KDF_PARAM_SSHKDF_XCGHASH "xcghash" +# define OSSL_KDF_PARAM_THREADS "threads" +# define OSSL_KDF_PARAM_UKM "ukm" +# define OSSL_KDF_PARAM_X942_ACVPINFO "acvp-info" +# define OSSL_KDF_PARAM_X942_PARTYUINFO "partyu-info" +# define OSSL_KDF_PARAM_X942_PARTYVINFO "partyv-info" +# define OSSL_KDF_PARAM_X942_SUPP_PRIVINFO "supp-privinfo" +# define OSSL_KDF_PARAM_X942_SUPP_PUBINFO "supp-pubinfo" +# define OSSL_KDF_PARAM_X942_USE_KEYBITS "use-keybits" +# define OSSL_KEM_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_KEM_PARAM_FIPS_KEY_CHECK OSSL_PKEY_PARAM_FIPS_KEY_CHECK +# define OSSL_KEM_PARAM_IKME "ikme" +# define OSSL_KEM_PARAM_OPERATION "operation" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING "block_padding" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING "hs_padding" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA "max_early_data" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN "max_frag_len" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_MODE "mode" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_OPTIONS "options" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD "read_ahead" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC "stream_mac" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_TLSTREE "tlstree" +# define OSSL_LIBSSL_RECORD_LAYER_PARAM_USE_ETM "use_etm" +# define OSSL_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN "read_buffer_len" +# define OSSL_MAC_PARAM_BLOCK_SIZE "block-size" +# define OSSL_MAC_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER +# define OSSL_MAC_PARAM_CUSTOM "custom" +# define OSSL_MAC_PARAM_C_ROUNDS "c-rounds" +# define OSSL_MAC_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_MAC_PARAM_DIGEST_NOINIT "digest-noinit" +# define OSSL_MAC_PARAM_DIGEST_ONESHOT "digest-oneshot" +# define OSSL_MAC_PARAM_D_ROUNDS "d-rounds" +# define OSSL_MAC_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_MAC_PARAM_FIPS_KEY_CHECK OSSL_PKEY_PARAM_FIPS_KEY_CHECK +# define OSSL_MAC_PARAM_FIPS_NO_SHORT_MAC OSSL_PROV_PARAM_NO_SHORT_MAC +# define OSSL_MAC_PARAM_IV "iv" +# define OSSL_MAC_PARAM_KEY "key" +# define OSSL_MAC_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_MAC_PARAM_SALT "salt" +# define OSSL_MAC_PARAM_SIZE "size" +# define OSSL_MAC_PARAM_TLS_DATA_SIZE "tls-data-size" +# define OSSL_MAC_PARAM_XOF "xof" +# define OSSL_OBJECT_PARAM_DATA "data" +# define OSSL_OBJECT_PARAM_DATA_STRUCTURE "data-structure" +# define OSSL_OBJECT_PARAM_DATA_TYPE "data-type" +# define OSSL_OBJECT_PARAM_DESC "desc" +# define OSSL_OBJECT_PARAM_INPUT_TYPE "input-type" +# define OSSL_OBJECT_PARAM_REFERENCE "reference" +# define OSSL_OBJECT_PARAM_TYPE "type" +# define OSSL_PASSPHRASE_PARAM_INFO "info" +# define OSSL_PKEY_PARAM_ALGORITHM_ID OSSL_ALG_PARAM_ALGORITHM_ID +# define OSSL_PKEY_PARAM_ALGORITHM_ID_PARAMS OSSL_ALG_PARAM_ALGORITHM_ID_PARAMS +# define OSSL_PKEY_PARAM_BITS "bits" +# define OSSL_PKEY_PARAM_CIPHER OSSL_ALG_PARAM_CIPHER 
+# define OSSL_PKEY_PARAM_DEFAULT_DIGEST "default-digest" +# define OSSL_PKEY_PARAM_DHKEM_IKM "dhkem-ikm" +# define OSSL_PKEY_PARAM_DH_GENERATOR "safeprime-generator" +# define OSSL_PKEY_PARAM_DH_PRIV_LEN "priv_len" +# define OSSL_PKEY_PARAM_DIGEST OSSL_ALG_PARAM_DIGEST +# define OSSL_PKEY_PARAM_DIGEST_SIZE "digest-size" +# define OSSL_PKEY_PARAM_DIST_ID "distid" +# define OSSL_PKEY_PARAM_EC_A "a" +# define OSSL_PKEY_PARAM_EC_B "b" +# define OSSL_PKEY_PARAM_EC_CHAR2_M "m" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K1 "k1" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K2 "k2" +# define OSSL_PKEY_PARAM_EC_CHAR2_PP_K3 "k3" +# define OSSL_PKEY_PARAM_EC_CHAR2_TP_BASIS "tp" +# define OSSL_PKEY_PARAM_EC_CHAR2_TYPE "basis-type" +# define OSSL_PKEY_PARAM_EC_COFACTOR "cofactor" +# define OSSL_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS "decoded-from-explicit" +# define OSSL_PKEY_PARAM_EC_ENCODING "encoding" +# define OSSL_PKEY_PARAM_EC_FIELD_TYPE "field-type" +# define OSSL_PKEY_PARAM_EC_GENERATOR "generator" +# define OSSL_PKEY_PARAM_EC_GROUP_CHECK_TYPE "group-check" +# define OSSL_PKEY_PARAM_EC_INCLUDE_PUBLIC "include-public" +# define OSSL_PKEY_PARAM_EC_ORDER "order" +# define OSSL_PKEY_PARAM_EC_P "p" +# define OSSL_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT "point-format" +# define OSSL_PKEY_PARAM_EC_PUB_X "qx" +# define OSSL_PKEY_PARAM_EC_PUB_Y "qy" +# define OSSL_PKEY_PARAM_EC_SEED "seed" +# define OSSL_PKEY_PARAM_ENCODED_PUBLIC_KEY "encoded-pub-key" +# define OSSL_PKEY_PARAM_ENGINE OSSL_ALG_PARAM_ENGINE +# define OSSL_PKEY_PARAM_FFC_COFACTOR "j" +# define OSSL_PKEY_PARAM_FFC_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_PKEY_PARAM_FFC_DIGEST_PROPS OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_FFC_G "g" +# define OSSL_PKEY_PARAM_FFC_GINDEX "gindex" +# define OSSL_PKEY_PARAM_FFC_H "hindex" +# define OSSL_PKEY_PARAM_FFC_P "p" +# define OSSL_PKEY_PARAM_FFC_PBITS "pbits" +# define OSSL_PKEY_PARAM_FFC_PCOUNTER "pcounter" +# define OSSL_PKEY_PARAM_FFC_Q "q" +# define OSSL_PKEY_PARAM_FFC_QBITS "qbits" +# define OSSL_PKEY_PARAM_FFC_SEED "seed" +# define OSSL_PKEY_PARAM_FFC_TYPE "type" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_G "validate-g" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_LEGACY "validate-legacy" +# define OSSL_PKEY_PARAM_FFC_VALIDATE_PQ "validate-pq" +# define OSSL_PKEY_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_PKEY_PARAM_FIPS_DIGEST_CHECK "digest-check" +# define OSSL_PKEY_PARAM_FIPS_KEY_CHECK "key-check" +# define OSSL_PKEY_PARAM_FIPS_SIGN_CHECK "sign-check" +# define OSSL_PKEY_PARAM_GROUP_NAME "group" +# define OSSL_PKEY_PARAM_IMPLICIT_REJECTION "implicit-rejection" +# define OSSL_PKEY_PARAM_MANDATORY_DIGEST "mandatory-digest" +# define OSSL_PKEY_PARAM_MASKGENFUNC "mgf" +# define OSSL_PKEY_PARAM_MAX_SIZE "max-size" +# define OSSL_PKEY_PARAM_MGF1_DIGEST "mgf1-digest" +# define OSSL_PKEY_PARAM_MGF1_PROPERTIES "mgf1-properties" +# define OSSL_PKEY_PARAM_ML_DSA_INPUT_FORMATS "ml-dsa.input_formats" +# define OSSL_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS "ml-dsa.output_formats" +# define OSSL_PKEY_PARAM_ML_DSA_PREFER_SEED "ml-dsa.prefer_seed" +# define OSSL_PKEY_PARAM_ML_DSA_RETAIN_SEED "ml-dsa.retain_seed" +# define OSSL_PKEY_PARAM_ML_DSA_SEED "seed" +# define OSSL_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE "ml-kem.import_pct_type" +# define OSSL_PKEY_PARAM_ML_KEM_INPUT_FORMATS "ml-kem.input_formats" +# define OSSL_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS "ml-kem.output_formats" +# define OSSL_PKEY_PARAM_ML_KEM_PREFER_SEED "ml-kem.prefer_seed" +# define OSSL_PKEY_PARAM_ML_KEM_RETAIN_SEED 
"ml-kem.retain_seed" +# define OSSL_PKEY_PARAM_ML_KEM_SEED "seed" +# define OSSL_PKEY_PARAM_PAD_MODE "pad-mode" +# define OSSL_PKEY_PARAM_PRIV_KEY "priv" +# define OSSL_PKEY_PARAM_PROPERTIES OSSL_ALG_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_PUB_KEY "pub" +# define OSSL_PKEY_PARAM_RSA_BITS OSSL_PKEY_PARAM_BITS +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT "rsa-coefficient" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT1 "rsa-coefficient1" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT2 "rsa-coefficient2" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT3 "rsa-coefficient3" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT4 "rsa-coefficient4" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT5 "rsa-coefficient5" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT6 "rsa-coefficient6" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT7 "rsa-coefficient7" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT8 "rsa-coefficient8" +# define OSSL_PKEY_PARAM_RSA_COEFFICIENT9 "rsa-coefficient9" +# define OSSL_PKEY_PARAM_RSA_D "d" +# define OSSL_PKEY_PARAM_RSA_DERIVE_FROM_PQ "rsa-derive-from-pq" +# define OSSL_PKEY_PARAM_RSA_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_PKEY_PARAM_RSA_DIGEST_PROPS OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_PKEY_PARAM_RSA_E "e" +# define OSSL_PKEY_PARAM_RSA_EXPONENT "rsa-exponent" +# define OSSL_PKEY_PARAM_RSA_EXPONENT1 "rsa-exponent1" +# define OSSL_PKEY_PARAM_RSA_EXPONENT10 "rsa-exponent10" +# define OSSL_PKEY_PARAM_RSA_EXPONENT2 "rsa-exponent2" +# define OSSL_PKEY_PARAM_RSA_EXPONENT3 "rsa-exponent3" +# define OSSL_PKEY_PARAM_RSA_EXPONENT4 "rsa-exponent4" +# define OSSL_PKEY_PARAM_RSA_EXPONENT5 "rsa-exponent5" +# define OSSL_PKEY_PARAM_RSA_EXPONENT6 "rsa-exponent6" +# define OSSL_PKEY_PARAM_RSA_EXPONENT7 "rsa-exponent7" +# define OSSL_PKEY_PARAM_RSA_EXPONENT8 "rsa-exponent8" +# define OSSL_PKEY_PARAM_RSA_EXPONENT9 "rsa-exponent9" +# define OSSL_PKEY_PARAM_RSA_FACTOR "rsa-factor" +# define OSSL_PKEY_PARAM_RSA_FACTOR1 "rsa-factor1" +# define OSSL_PKEY_PARAM_RSA_FACTOR10 "rsa-factor10" +# define OSSL_PKEY_PARAM_RSA_FACTOR2 "rsa-factor2" +# define OSSL_PKEY_PARAM_RSA_FACTOR3 "rsa-factor3" +# define OSSL_PKEY_PARAM_RSA_FACTOR4 "rsa-factor4" +# define OSSL_PKEY_PARAM_RSA_FACTOR5 "rsa-factor5" +# define OSSL_PKEY_PARAM_RSA_FACTOR6 "rsa-factor6" +# define OSSL_PKEY_PARAM_RSA_FACTOR7 "rsa-factor7" +# define OSSL_PKEY_PARAM_RSA_FACTOR8 "rsa-factor8" +# define OSSL_PKEY_PARAM_RSA_FACTOR9 "rsa-factor9" +# define OSSL_PKEY_PARAM_RSA_MASKGENFUNC OSSL_PKEY_PARAM_MASKGENFUNC +# define OSSL_PKEY_PARAM_RSA_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_PKEY_PARAM_RSA_N "n" +# define OSSL_PKEY_PARAM_RSA_PRIMES "primes" +# define OSSL_PKEY_PARAM_RSA_PSS_SALTLEN "saltlen" +# define OSSL_PKEY_PARAM_RSA_TEST_P1 "p1" +# define OSSL_PKEY_PARAM_RSA_TEST_P2 "p2" +# define OSSL_PKEY_PARAM_RSA_TEST_Q1 "q1" +# define OSSL_PKEY_PARAM_RSA_TEST_Q2 "q2" +# define OSSL_PKEY_PARAM_RSA_TEST_XP "xp" +# define OSSL_PKEY_PARAM_RSA_TEST_XP1 "xp1" +# define OSSL_PKEY_PARAM_RSA_TEST_XP2 "xp2" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ "xq" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ1 "xq1" +# define OSSL_PKEY_PARAM_RSA_TEST_XQ2 "xq2" +# define OSSL_PKEY_PARAM_SECURITY_BITS "security-bits" +# define OSSL_PKEY_PARAM_SLH_DSA_SEED "seed" +# define OSSL_PKEY_PARAM_USE_COFACTOR_ECDH OSSL_PKEY_PARAM_USE_COFACTOR_FLAG +# define OSSL_PKEY_PARAM_USE_COFACTOR_FLAG "use-cofactor-flag" +# define OSSL_PROV_PARAM_BUILDINFO "buildinfo" +# define OSSL_PROV_PARAM_CORE_MODULE_FILENAME "module-filename" +# define OSSL_PROV_PARAM_CORE_PROV_NAME "provider-name" +# define OSSL_PROV_PARAM_CORE_VERSION 
"openssl-version" +# define OSSL_PROV_PARAM_DRBG_TRUNC_DIGEST "drbg-no-trunc-md" +# define OSSL_PROV_PARAM_DSA_SIGN_DISABLED "dsa-sign-disabled" +# define OSSL_PROV_PARAM_ECDH_COFACTOR_CHECK "ecdh-cofactor-check" +# define OSSL_PROV_PARAM_HKDF_DIGEST_CHECK "hkdf-digest-check" +# define OSSL_PROV_PARAM_HKDF_KEY_CHECK "hkdf-key-check" +# define OSSL_PROV_PARAM_HMAC_KEY_CHECK "hmac-key-check" +# define OSSL_PROV_PARAM_KBKDF_KEY_CHECK "kbkdf-key-check" +# define OSSL_PROV_PARAM_KMAC_KEY_CHECK "kmac-key-check" +# define OSSL_PROV_PARAM_NAME "name" +# define OSSL_PROV_PARAM_NO_SHORT_MAC "no-short-mac" +# define OSSL_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK "pbkdf2-lower-bound-check" +# define OSSL_PROV_PARAM_RSA_PKCS15_PAD_DISABLED "rsa-pkcs15-pad-disabled" +# define OSSL_PROV_PARAM_RSA_PSS_SALTLEN_CHECK "rsa-pss-saltlen-check" +# define OSSL_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED "rsa-sign-x931-pad-disabled" +# define OSSL_PROV_PARAM_SECURITY_CHECKS "security-checks" +# define OSSL_PROV_PARAM_SELF_TEST_DESC "st-desc" +# define OSSL_PROV_PARAM_SELF_TEST_PHASE "st-phase" +# define OSSL_PROV_PARAM_SELF_TEST_TYPE "st-type" +# define OSSL_PROV_PARAM_SIGNATURE_DIGEST_CHECK "signature-digest-check" +# define OSSL_PROV_PARAM_SSHKDF_DIGEST_CHECK "sshkdf-digest-check" +# define OSSL_PROV_PARAM_SSHKDF_KEY_CHECK "sshkdf-key-check" +# define OSSL_PROV_PARAM_SSKDF_DIGEST_CHECK "sskdf-digest-check" +# define OSSL_PROV_PARAM_SSKDF_KEY_CHECK "sskdf-key-check" +# define OSSL_PROV_PARAM_STATUS "status" +# define OSSL_PROV_PARAM_TDES_ENCRYPT_DISABLED "tdes-encrypt-disabled" +# define OSSL_PROV_PARAM_TLS13_KDF_DIGEST_CHECK "tls13-kdf-digest-check" +# define OSSL_PROV_PARAM_TLS13_KDF_KEY_CHECK "tls13-kdf-key-check" +# define OSSL_PROV_PARAM_TLS1_PRF_DIGEST_CHECK "tls1-prf-digest-check" +# define OSSL_PROV_PARAM_TLS1_PRF_EMS_CHECK "tls1-prf-ems-check" +# define OSSL_PROV_PARAM_TLS1_PRF_KEY_CHECK "tls1-prf-key-check" +# define OSSL_PROV_PARAM_VERSION "version" +# define OSSL_PROV_PARAM_X942KDF_KEY_CHECK "x942kdf-key-check" +# define OSSL_PROV_PARAM_X963KDF_DIGEST_CHECK "x963kdf-digest-check" +# define OSSL_PROV_PARAM_X963KDF_KEY_CHECK "x963kdf-key-check" +# define OSSL_RAND_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_RAND_PARAM_GENERATE "generate" +# define OSSL_RAND_PARAM_MAX_REQUEST "max_request" +# define OSSL_RAND_PARAM_STATE "state" +# define OSSL_RAND_PARAM_STRENGTH "strength" +# define OSSL_RAND_PARAM_TEST_ENTROPY "test_entropy" +# define OSSL_RAND_PARAM_TEST_NONCE "test_nonce" +# define OSSL_SIGNATURE_PARAM_ADD_RANDOM "additional-random" +# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID OSSL_PKEY_PARAM_ALGORITHM_ID +# define OSSL_SIGNATURE_PARAM_ALGORITHM_ID_PARAMS OSSL_PKEY_PARAM_ALGORITHM_ID_PARAMS +# define OSSL_SIGNATURE_PARAM_CONTEXT_STRING "context-string" +# define OSSL_SIGNATURE_PARAM_DETERMINISTIC "deterministic" +# define OSSL_SIGNATURE_PARAM_DIGEST OSSL_PKEY_PARAM_DIGEST +# define OSSL_SIGNATURE_PARAM_DIGEST_SIZE OSSL_PKEY_PARAM_DIGEST_SIZE +# define OSSL_SIGNATURE_PARAM_FIPS_APPROVED_INDICATOR OSSL_ALG_PARAM_FIPS_APPROVED_INDICATOR +# define OSSL_SIGNATURE_PARAM_FIPS_DIGEST_CHECK OSSL_PKEY_PARAM_FIPS_DIGEST_CHECK +# define OSSL_SIGNATURE_PARAM_FIPS_KEY_CHECK OSSL_PKEY_PARAM_FIPS_KEY_CHECK +# define OSSL_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK "rsa-pss-saltlen-check" +# define OSSL_SIGNATURE_PARAM_FIPS_SIGN_CHECK OSSL_PKEY_PARAM_FIPS_SIGN_CHECK +# define OSSL_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK "sign-x931-pad-check" +# define 
OSSL_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE "verify-message" +# define OSSL_SIGNATURE_PARAM_INSTANCE "instance" +# define OSSL_SIGNATURE_PARAM_KAT "kat" +# define OSSL_SIGNATURE_PARAM_MESSAGE_ENCODING "message-encoding" +# define OSSL_SIGNATURE_PARAM_MGF1_DIGEST OSSL_PKEY_PARAM_MGF1_DIGEST +# define OSSL_SIGNATURE_PARAM_MGF1_PROPERTIES OSSL_PKEY_PARAM_MGF1_PROPERTIES +# define OSSL_SIGNATURE_PARAM_MU "mu" +# define OSSL_SIGNATURE_PARAM_NONCE_TYPE "nonce-type" +# define OSSL_SIGNATURE_PARAM_PAD_MODE OSSL_PKEY_PARAM_PAD_MODE +# define OSSL_SIGNATURE_PARAM_PROPERTIES OSSL_PKEY_PARAM_PROPERTIES +# define OSSL_SIGNATURE_PARAM_PSS_SALTLEN "saltlen" +# define OSSL_SIGNATURE_PARAM_SIGNATURE "signature" +# define OSSL_SIGNATURE_PARAM_TEST_ENTROPY "test-entropy" +# define OSSL_SKEY_PARAM_KEY_LENGTH "key-length" +# define OSSL_SKEY_PARAM_RAW_BYTES "raw-bytes" +# define OSSL_STORE_PARAM_ALIAS "alias" +# define OSSL_STORE_PARAM_DIGEST "digest" +# define OSSL_STORE_PARAM_EXPECT "expect" +# define OSSL_STORE_PARAM_FINGERPRINT "fingerprint" +# define OSSL_STORE_PARAM_INPUT_TYPE "input-type" +# define OSSL_STORE_PARAM_ISSUER "name" +# define OSSL_STORE_PARAM_PROPERTIES "properties" +# define OSSL_STORE_PARAM_SERIAL "serial" +# define OSSL_STORE_PARAM_SUBJECT "subject" + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/crmf.h b/contrib/openssl-cmake/common/include/openssl/crmf.h new file mode 100644 index 000000000000..4bf550fd47da --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/crmf.h @@ -0,0 +1,278 @@ +/*- + * WARNING: do not edit! + * Generated by Makefile from include/openssl/crmf.h.in + * + * Copyright 2007-2025 The OpenSSL Project Authors. All Rights Reserved. + * Copyright Nokia 2007-2019 + * Copyright Siemens AG 2015-2019 + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * CRMF (RFC 4211) implementation by M. Peylo, M. Viljanen, and D. von Oheimb. + */ + + + +#ifndef OPENSSL_CRMF_H +# define OPENSSL_CRMF_H + +# include + +# ifndef OPENSSL_NO_CRMF +# include +# include +# include +# include /* for GENERAL_NAME etc. 
*/ +# include + +/* explicit #includes not strictly needed since implied by the above: */ +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# define OSSL_CRMF_POPOPRIVKEY_THISMESSAGE 0 +# define OSSL_CRMF_POPOPRIVKEY_SUBSEQUENTMESSAGE 1 +# define OSSL_CRMF_POPOPRIVKEY_DHMAC 2 +# define OSSL_CRMF_POPOPRIVKEY_AGREEMAC 3 +# define OSSL_CRMF_POPOPRIVKEY_ENCRYPTEDKEY 4 + +# define OSSL_CRMF_SUBSEQUENTMESSAGE_ENCRCERT 0 +# define OSSL_CRMF_SUBSEQUENTMESSAGE_CHALLENGERESP 1 +typedef struct ossl_crmf_encryptedvalue_st OSSL_CRMF_ENCRYPTEDVALUE; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDVALUE) + +typedef struct ossl_crmf_encryptedkey_st OSSL_CRMF_ENCRYPTEDKEY; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_ENCRYPTEDKEY) + +typedef struct ossl_crmf_msg_st OSSL_CRMF_MSG; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSG) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_MSG) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_MSG, OSSL_CRMF_MSG, OSSL_CRMF_MSG) +#define sk_OSSL_CRMF_MSG_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_value(sk, idx) ((OSSL_CRMF_MSG *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_MSG_new(cmp) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp))) +#define sk_OSSL_CRMF_MSG_new_null() ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_MSG_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_MSG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (n)) +#define sk_OSSL_CRMF_MSG_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_delete(sk, i) ((OSSL_CRMF_MSG *)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (i))) +#define sk_OSSL_CRMF_MSG_delete_ptr(sk, ptr) ((OSSL_CRMF_MSG *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr))) +#define sk_OSSL_CRMF_MSG_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_pop(sk) ((OSSL_CRMF_MSG *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_shift(sk) ((OSSL_CRMF_MSG *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_MSG_sk_type(sk),ossl_check_OSSL_CRMF_MSG_freefunc_type(freefunc)) +#define sk_OSSL_CRMF_MSG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr), (idx)) +#define sk_OSSL_CRMF_MSG_set(sk, idx, ptr) ((OSSL_CRMF_MSG *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_MSG_sk_type(sk), (idx), ossl_check_OSSL_CRMF_MSG_type(ptr))) +#define sk_OSSL_CRMF_MSG_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr)) +#define sk_OSSL_CRMF_MSG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_type(ptr), pnum) +#define sk_OSSL_CRMF_MSG_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_MSG_sk_type(sk)) +#define 
sk_OSSL_CRMF_MSG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk)) +#define sk_OSSL_CRMF_MSG_dup(sk) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk))) +#define sk_OSSL_CRMF_MSG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_MSG) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_MSG_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_MSG_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_MSG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_MSG_sk_type(sk), ossl_check_OSSL_CRMF_MSG_compfunc_type(cmp))) + +typedef struct ossl_crmf_attributetypeandvalue_st OSSL_CRMF_ATTRIBUTETYPEANDVALUE; +void OSSL_CRMF_ATTRIBUTETYPEANDVALUE_free(OSSL_CRMF_ATTRIBUTETYPEANDVALUE *v); +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_ATTRIBUTETYPEANDVALUE, OSSL_CRMF_ATTRIBUTETYPEANDVALUE, OSSL_CRMF_ATTRIBUTETYPEANDVALUE) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_value(sk, idx) ((OSSL_CRMF_ATTRIBUTETYPEANDVALUE *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_new(cmp) ((STACK_OF(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_compfunc_type(cmp))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_new_null() ((STACK_OF(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), (n)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_delete(sk, i) ((OSSL_CRMF_ATTRIBUTETYPEANDVALUE *)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), (i))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_delete_ptr(sk, ptr) ((OSSL_CRMF_ATTRIBUTETYPEANDVALUE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_pop(sk) ((OSSL_CRMF_ATTRIBUTETYPEANDVALUE *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_shift(sk) ((OSSL_CRMF_ATTRIBUTETYPEANDVALUE *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk),ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_freefunc_type(freefunc)) +#define 
sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr), (idx)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_set(sk, idx, ptr) ((OSSL_CRMF_ATTRIBUTETYPEANDVALUE *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), (idx), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_type(ptr), pnum) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk)) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_dup(sk) ((STACK_OF(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_ATTRIBUTETYPEANDVALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_sk_type(sk), ossl_check_OSSL_CRMF_ATTRIBUTETYPEANDVALUE_compfunc_type(cmp))) + + +typedef struct ossl_crmf_pbmparameter_st OSSL_CRMF_PBMPARAMETER; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_PBMPARAMETER) +typedef struct ossl_crmf_poposigningkey_st OSSL_CRMF_POPOSIGNINGKEY; +typedef struct ossl_crmf_certrequest_st OSSL_CRMF_CERTREQUEST; +typedef struct ossl_crmf_certid_st OSSL_CRMF_CERTID; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_CERTID) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_CERTID) +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_CRMF_CERTID, OSSL_CRMF_CERTID, OSSL_CRMF_CERTID) +#define sk_OSSL_CRMF_CERTID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_value(sk, idx) ((OSSL_CRMF_CERTID *)OPENSSL_sk_value(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk), (idx))) +#define sk_OSSL_CRMF_CERTID_new(cmp) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new(ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp))) +#define sk_OSSL_CRMF_CERTID_new_null() ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new_null()) +#define sk_OSSL_CRMF_CERTID_new_reserve(cmp, n) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp), (n))) +#define sk_OSSL_CRMF_CERTID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (n)) +#define sk_OSSL_CRMF_CERTID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_delete(sk, i) ((OSSL_CRMF_CERTID 
*)OPENSSL_sk_delete(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (i))) +#define sk_OSSL_CRMF_CERTID_delete_ptr(sk, ptr) ((OSSL_CRMF_CERTID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr))) +#define sk_OSSL_CRMF_CERTID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_pop(sk) ((OSSL_CRMF_CERTID *)OPENSSL_sk_pop(ossl_check_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_shift(sk) ((OSSL_CRMF_CERTID *)OPENSSL_sk_shift(ossl_check_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_CRMF_CERTID_sk_type(sk),ossl_check_OSSL_CRMF_CERTID_freefunc_type(freefunc)) +#define sk_OSSL_CRMF_CERTID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr), (idx)) +#define sk_OSSL_CRMF_CERTID_set(sk, idx, ptr) ((OSSL_CRMF_CERTID *)OPENSSL_sk_set(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), (idx), ossl_check_OSSL_CRMF_CERTID_type(ptr))) +#define sk_OSSL_CRMF_CERTID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr)) +#define sk_OSSL_CRMF_CERTID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_type(ptr), pnum) +#define sk_OSSL_CRMF_CERTID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk)) +#define sk_OSSL_CRMF_CERTID_dup(sk) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk))) +#define sk_OSSL_CRMF_CERTID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_CRMF_CERTID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_copyfunc_type(copyfunc), ossl_check_OSSL_CRMF_CERTID_freefunc_type(freefunc))) +#define sk_OSSL_CRMF_CERTID_set_cmp_func(sk, cmp) ((sk_OSSL_CRMF_CERTID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_CRMF_CERTID_sk_type(sk), ossl_check_OSSL_CRMF_CERTID_compfunc_type(cmp))) + + +typedef struct ossl_crmf_pkipublicationinfo_st OSSL_CRMF_PKIPUBLICATIONINFO; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_PKIPUBLICATIONINFO) +typedef struct ossl_crmf_singlepubinfo_st OSSL_CRMF_SINGLEPUBINFO; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_SINGLEPUBINFO) +typedef struct ossl_crmf_certtemplate_st OSSL_CRMF_CERTTEMPLATE; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_CERTTEMPLATE) +DECLARE_ASN1_DUP_FUNCTION(OSSL_CRMF_CERTTEMPLATE) +typedef STACK_OF(OSSL_CRMF_MSG) OSSL_CRMF_MSGS; +DECLARE_ASN1_FUNCTIONS(OSSL_CRMF_MSGS) + +typedef struct ossl_crmf_optionalvalidity_st OSSL_CRMF_OPTIONALVALIDITY; + +/* crmf_pbm.c */ +OSSL_CRMF_PBMPARAMETER *OSSL_CRMF_pbmp_new(OSSL_LIB_CTX *libctx, size_t slen, + int owfnid, size_t itercnt, + int macnid); +int OSSL_CRMF_pbm_new(OSSL_LIB_CTX *libctx, const char *propq, + const OSSL_CRMF_PBMPARAMETER *pbmp, + const unsigned char *msg, size_t msglen, + const unsigned char *sec, size_t seclen, + unsigned char **mac, size_t *maclen); + +/* crmf_lib.c */ +int OSSL_CRMF_MSG_set1_regCtrl_regToken(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING 
*tok); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regCtrl_regToken(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_authenticator(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *auth); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regCtrl_authenticator(const OSSL_CRMF_MSG *msg); +int +OSSL_CRMF_MSG_PKIPublicationInfo_push0_SinglePubInfo(OSSL_CRMF_PKIPUBLICATIONINFO *pi, + OSSL_CRMF_SINGLEPUBINFO *spi); +# define OSSL_CRMF_PUB_METHOD_DONTCARE 0 +# define OSSL_CRMF_PUB_METHOD_X500 1 +# define OSSL_CRMF_PUB_METHOD_WEB 2 +# define OSSL_CRMF_PUB_METHOD_LDAP 3 +int OSSL_CRMF_MSG_set0_SinglePubInfo(OSSL_CRMF_SINGLEPUBINFO *spi, + int method, GENERAL_NAME *nm); +# define OSSL_CRMF_PUB_ACTION_DONTPUBLISH 0 +# define OSSL_CRMF_PUB_ACTION_PLEASEPUBLISH 1 +int OSSL_CRMF_MSG_set_PKIPublicationInfo_action(OSSL_CRMF_PKIPUBLICATIONINFO *pi, + int action); +int OSSL_CRMF_MSG_set1_regCtrl_pkiPublicationInfo(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_PKIPUBLICATIONINFO *pi); +OSSL_CRMF_PKIPUBLICATIONINFO +*OSSL_CRMF_MSG_get0_regCtrl_pkiPublicationInfo(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_protocolEncrKey(OSSL_CRMF_MSG *msg, + const X509_PUBKEY *pubkey); +X509_PUBKEY +*OSSL_CRMF_MSG_get0_regCtrl_protocolEncrKey(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regCtrl_oldCertID(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_CERTID *cid); +OSSL_CRMF_CERTID +*OSSL_CRMF_MSG_get0_regCtrl_oldCertID(const OSSL_CRMF_MSG *msg); +OSSL_CRMF_CERTID *OSSL_CRMF_CERTID_gen(const X509_NAME *issuer, + const ASN1_INTEGER *serial); + +int OSSL_CRMF_MSG_set1_regInfo_utf8Pairs(OSSL_CRMF_MSG *msg, + const ASN1_UTF8STRING *utf8pairs); +ASN1_UTF8STRING +*OSSL_CRMF_MSG_get0_regInfo_utf8Pairs(const OSSL_CRMF_MSG *msg); +int OSSL_CRMF_MSG_set1_regInfo_certReq(OSSL_CRMF_MSG *msg, + const OSSL_CRMF_CERTREQUEST *cr); +OSSL_CRMF_CERTREQUEST +*OSSL_CRMF_MSG_get0_regInfo_certReq(const OSSL_CRMF_MSG *msg); + +int OSSL_CRMF_MSG_set0_validity(OSSL_CRMF_MSG *crm, + ASN1_TIME *notBefore, ASN1_TIME *notAfter); +int OSSL_CRMF_MSG_set_certReqId(OSSL_CRMF_MSG *crm, int rid); +int OSSL_CRMF_MSG_get_certReqId(const OSSL_CRMF_MSG *crm); +int OSSL_CRMF_MSG_set0_extensions(OSSL_CRMF_MSG *crm, X509_EXTENSIONS *exts); + +int OSSL_CRMF_MSG_push0_extension(OSSL_CRMF_MSG *crm, X509_EXTENSION *ext); +# define OSSL_CRMF_POPO_NONE -1 +# define OSSL_CRMF_POPO_RAVERIFIED 0 +# define OSSL_CRMF_POPO_SIGNATURE 1 +# define OSSL_CRMF_POPO_KEYENC 2 +# define OSSL_CRMF_POPO_KEYAGREE 3 +int OSSL_CRMF_MSG_create_popo(int meth, OSSL_CRMF_MSG *crm, + EVP_PKEY *pkey, const EVP_MD *digest, + OSSL_LIB_CTX *libctx, const char *propq); +int OSSL_CRMF_MSGS_verify_popo(const OSSL_CRMF_MSGS *reqs, + int rid, int acceptRAVerified, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_CRMF_CERTTEMPLATE *OSSL_CRMF_MSG_get0_tmpl(const OSSL_CRMF_MSG *crm); +X509_PUBKEY +*OSSL_CRMF_CERTTEMPLATE_get0_publicKey(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const X509_NAME +*OSSL_CRMF_CERTTEMPLATE_get0_subject(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const X509_NAME +*OSSL_CRMF_CERTTEMPLATE_get0_issuer(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const ASN1_INTEGER +*OSSL_CRMF_CERTTEMPLATE_get0_serialNumber(const OSSL_CRMF_CERTTEMPLATE *tmpl); +X509_EXTENSIONS +*OSSL_CRMF_CERTTEMPLATE_get0_extensions(const OSSL_CRMF_CERTTEMPLATE *tmpl); +const X509_NAME +*OSSL_CRMF_CERTID_get0_issuer(const OSSL_CRMF_CERTID *cid); +const ASN1_INTEGER +*OSSL_CRMF_CERTID_get0_serialNumber(const OSSL_CRMF_CERTID *cid); +int OSSL_CRMF_CERTTEMPLATE_fill(OSSL_CRMF_CERTTEMPLATE *tmpl, + EVP_PKEY *pubkey, + const X509_NAME *subject, + 
const X509_NAME *issuer, + const ASN1_INTEGER *serial); +X509 *OSSL_CRMF_ENCRYPTEDVALUE_get1_encCert(const OSSL_CRMF_ENCRYPTEDVALUE *ecert, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey); +X509 *OSSL_CRMF_ENCRYPTEDKEY_get1_encCert(const OSSL_CRMF_ENCRYPTEDKEY *ecert, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey, unsigned int flags); +unsigned char +*OSSL_CRMF_ENCRYPTEDVALUE_decrypt(const OSSL_CRMF_ENCRYPTEDVALUE *enc, + OSSL_LIB_CTX *libctx, const char *propq, + EVP_PKEY *pkey, int *outlen); +EVP_PKEY *OSSL_CRMF_ENCRYPTEDKEY_get1_pkey(const OSSL_CRMF_ENCRYPTEDKEY *encryptedKey, + X509_STORE *ts, STACK_OF(X509) *extra, EVP_PKEY *pkey, + X509 *cert, ASN1_OCTET_STRING *secret, + OSSL_LIB_CTX *libctx, const char *propq); +int OSSL_CRMF_MSG_centralkeygen_requested(const OSSL_CRMF_MSG *crm, const X509_REQ *p10cr); +# ifndef OPENSSL_NO_CMS +OSSL_CRMF_ENCRYPTEDKEY *OSSL_CRMF_ENCRYPTEDKEY_init_envdata(CMS_EnvelopedData *envdata); +# endif + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_CRMF) */ +#endif /* !defined(OPENSSL_CRMF_H) */ diff --git a/contrib/openssl-cmake/common/include/openssl/crypto.h b/contrib/openssl-cmake/common/include/openssl/crypto.h new file mode 100644 index 000000000000..fd2cfd3e5a9a --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/crypto.h @@ -0,0 +1,583 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/crypto.h.in + * + * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CRYPTO_H +# define OPENSSL_CRYPTO_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CRYPTO_H +# endif + +# include +# include + +# include + +# ifndef OPENSSL_NO_STDIO +# include +# endif + +# include +# include +# include +# include +# include +# include + +# ifdef CHARSET_EBCDIC +# include +# endif + +/* + * Resolve problems on some operating systems with symbol names that clash + * one way or another + */ +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSLeay OpenSSL_version_num +# define SSLeay_version OpenSSL_version +# define SSLEAY_VERSION_NUMBER OPENSSL_VERSION_NUMBER +# define SSLEAY_VERSION OPENSSL_VERSION +# define SSLEAY_CFLAGS OPENSSL_CFLAGS +# define SSLEAY_BUILT_ON OPENSSL_BUILT_ON +# define SSLEAY_PLATFORM OPENSSL_PLATFORM +# define SSLEAY_DIR OPENSSL_DIR + +/* + * Old type for allocating dynamic locks. No longer used. Use the new thread + * API instead. 
+ */ +typedef struct { + int dummy; +} CRYPTO_dynlock; + +# endif /* OPENSSL_NO_DEPRECATED_1_1_0 */ + +typedef void CRYPTO_RWLOCK; + +CRYPTO_RWLOCK *CRYPTO_THREAD_lock_new(void); +__owur int CRYPTO_THREAD_read_lock(CRYPTO_RWLOCK *lock); +__owur int CRYPTO_THREAD_write_lock(CRYPTO_RWLOCK *lock); +int CRYPTO_THREAD_unlock(CRYPTO_RWLOCK *lock); +void CRYPTO_THREAD_lock_free(CRYPTO_RWLOCK *lock); + +int CRYPTO_atomic_add(int *val, int amount, int *ret, CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_add64(uint64_t *val, uint64_t op, uint64_t *ret, + CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_and(uint64_t *val, uint64_t op, uint64_t *ret, + CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_or(uint64_t *val, uint64_t op, uint64_t *ret, + CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_load(uint64_t *val, uint64_t *ret, CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_load_int(int *val, int *ret, CRYPTO_RWLOCK *lock); +int CRYPTO_atomic_store(uint64_t *dst, uint64_t val, CRYPTO_RWLOCK *lock); + +/* No longer needed, so this is a no-op */ +#define OPENSSL_malloc_init() while(0) continue + +# define OPENSSL_malloc(num) \ + CRYPTO_malloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_zalloc(num) \ + CRYPTO_zalloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_aligned_alloc(num, alignment, freeptr) \ + CRYPTO_aligned_alloc(num, alignment, freeptr, \ + OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_realloc(addr, num) \ + CRYPTO_realloc(addr, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_clear_realloc(addr, old_num, num) \ + CRYPTO_clear_realloc(addr, old_num, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_clear_free(addr, num) \ + CRYPTO_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_free(addr) \ + CRYPTO_free(addr, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_memdup(str, s) \ + CRYPTO_memdup((str), s, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_strdup(str) \ + CRYPTO_strdup(str, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_strndup(str, n) \ + CRYPTO_strndup(str, n, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_malloc(num) \ + CRYPTO_secure_malloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_zalloc(num) \ + CRYPTO_secure_zalloc(num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_free(addr) \ + CRYPTO_secure_free(addr, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_clear_free(addr, num) \ + CRYPTO_secure_clear_free(addr, num, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_secure_actual_size(ptr) \ + CRYPTO_secure_actual_size(ptr) + +size_t OPENSSL_strlcpy(char *dst, const char *src, size_t siz); +size_t OPENSSL_strlcat(char *dst, const char *src, size_t siz); +size_t OPENSSL_strnlen(const char *str, size_t maxlen); +int OPENSSL_strtoul(const char *str, char **endptr, int base, unsigned long *num); +int OPENSSL_buf2hexstr_ex(char *str, size_t str_n, size_t *strlength, + const unsigned char *buf, size_t buflen, + const char sep); +char *OPENSSL_buf2hexstr(const unsigned char *buf, long buflen); +int OPENSSL_hexstr2buf_ex(unsigned char *buf, size_t buf_n, size_t *buflen, + const char *str, const char sep); +unsigned char *OPENSSL_hexstr2buf(const char *str, long *buflen); +int OPENSSL_hexchar2int(unsigned char c); +int OPENSSL_strcasecmp(const char *s1, const char *s2); +int OPENSSL_strncasecmp(const char *s1, const char *s2, size_t n); + +# define OPENSSL_MALLOC_MAX_NELEMS(type) (((1U<<(sizeof(int)*8-1))-1)/sizeof(type)) + +/* + * These functions return the values of OPENSSL_VERSION_MAJOR, + * OPENSSL_VERSION_MINOR, OPENSSL_VERSION_PATCH, OPENSSL_VERSION_PRE_RELEASE 
+ * and OPENSSL_VERSION_BUILD_METADATA, respectively. + */ +unsigned int OPENSSL_version_major(void); +unsigned int OPENSSL_version_minor(void); +unsigned int OPENSSL_version_patch(void); +const char *OPENSSL_version_pre_release(void); +const char *OPENSSL_version_build_metadata(void); + +unsigned long OpenSSL_version_num(void); +const char *OpenSSL_version(int type); +# define OPENSSL_VERSION 0 +# define OPENSSL_CFLAGS 1 +# define OPENSSL_BUILT_ON 2 +# define OPENSSL_PLATFORM 3 +# define OPENSSL_DIR 4 +# define OPENSSL_ENGINES_DIR 5 +# define OPENSSL_VERSION_STRING 6 +# define OPENSSL_FULL_VERSION_STRING 7 +# define OPENSSL_MODULES_DIR 8 +# define OPENSSL_CPU_INFO 9 +# define OPENSSL_WINCTX 10 + +const char *OPENSSL_info(int type); +/* + * The series starts at 1001 to avoid confusion with the OpenSSL_version + * types. + */ +# define OPENSSL_INFO_CONFIG_DIR 1001 +# define OPENSSL_INFO_ENGINES_DIR 1002 +# define OPENSSL_INFO_MODULES_DIR 1003 +# define OPENSSL_INFO_DSO_EXTENSION 1004 +# define OPENSSL_INFO_DIR_FILENAME_SEPARATOR 1005 +# define OPENSSL_INFO_LIST_SEPARATOR 1006 +# define OPENSSL_INFO_SEED_SOURCE 1007 +# define OPENSSL_INFO_CPU_SETTINGS 1008 +# define OPENSSL_INFO_WINDOWS_CONTEXT 1009 + +int OPENSSL_issetugid(void); + +struct crypto_ex_data_st { + OSSL_LIB_CTX *ctx; + STACK_OF(void) *sk; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(void, void, void) +#define sk_void_num(sk) OPENSSL_sk_num(ossl_check_const_void_sk_type(sk)) +#define sk_void_value(sk, idx) ((void *)OPENSSL_sk_value(ossl_check_const_void_sk_type(sk), (idx))) +#define sk_void_new(cmp) ((STACK_OF(void) *)OPENSSL_sk_new(ossl_check_void_compfunc_type(cmp))) +#define sk_void_new_null() ((STACK_OF(void) *)OPENSSL_sk_new_null()) +#define sk_void_new_reserve(cmp, n) ((STACK_OF(void) *)OPENSSL_sk_new_reserve(ossl_check_void_compfunc_type(cmp), (n))) +#define sk_void_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_void_sk_type(sk), (n)) +#define sk_void_free(sk) OPENSSL_sk_free(ossl_check_void_sk_type(sk)) +#define sk_void_zero(sk) OPENSSL_sk_zero(ossl_check_void_sk_type(sk)) +#define sk_void_delete(sk, i) ((void *)OPENSSL_sk_delete(ossl_check_void_sk_type(sk), (i))) +#define sk_void_delete_ptr(sk, ptr) ((void *)OPENSSL_sk_delete_ptr(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr))) +#define sk_void_push(sk, ptr) OPENSSL_sk_push(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_pop(sk) ((void *)OPENSSL_sk_pop(ossl_check_void_sk_type(sk))) +#define sk_void_shift(sk) ((void *)OPENSSL_sk_shift(ossl_check_void_sk_type(sk))) +#define sk_void_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_void_sk_type(sk),ossl_check_void_freefunc_type(freefunc)) +#define sk_void_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr), (idx)) +#define sk_void_set(sk, idx, ptr) ((void *)OPENSSL_sk_set(ossl_check_void_sk_type(sk), (idx), ossl_check_void_type(ptr))) +#define sk_void_find(sk, ptr) OPENSSL_sk_find(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr)) +#define sk_void_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_void_sk_type(sk), ossl_check_void_type(ptr), pnum) +#define sk_void_sort(sk) OPENSSL_sk_sort(ossl_check_void_sk_type(sk)) +#define sk_void_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_void_sk_type(sk)) +#define sk_void_dup(sk) 
((STACK_OF(void) *)OPENSSL_sk_dup(ossl_check_const_void_sk_type(sk))) +#define sk_void_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(void) *)OPENSSL_sk_deep_copy(ossl_check_const_void_sk_type(sk), ossl_check_void_copyfunc_type(copyfunc), ossl_check_void_freefunc_type(freefunc))) +#define sk_void_set_cmp_func(sk, cmp) ((sk_void_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_void_sk_type(sk), ossl_check_void_compfunc_type(cmp))) + + + +/* + * Per class, we have a STACK of function pointers. + */ +# define CRYPTO_EX_INDEX_SSL 0 +# define CRYPTO_EX_INDEX_SSL_CTX 1 +# define CRYPTO_EX_INDEX_SSL_SESSION 2 +# define CRYPTO_EX_INDEX_X509 3 +# define CRYPTO_EX_INDEX_X509_STORE 4 +# define CRYPTO_EX_INDEX_X509_STORE_CTX 5 +# define CRYPTO_EX_INDEX_DH 6 +# define CRYPTO_EX_INDEX_DSA 7 +# define CRYPTO_EX_INDEX_EC_KEY 8 +# define CRYPTO_EX_INDEX_RSA 9 +# define CRYPTO_EX_INDEX_ENGINE 10 +# define CRYPTO_EX_INDEX_UI 11 +# define CRYPTO_EX_INDEX_BIO 12 +# define CRYPTO_EX_INDEX_APP 13 +# define CRYPTO_EX_INDEX_UI_METHOD 14 +# define CRYPTO_EX_INDEX_RAND_DRBG 15 +# define CRYPTO_EX_INDEX_DRBG CRYPTO_EX_INDEX_RAND_DRBG +# define CRYPTO_EX_INDEX_OSSL_LIB_CTX 16 +# define CRYPTO_EX_INDEX_EVP_PKEY 17 +# define CRYPTO_EX_INDEX__COUNT 18 + +typedef void CRYPTO_EX_new (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef void CRYPTO_EX_free (void *parent, void *ptr, CRYPTO_EX_DATA *ad, + int idx, long argl, void *argp); +typedef int CRYPTO_EX_dup (CRYPTO_EX_DATA *to, const CRYPTO_EX_DATA *from, + void **from_d, int idx, long argl, void *argp); +__owur int CRYPTO_get_ex_new_index(int class_index, long argl, void *argp, + CRYPTO_EX_new *new_func, + CRYPTO_EX_dup *dup_func, + CRYPTO_EX_free *free_func); +/* No longer use an index. */ +int CRYPTO_free_ex_index(int class_index, int idx); + +/* + * Initialise/duplicate/free CRYPTO_EX_DATA variables corresponding to a + * given class (invokes whatever per-class callbacks are applicable) + */ +int CRYPTO_new_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad); +int CRYPTO_dup_ex_data(int class_index, CRYPTO_EX_DATA *to, + const CRYPTO_EX_DATA *from); + +void CRYPTO_free_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad); + +/* Allocate a single item in the CRYPTO_EX_DATA variable */ +int CRYPTO_alloc_ex_data(int class_index, void *obj, CRYPTO_EX_DATA *ad, + int idx); + +/* + * Get/set data in a CRYPTO_EX_DATA variable corresponding to a particular + * index (relative to the class type involved) + */ +int CRYPTO_set_ex_data(CRYPTO_EX_DATA *ad, int idx, void *val); +void *CRYPTO_get_ex_data(const CRYPTO_EX_DATA *ad, int idx); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +/* + * This function cleans up all "ex_data" state. It mustn't be called under + * potential race-conditions. + */ +# define CRYPTO_cleanup_all_ex_data() while(0) continue + +/* + * The old locking functions have been removed completely without compatibility + * macros. This is because the old functions either could not properly report + * errors, or the returned error values were not clearly documented. + * Replacing the locking functions with no-ops would cause race condition + * issues in the affected applications. It is far better for them to fail at + * compile time. + * On the other hand, the locking callbacks are no longer used. Consequently, + * the callback management functions can be safely replaced with no-op macros. 
+ */ +# define CRYPTO_num_locks() (1) +# define CRYPTO_set_locking_callback(func) +# define CRYPTO_get_locking_callback() (NULL) +# define CRYPTO_set_add_lock_callback(func) +# define CRYPTO_get_add_lock_callback() (NULL) + +/* + * These defines where used in combination with the old locking callbacks, + * they are not called anymore, but old code that's not called might still + * use them. + */ +# define CRYPTO_LOCK 1 +# define CRYPTO_UNLOCK 2 +# define CRYPTO_READ 4 +# define CRYPTO_WRITE 8 + +/* This structure is no longer used */ +typedef struct crypto_threadid_st { + int dummy; +} CRYPTO_THREADID; +/* Only use CRYPTO_THREADID_set_[numeric|pointer]() within callbacks */ +# define CRYPTO_THREADID_set_numeric(id, val) +# define CRYPTO_THREADID_set_pointer(id, ptr) +# define CRYPTO_THREADID_set_callback(threadid_func) (0) +# define CRYPTO_THREADID_get_callback() (NULL) +# define CRYPTO_THREADID_current(id) +# define CRYPTO_THREADID_cmp(a, b) (-1) +# define CRYPTO_THREADID_cpy(dest, src) +# define CRYPTO_THREADID_hash(id) (0UL) + +# ifndef OPENSSL_NO_DEPRECATED_1_0_0 +# define CRYPTO_set_id_callback(func) +# define CRYPTO_get_id_callback() (NULL) +# define CRYPTO_thread_id() (0UL) +# endif /* OPENSSL_NO_DEPRECATED_1_0_0 */ + +# define CRYPTO_set_dynlock_create_callback(dyn_create_function) +# define CRYPTO_set_dynlock_lock_callback(dyn_lock_function) +# define CRYPTO_set_dynlock_destroy_callback(dyn_destroy_function) +# define CRYPTO_get_dynlock_create_callback() (NULL) +# define CRYPTO_get_dynlock_lock_callback() (NULL) +# define CRYPTO_get_dynlock_destroy_callback() (NULL) +# endif /* OPENSSL_NO_DEPRECATED_1_1_0 */ + +typedef void *(*CRYPTO_malloc_fn)(size_t num, const char *file, int line); +typedef void *(*CRYPTO_realloc_fn)(void *addr, size_t num, const char *file, + int line); +typedef void (*CRYPTO_free_fn)(void *addr, const char *file, int line); +int CRYPTO_set_mem_functions(CRYPTO_malloc_fn malloc_fn, + CRYPTO_realloc_fn realloc_fn, + CRYPTO_free_fn free_fn); +void CRYPTO_get_mem_functions(CRYPTO_malloc_fn *malloc_fn, + CRYPTO_realloc_fn *realloc_fn, + CRYPTO_free_fn *free_fn); + +OSSL_CRYPTO_ALLOC void *CRYPTO_malloc(size_t num, const char *file, int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_zalloc(size_t num, const char *file, int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_aligned_alloc(size_t num, size_t align, + void **freeptr, const char *file, + int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_memdup(const void *str, size_t siz, const char *file, int line); +OSSL_CRYPTO_ALLOC char *CRYPTO_strdup(const char *str, const char *file, int line); +OSSL_CRYPTO_ALLOC char *CRYPTO_strndup(const char *str, size_t s, const char *file, int line); +void CRYPTO_free(void *ptr, const char *file, int line); +void CRYPTO_clear_free(void *ptr, size_t num, const char *file, int line); +void *CRYPTO_realloc(void *addr, size_t num, const char *file, int line); +void *CRYPTO_clear_realloc(void *addr, size_t old_num, size_t num, + const char *file, int line); + +int CRYPTO_secure_malloc_init(size_t sz, size_t minsize); +int CRYPTO_secure_malloc_done(void); +OSSL_CRYPTO_ALLOC void *CRYPTO_secure_malloc(size_t num, const char *file, int line); +OSSL_CRYPTO_ALLOC void *CRYPTO_secure_zalloc(size_t num, const char *file, int line); +void CRYPTO_secure_free(void *ptr, const char *file, int line); +void CRYPTO_secure_clear_free(void *ptr, size_t num, + const char *file, int line); +int CRYPTO_secure_allocated(const void *ptr); +int CRYPTO_secure_malloc_initialized(void); +size_t CRYPTO_secure_actual_size(void *ptr); 
+size_t CRYPTO_secure_used(void); + +void OPENSSL_cleanse(void *ptr, size_t len); + +# ifndef OPENSSL_NO_CRYPTO_MDEBUG +/* + * The following can be used to detect memory leaks in the library. If + * used, it turns on malloc checking + */ +# define CRYPTO_MEM_CHECK_OFF 0x0 /* Control only */ +# define CRYPTO_MEM_CHECK_ON 0x1 /* Control and mode bit */ +# define CRYPTO_MEM_CHECK_ENABLE 0x2 /* Control and mode bit */ +# define CRYPTO_MEM_CHECK_DISABLE 0x3 /* Control only */ + +/* max allowed length for value of OPENSSL_MALLOC_FAILURES env var. */ +# define CRYPTO_MEM_CHECK_MAX_FS 256 + +void CRYPTO_get_alloc_counts(int *mcount, int *rcount, int *fcount); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define OPENSSL_mem_debug_push(info) \ + CRYPTO_mem_debug_push(info, OPENSSL_FILE, OPENSSL_LINE) +# define OPENSSL_mem_debug_pop() \ + CRYPTO_mem_debug_pop() +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int CRYPTO_set_mem_debug(int flag); +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_ctrl(int mode); +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_debug_push(const char *info, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_debug_pop(void); +OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_malloc(void *addr, size_t num, + int flag, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_realloc(void *addr1, void *addr2, + size_t num, int flag, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 void CRYPTO_mem_debug_free(void *addr, int flag, + const char *file, int line); +OSSL_DEPRECATEDIN_3_0 +int CRYPTO_mem_leaks_cb(int (*cb)(const char *str, size_t len, void *u), + void *u); +# endif +# ifndef OPENSSL_NO_STDIO +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_leaks_fp(FILE *); +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int CRYPTO_mem_leaks(BIO *bio); +# endif +# endif /* OPENSSL_NO_CRYPTO_MDEBUG */ + +/* die if we have to */ +ossl_noreturn void OPENSSL_die(const char *assertion, const char *file, int line); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OpenSSLDie(f,l,a) OPENSSL_die((a),(f),(l)) +# endif +# define OPENSSL_assert(e) \ + (void)((e) ? 0 : (OPENSSL_die("assertion failed: " #e, OPENSSL_FILE, OPENSSL_LINE), 1)) + +int OPENSSL_isservice(void); + +void OPENSSL_init(void); +# ifdef OPENSSL_SYS_UNIX +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_prepare(void); +OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_parent(void); +OSSL_DEPRECATEDIN_3_0 void OPENSSL_fork_child(void); +# endif +# endif + +struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result); +int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec); +int OPENSSL_gmtime_diff(int *pday, int *psec, + const struct tm *from, const struct tm *to); + +/* + * CRYPTO_memcmp returns zero iff the |len| bytes at |a| and |b| are equal. + * It takes an amount of time dependent on |len|, but independent of the + * contents of |a| and |b|. Unlike memcmp, it cannot be used to put elements + * into a defined order as the return value when a != b is undefined, other + * than to be non-zero. 
+ */ +int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len); + +/* Standard initialisation options */ +# define OPENSSL_INIT_NO_LOAD_CRYPTO_STRINGS 0x00000001L +# define OPENSSL_INIT_LOAD_CRYPTO_STRINGS 0x00000002L +# define OPENSSL_INIT_ADD_ALL_CIPHERS 0x00000004L +# define OPENSSL_INIT_ADD_ALL_DIGESTS 0x00000008L +# define OPENSSL_INIT_NO_ADD_ALL_CIPHERS 0x00000010L +# define OPENSSL_INIT_NO_ADD_ALL_DIGESTS 0x00000020L +# define OPENSSL_INIT_LOAD_CONFIG 0x00000040L +# define OPENSSL_INIT_NO_LOAD_CONFIG 0x00000080L +# define OPENSSL_INIT_ASYNC 0x00000100L +# define OPENSSL_INIT_ENGINE_RDRAND 0x00000200L +# define OPENSSL_INIT_ENGINE_DYNAMIC 0x00000400L +# define OPENSSL_INIT_ENGINE_OPENSSL 0x00000800L +# define OPENSSL_INIT_ENGINE_CRYPTODEV 0x00001000L +# define OPENSSL_INIT_ENGINE_CAPI 0x00002000L +# define OPENSSL_INIT_ENGINE_PADLOCK 0x00004000L +# define OPENSSL_INIT_ENGINE_AFALG 0x00008000L +/* FREE: 0x00010000L */ +# define OPENSSL_INIT_ATFORK 0x00020000L +/* OPENSSL_INIT_BASE_ONLY 0x00040000L */ +# define OPENSSL_INIT_NO_ATEXIT 0x00080000L +/* OPENSSL_INIT flag range 0x03f00000 reserved for OPENSSL_init_ssl() */ +/* FREE: 0x04000000L */ +/* FREE: 0x08000000L */ +/* FREE: 0x10000000L */ +/* FREE: 0x20000000L */ +/* FREE: 0x40000000L */ +/* FREE: 0x80000000L */ +/* Max OPENSSL_INIT flag value is 0x80000000 */ + +/* openssl and dasync not counted as builtin */ +# define OPENSSL_INIT_ENGINE_ALL_BUILTIN \ + (OPENSSL_INIT_ENGINE_RDRAND | OPENSSL_INIT_ENGINE_DYNAMIC \ + | OPENSSL_INIT_ENGINE_CRYPTODEV | OPENSSL_INIT_ENGINE_CAPI | \ + OPENSSL_INIT_ENGINE_PADLOCK) + +/* Library initialisation functions */ +void OPENSSL_cleanup(void); +int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings); +int OPENSSL_atexit(void (*handler)(void)); +void OPENSSL_thread_stop(void); +void OPENSSL_thread_stop_ex(OSSL_LIB_CTX *ctx); + +/* Low-level control of initialization */ +OPENSSL_INIT_SETTINGS *OPENSSL_INIT_new(void); +# ifndef OPENSSL_NO_STDIO +int OPENSSL_INIT_set_config_filename(OPENSSL_INIT_SETTINGS *settings, + const char *config_filename); +void OPENSSL_INIT_set_config_file_flags(OPENSSL_INIT_SETTINGS *settings, + unsigned long flags); +int OPENSSL_INIT_set_config_appname(OPENSSL_INIT_SETTINGS *settings, + const char *config_appname); +# endif +void OPENSSL_INIT_free(OPENSSL_INIT_SETTINGS *settings); + +# if defined(OPENSSL_THREADS) && !defined(CRYPTO_TDEBUG) +# if defined(_WIN32) +# if defined(BASETYPES) || defined(_WINDEF_H) +/* application has to include in order to use this */ +typedef DWORD CRYPTO_THREAD_LOCAL; +typedef DWORD CRYPTO_THREAD_ID; + +typedef LONG CRYPTO_ONCE; +# define CRYPTO_ONCE_STATIC_INIT 0 +# endif +# else +# if defined(__TANDEM) && defined(_SPT_MODEL_) +# define SPT_THREAD_SIGNAL 1 +# define SPT_THREAD_AWARE 1 +# include +# else +# include +# endif +typedef pthread_once_t CRYPTO_ONCE; +typedef pthread_key_t CRYPTO_THREAD_LOCAL; +typedef pthread_t CRYPTO_THREAD_ID; + +# define CRYPTO_ONCE_STATIC_INIT PTHREAD_ONCE_INIT +# endif +# endif + +# if !defined(CRYPTO_ONCE_STATIC_INIT) +typedef unsigned int CRYPTO_ONCE; +typedef unsigned int CRYPTO_THREAD_LOCAL; +typedef unsigned int CRYPTO_THREAD_ID; +# define CRYPTO_ONCE_STATIC_INIT 0 +# endif + +int CRYPTO_THREAD_run_once(CRYPTO_ONCE *once, void (*init)(void)); + +int CRYPTO_THREAD_init_local(CRYPTO_THREAD_LOCAL *key, void (*cleanup)(void *)); +void *CRYPTO_THREAD_get_local(CRYPTO_THREAD_LOCAL *key); +int CRYPTO_THREAD_set_local(CRYPTO_THREAD_LOCAL *key, void *val); +int 
CRYPTO_THREAD_cleanup_local(CRYPTO_THREAD_LOCAL *key); + +CRYPTO_THREAD_ID CRYPTO_THREAD_get_current_id(void); +int CRYPTO_THREAD_compare_id(CRYPTO_THREAD_ID a, CRYPTO_THREAD_ID b); + +OSSL_LIB_CTX *OSSL_LIB_CTX_new(void); +OSSL_LIB_CTX *OSSL_LIB_CTX_new_from_dispatch(const OSSL_CORE_HANDLE *handle, + const OSSL_DISPATCH *in); +OSSL_LIB_CTX *OSSL_LIB_CTX_new_child(const OSSL_CORE_HANDLE *handle, + const OSSL_DISPATCH *in); +int OSSL_LIB_CTX_load_config(OSSL_LIB_CTX *ctx, const char *config_file); +void OSSL_LIB_CTX_free(OSSL_LIB_CTX *); +OSSL_LIB_CTX *OSSL_LIB_CTX_get0_global_default(void); +OSSL_LIB_CTX *OSSL_LIB_CTX_set0_default(OSSL_LIB_CTX *libctx); +int OSSL_LIB_CTX_get_conf_diagnostics(OSSL_LIB_CTX *ctx); +void OSSL_LIB_CTX_set_conf_diagnostics(OSSL_LIB_CTX *ctx, int value); + +void OSSL_sleep(uint64_t millis); + + +void *OSSL_LIB_CTX_get_data(OSSL_LIB_CTX *ctx, int index); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/ct.h b/contrib/openssl-cmake/common/include/openssl/ct.h new file mode 100644 index 000000000000..e6dd1192a4e0 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/ct.h @@ -0,0 +1,573 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ct.h.in + * + * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_CT_H +# define OPENSSL_CT_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_CT_H +# endif + +# include + +# ifndef OPENSSL_NO_CT +# include +# include +# include +# include +# ifdef __cplusplus +extern "C" { +# endif + + +/* Minimum RSA key size, from RFC6962 */ +# define SCT_MIN_RSA_BITS 2048 + +/* All hashes are SHA256 in v1 of Certificate Transparency */ +# define CT_V1_HASHLEN SHA256_DIGEST_LENGTH + +SKM_DEFINE_STACK_OF_INTERNAL(SCT, SCT, SCT) +#define sk_SCT_num(sk) OPENSSL_sk_num(ossl_check_const_SCT_sk_type(sk)) +#define sk_SCT_value(sk, idx) ((SCT *)OPENSSL_sk_value(ossl_check_const_SCT_sk_type(sk), (idx))) +#define sk_SCT_new(cmp) ((STACK_OF(SCT) *)OPENSSL_sk_new(ossl_check_SCT_compfunc_type(cmp))) +#define sk_SCT_new_null() ((STACK_OF(SCT) *)OPENSSL_sk_new_null()) +#define sk_SCT_new_reserve(cmp, n) ((STACK_OF(SCT) *)OPENSSL_sk_new_reserve(ossl_check_SCT_compfunc_type(cmp), (n))) +#define sk_SCT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SCT_sk_type(sk), (n)) +#define sk_SCT_free(sk) OPENSSL_sk_free(ossl_check_SCT_sk_type(sk)) +#define sk_SCT_zero(sk) OPENSSL_sk_zero(ossl_check_SCT_sk_type(sk)) +#define sk_SCT_delete(sk, i) ((SCT *)OPENSSL_sk_delete(ossl_check_SCT_sk_type(sk), (i))) +#define sk_SCT_delete_ptr(sk, ptr) ((SCT *)OPENSSL_sk_delete_ptr(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr))) +#define sk_SCT_push(sk, ptr) OPENSSL_sk_push(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_pop(sk) ((SCT *)OPENSSL_sk_pop(ossl_check_SCT_sk_type(sk))) +#define sk_SCT_shift(sk) ((SCT *)OPENSSL_sk_shift(ossl_check_SCT_sk_type(sk))) +#define sk_SCT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SCT_sk_type(sk),ossl_check_SCT_freefunc_type(freefunc)) +#define sk_SCT_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr), (idx)) +#define sk_SCT_set(sk, idx, ptr) ((SCT *)OPENSSL_sk_set(ossl_check_SCT_sk_type(sk), (idx), ossl_check_SCT_type(ptr))) +#define sk_SCT_find(sk, ptr) OPENSSL_sk_find(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr)) +#define sk_SCT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SCT_sk_type(sk), ossl_check_SCT_type(ptr), pnum) +#define sk_SCT_sort(sk) OPENSSL_sk_sort(ossl_check_SCT_sk_type(sk)) +#define sk_SCT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SCT_sk_type(sk)) +#define sk_SCT_dup(sk) ((STACK_OF(SCT) *)OPENSSL_sk_dup(ossl_check_const_SCT_sk_type(sk))) +#define sk_SCT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SCT) *)OPENSSL_sk_deep_copy(ossl_check_const_SCT_sk_type(sk), ossl_check_SCT_copyfunc_type(copyfunc), ossl_check_SCT_freefunc_type(freefunc))) +#define sk_SCT_set_cmp_func(sk, cmp) ((sk_SCT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SCT_sk_type(sk), ossl_check_SCT_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(CTLOG, CTLOG, CTLOG) +#define sk_CTLOG_num(sk) OPENSSL_sk_num(ossl_check_const_CTLOG_sk_type(sk)) +#define sk_CTLOG_value(sk, idx) ((CTLOG *)OPENSSL_sk_value(ossl_check_const_CTLOG_sk_type(sk), (idx))) +#define sk_CTLOG_new(cmp) ((STACK_OF(CTLOG) *)OPENSSL_sk_new(ossl_check_CTLOG_compfunc_type(cmp))) +#define sk_CTLOG_new_null() ((STACK_OF(CTLOG) *)OPENSSL_sk_new_null()) +#define sk_CTLOG_new_reserve(cmp, n) ((STACK_OF(CTLOG) *)OPENSSL_sk_new_reserve(ossl_check_CTLOG_compfunc_type(cmp), (n))) +#define sk_CTLOG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_CTLOG_sk_type(sk), (n)) +#define sk_CTLOG_free(sk) OPENSSL_sk_free(ossl_check_CTLOG_sk_type(sk)) +#define sk_CTLOG_zero(sk) OPENSSL_sk_zero(ossl_check_CTLOG_sk_type(sk)) +#define sk_CTLOG_delete(sk, i) ((CTLOG *)OPENSSL_sk_delete(ossl_check_CTLOG_sk_type(sk), (i))) +#define sk_CTLOG_delete_ptr(sk, ptr) ((CTLOG *)OPENSSL_sk_delete_ptr(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr))) +#define sk_CTLOG_push(sk, ptr) OPENSSL_sk_push(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_pop(sk) ((CTLOG *)OPENSSL_sk_pop(ossl_check_CTLOG_sk_type(sk))) +#define sk_CTLOG_shift(sk) ((CTLOG *)OPENSSL_sk_shift(ossl_check_CTLOG_sk_type(sk))) +#define sk_CTLOG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_CTLOG_sk_type(sk),ossl_check_CTLOG_freefunc_type(freefunc)) +#define sk_CTLOG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr), (idx)) +#define sk_CTLOG_set(sk, idx, ptr) ((CTLOG *)OPENSSL_sk_set(ossl_check_CTLOG_sk_type(sk), (idx), ossl_check_CTLOG_type(ptr))) +#define sk_CTLOG_find(sk, ptr) OPENSSL_sk_find(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr)) +#define sk_CTLOG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_type(ptr), pnum) +#define sk_CTLOG_sort(sk) OPENSSL_sk_sort(ossl_check_CTLOG_sk_type(sk)) +#define sk_CTLOG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_CTLOG_sk_type(sk)) +#define sk_CTLOG_dup(sk) ((STACK_OF(CTLOG) *)OPENSSL_sk_dup(ossl_check_const_CTLOG_sk_type(sk))) +#define sk_CTLOG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(CTLOG) 
*)OPENSSL_sk_deep_copy(ossl_check_const_CTLOG_sk_type(sk), ossl_check_CTLOG_copyfunc_type(copyfunc), ossl_check_CTLOG_freefunc_type(freefunc))) +#define sk_CTLOG_set_cmp_func(sk, cmp) ((sk_CTLOG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_CTLOG_sk_type(sk), ossl_check_CTLOG_compfunc_type(cmp))) + + + +typedef enum { + CT_LOG_ENTRY_TYPE_NOT_SET = -1, + CT_LOG_ENTRY_TYPE_X509 = 0, + CT_LOG_ENTRY_TYPE_PRECERT = 1 +} ct_log_entry_type_t; + +typedef enum { + SCT_VERSION_NOT_SET = -1, + SCT_VERSION_V1 = 0 +} sct_version_t; + +typedef enum { + SCT_SOURCE_UNKNOWN, + SCT_SOURCE_TLS_EXTENSION, + SCT_SOURCE_X509V3_EXTENSION, + SCT_SOURCE_OCSP_STAPLED_RESPONSE +} sct_source_t; + +typedef enum { + SCT_VALIDATION_STATUS_NOT_SET, + SCT_VALIDATION_STATUS_UNKNOWN_LOG, + SCT_VALIDATION_STATUS_VALID, + SCT_VALIDATION_STATUS_INVALID, + SCT_VALIDATION_STATUS_UNVERIFIED, + SCT_VALIDATION_STATUS_UNKNOWN_VERSION +} sct_validation_status_t; + +/****************************************** + * CT policy evaluation context functions * + ******************************************/ + +/* + * Creates a new, empty policy evaluation context associated with the given + * library context and property query string. + * The caller is responsible for calling CT_POLICY_EVAL_CTX_free when finished + * with the CT_POLICY_EVAL_CTX. + */ +CT_POLICY_EVAL_CTX *CT_POLICY_EVAL_CTX_new_ex(OSSL_LIB_CTX *libctx, + const char *propq); + +/* + * The same as CT_POLICY_EVAL_CTX_new_ex() but the default library + * context and property query string is used. + */ +CT_POLICY_EVAL_CTX *CT_POLICY_EVAL_CTX_new(void); + +/* Deletes a policy evaluation context and anything it owns. */ +void CT_POLICY_EVAL_CTX_free(CT_POLICY_EVAL_CTX *ctx); + +/* Gets the peer certificate that the SCTs are for */ +X509* CT_POLICY_EVAL_CTX_get0_cert(const CT_POLICY_EVAL_CTX *ctx); + +/* + * Sets the certificate associated with the received SCTs. + * Increments the reference count of cert. + * Returns 1 on success, 0 otherwise. + */ +int CT_POLICY_EVAL_CTX_set1_cert(CT_POLICY_EVAL_CTX *ctx, X509 *cert); + +/* Gets the issuer of the aforementioned certificate */ +X509* CT_POLICY_EVAL_CTX_get0_issuer(const CT_POLICY_EVAL_CTX *ctx); + +/* + * Sets the issuer of the certificate associated with the received SCTs. + * Increments the reference count of issuer. + * Returns 1 on success, 0 otherwise. + */ +int CT_POLICY_EVAL_CTX_set1_issuer(CT_POLICY_EVAL_CTX *ctx, X509 *issuer); + +/* Gets the CT logs that are trusted sources of SCTs */ +const CTLOG_STORE *CT_POLICY_EVAL_CTX_get0_log_store(const CT_POLICY_EVAL_CTX *ctx); + +/* Sets the log store that is in use. It must outlive the CT_POLICY_EVAL_CTX. */ +void CT_POLICY_EVAL_CTX_set_shared_CTLOG_STORE(CT_POLICY_EVAL_CTX *ctx, + CTLOG_STORE *log_store); + +/* + * Gets the time, in milliseconds since the Unix epoch, that will be used as the + * current time when checking whether an SCT was issued in the future. + * Such SCTs will fail validation, as required by RFC6962. + */ +uint64_t CT_POLICY_EVAL_CTX_get_time(const CT_POLICY_EVAL_CTX *ctx); + +/* + * Sets the time to evaluate SCTs against, in milliseconds since the Unix epoch. + * If an SCT's timestamp is after this time, it will be interpreted as having + * been issued in the future. RFC6962 states that "TLS clients MUST reject SCTs + * whose timestamp is in the future", so an SCT will not validate in this case. 
+ */ +void CT_POLICY_EVAL_CTX_set_time(CT_POLICY_EVAL_CTX *ctx, uint64_t time_in_ms); + +/***************** + * SCT functions * + *****************/ + +/* + * Creates a new, blank SCT. + * The caller is responsible for calling SCT_free when finished with the SCT. + */ +SCT *SCT_new(void); + +/* + * Creates a new SCT from some base64-encoded strings. + * The caller is responsible for calling SCT_free when finished with the SCT. + */ +SCT *SCT_new_from_base64(unsigned char version, + const char *logid_base64, + ct_log_entry_type_t entry_type, + uint64_t timestamp, + const char *extensions_base64, + const char *signature_base64); + +/* + * Frees the SCT and the underlying data structures. + */ +void SCT_free(SCT *sct); + +/* + * Free a stack of SCTs, and the underlying SCTs themselves. + * Intended to be compatible with X509V3_EXT_FREE. + */ +void SCT_LIST_free(STACK_OF(SCT) *a); + +/* + * Returns the version of the SCT. + */ +sct_version_t SCT_get_version(const SCT *sct); + +/* + * Set the version of an SCT. + * Returns 1 on success, 0 if the version is unrecognized. + */ +__owur int SCT_set_version(SCT *sct, sct_version_t version); + +/* + * Returns the log entry type of the SCT. + */ +ct_log_entry_type_t SCT_get_log_entry_type(const SCT *sct); + +/* + * Set the log entry type of an SCT. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set_log_entry_type(SCT *sct, ct_log_entry_type_t entry_type); + +/* + * Gets the ID of the log that an SCT came from. + * Ownership of the log ID remains with the SCT. + * Returns the length of the log ID. + */ +size_t SCT_get0_log_id(const SCT *sct, unsigned char **log_id); + +/* + * Set the log ID of an SCT to point directly to the *log_id specified. + * The SCT takes ownership of the specified pointer. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set0_log_id(SCT *sct, unsigned char *log_id, size_t log_id_len); + +/* + * Set the log ID of an SCT. + * This makes a copy of the log_id. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set1_log_id(SCT *sct, const unsigned char *log_id, + size_t log_id_len); + +/* + * Returns the timestamp for the SCT (epoch time in milliseconds). + */ +uint64_t SCT_get_timestamp(const SCT *sct); + +/* + * Set the timestamp of an SCT (epoch time in milliseconds). + */ +void SCT_set_timestamp(SCT *sct, uint64_t timestamp); + +/* + * Return the NID for the signature used by the SCT. + * For CT v1, this will be either NID_sha256WithRSAEncryption or + * NID_ecdsa_with_SHA256 (or NID_undef if incorrect/unset). + */ +int SCT_get_signature_nid(const SCT *sct); + +/* + * Set the signature type of an SCT + * For CT v1, this should be either NID_sha256WithRSAEncryption or + * NID_ecdsa_with_SHA256. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set_signature_nid(SCT *sct, int nid); + +/* + * Set *ext to point to the extension data for the SCT. ext must not be NULL. + * The SCT retains ownership of this pointer. + * Returns length of the data pointed to. + */ +size_t SCT_get0_extensions(const SCT *sct, unsigned char **ext); + +/* + * Set the extensions of an SCT to point directly to the *ext specified. + * The SCT takes ownership of the specified pointer. + */ +void SCT_set0_extensions(SCT *sct, unsigned char *ext, size_t ext_len); + +/* + * Set the extensions of an SCT. + * This takes a copy of the ext. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set1_extensions(SCT *sct, const unsigned char *ext, + size_t ext_len); + +/* + * Set *sig to point to the signature for the SCT. 
sig must not be NULL. + * The SCT retains ownership of this pointer. + * Returns length of the data pointed to. + */ +size_t SCT_get0_signature(const SCT *sct, unsigned char **sig); + +/* + * Set the signature of an SCT to point directly to the *sig specified. + * The SCT takes ownership of the specified pointer. + */ +void SCT_set0_signature(SCT *sct, unsigned char *sig, size_t sig_len); + +/* + * Set the signature of an SCT to be a copy of the *sig specified. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set1_signature(SCT *sct, const unsigned char *sig, + size_t sig_len); + +/* + * The origin of this SCT, e.g. TLS extension, OCSP response, etc. + */ +sct_source_t SCT_get_source(const SCT *sct); + +/* + * Set the origin of this SCT, e.g. TLS extension, OCSP response, etc. + * Returns 1 on success, 0 otherwise. + */ +__owur int SCT_set_source(SCT *sct, sct_source_t source); + +/* + * Returns a text string describing the validation status of |sct|. + */ +const char *SCT_validation_status_string(const SCT *sct); + +/* + * Pretty-prints an |sct| to |out|. + * It will be indented by the number of spaces specified by |indent|. + * If |logs| is not NULL, it will be used to lookup the CT log that the SCT came + * from, so that the log name can be printed. + */ +void SCT_print(const SCT *sct, BIO *out, int indent, const CTLOG_STORE *logs); + +/* + * Pretty-prints an |sct_list| to |out|. + * It will be indented by the number of spaces specified by |indent|. + * SCTs will be delimited by |separator|. + * If |logs| is not NULL, it will be used to lookup the CT log that each SCT + * came from, so that the log names can be printed. + */ +void SCT_LIST_print(const STACK_OF(SCT) *sct_list, BIO *out, int indent, + const char *separator, const CTLOG_STORE *logs); + +/* + * Gets the last result of validating this SCT. + * If it has not been validated yet, returns SCT_VALIDATION_STATUS_NOT_SET. + */ +sct_validation_status_t SCT_get_validation_status(const SCT *sct); + +/* + * Validates the given SCT with the provided context. + * Sets the "validation_status" field of the SCT. + * Returns 1 if the SCT is valid and the signature verifies. + * Returns 0 if the SCT is invalid or could not be verified. + * Returns -1 if an error occurs. + */ +__owur int SCT_validate(SCT *sct, const CT_POLICY_EVAL_CTX *ctx); + +/* + * Validates the given list of SCTs with the provided context. + * Sets the "validation_status" field of each SCT. + * Returns 1 if there are no invalid SCTs and all signatures verify. + * Returns 0 if at least one SCT is invalid or could not be verified. + * Returns a negative integer if an error occurs. + */ +__owur int SCT_LIST_validate(const STACK_OF(SCT) *scts, + CT_POLICY_EVAL_CTX *ctx); + + +/********************************* + * SCT parsing and serialization * + *********************************/ + +/* + * Serialize (to TLS format) a stack of SCTs and return the length. + * "a" must not be NULL. + * If "pp" is NULL, just return the length of what would have been serialized. + * If "pp" is not NULL and "*pp" is null, function will allocate a new pointer + * for data that caller is responsible for freeing (only if function returns + * successfully). + * If "pp" is NULL and "*pp" is not NULL, caller is responsible for ensuring + * that "*pp" is large enough to accept all of the serialized data. + * Returns < 0 on error, >= 0 indicating bytes written (or would have been) + * on success. 
+ */ +__owur int i2o_SCT_LIST(const STACK_OF(SCT) *a, unsigned char **pp); + +/* + * Convert TLS format SCT list to a stack of SCTs. + * If "a" or "*a" is NULL, a new stack will be created that the caller is + * responsible for freeing (by calling SCT_LIST_free). + * "**pp" and "*pp" must not be NULL. + * Upon success, "*pp" will point to after the last bytes read, and a stack + * will be returned. + * Upon failure, a NULL pointer will be returned, and the position of "*pp" is + * not defined. + */ +STACK_OF(SCT) *o2i_SCT_LIST(STACK_OF(SCT) **a, const unsigned char **pp, + size_t len); + +/* + * Serialize (to DER format) a stack of SCTs and return the length. + * "a" must not be NULL. + * If "pp" is NULL, just returns the length of what would have been serialized. + * If "pp" is not NULL and "*pp" is null, function will allocate a new pointer + * for data that caller is responsible for freeing (only if function returns + * successfully). + * If "pp" is NULL and "*pp" is not NULL, caller is responsible for ensuring + * that "*pp" is large enough to accept all of the serialized data. + * Returns < 0 on error, >= 0 indicating bytes written (or would have been) + * on success. + */ +__owur int i2d_SCT_LIST(const STACK_OF(SCT) *a, unsigned char **pp); + +/* + * Parses an SCT list in DER format and returns it. + * If "a" or "*a" is NULL, a new stack will be created that the caller is + * responsible for freeing (by calling SCT_LIST_free). + * "**pp" and "*pp" must not be NULL. + * Upon success, "*pp" will point to after the last bytes read, and a stack + * will be returned. + * Upon failure, a NULL pointer will be returned, and the position of "*pp" is + * not defined. + */ +STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, const unsigned char **pp, + long len); + +/* + * Serialize (to TLS format) an |sct| and write it to |out|. + * If |out| is null, no SCT will be output but the length will still be returned. + * If |out| points to a null pointer, a string will be allocated to hold the + * TLS-format SCT. It is the responsibility of the caller to free it. + * If |out| points to an allocated string, the TLS-format SCT will be written + * to it. + * The length of the SCT in TLS format will be returned. + */ +__owur int i2o_SCT(const SCT *sct, unsigned char **out); + +/* + * Parses an SCT in TLS format and returns it. + * If |psct| is not null, it will end up pointing to the parsed SCT. If it + * already points to a non-null pointer, the pointer will be free'd. + * |in| should be a pointer to a string containing the TLS-format SCT. + * |in| will be advanced to the end of the SCT if parsing succeeds. + * |len| should be the length of the SCT in |in|. + * Returns NULL if an error occurs. + * If the SCT is an unsupported version, only the SCT's 'sct' and 'sct_len' + * fields will be populated (with |in| and |len| respectively). + */ +SCT *o2i_SCT(SCT **psct, const unsigned char **in, size_t len); + +/******************** + * CT log functions * + ********************/ + +/* + * Creates a new CT log instance with the given |public_key| and |name| and + * associates it with the give library context |libctx| and property query + * string |propq|. + * Takes ownership of |public_key| but copies |name|. + * Returns NULL if malloc fails or if |public_key| cannot be converted to DER. + * Should be deleted by the caller using CTLOG_free when no longer needed. 
+ */ +CTLOG *CTLOG_new_ex(EVP_PKEY *public_key, const char *name, OSSL_LIB_CTX *libctx, + const char *propq); + +/* + * The same as CTLOG_new_ex except that the default library context and + * property query string are used. + */ +CTLOG *CTLOG_new(EVP_PKEY *public_key, const char *name); + +/* + * Creates a new CTLOG instance with the base64-encoded SubjectPublicKeyInfo DER + * in |pkey_base64| and associated with the given library context |libctx| and + * property query string |propq|. The |name| is a string to help users identify + * this log. + * Returns 1 on success, 0 on failure. + * Should be deleted by the caller using CTLOG_free when no longer needed. + */ +int CTLOG_new_from_base64_ex(CTLOG **ct_log, const char *pkey_base64, + const char *name, OSSL_LIB_CTX *libctx, + const char *propq); + +/* + * The same as CTLOG_new_from_base64_ex() except that the default + * library context and property query string are used. + * Returns 1 on success, 0 on failure. + */ +int CTLOG_new_from_base64(CTLOG ** ct_log, + const char *pkey_base64, const char *name); + +/* + * Deletes a CT log instance and its fields. + */ +void CTLOG_free(CTLOG *log); + +/* Gets the name of the CT log */ +const char *CTLOG_get0_name(const CTLOG *log); +/* Gets the ID of the CT log */ +void CTLOG_get0_log_id(const CTLOG *log, const uint8_t **log_id, + size_t *log_id_len); +/* Gets the public key of the CT log */ +EVP_PKEY *CTLOG_get0_public_key(const CTLOG *log); + +/************************** + * CT log store functions * + **************************/ + +/* + * Creates a new CT log store and associates it with the given libctx and + * property query string. + * Should be deleted by the caller using CTLOG_STORE_free when no longer needed. + */ +CTLOG_STORE *CTLOG_STORE_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +/* + * Same as CTLOG_STORE_new_ex except that the default libctx and + * property query string are used. + * Should be deleted by the caller using CTLOG_STORE_free when no longer needed. + */ +CTLOG_STORE *CTLOG_STORE_new(void); + +/* + * Deletes a CT log store and all of the CT log instances held within. + */ +void CTLOG_STORE_free(CTLOG_STORE *store); + +/* + * Finds a CT log in the store based on its log ID. + * Returns the CT log, or NULL if no match is found. + */ +const CTLOG *CTLOG_STORE_get0_log_by_id(const CTLOG_STORE *store, + const uint8_t *log_id, + size_t log_id_len); + +/* + * Loads a CT log list into a |store| from a |file|. + * Returns 1 if loading is successful, or 0 otherwise. + */ +__owur int CTLOG_STORE_load_file(CTLOG_STORE *store, const char *file); + +/* + * Loads the default CT log list into a |store|. + * Returns 1 if loading is successful, or 0 otherwise. + */ +__owur int CTLOG_STORE_load_default_file(CTLOG_STORE *store); + +# ifdef __cplusplus +} +# endif +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/err.h b/contrib/openssl-cmake/common/include/openssl/err.h new file mode 100644 index 000000000000..daca18e7b757 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/err.h @@ -0,0 +1,512 @@ +/* + * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ERR_H +# define OPENSSL_ERR_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_ERR_H +# endif + +# include + +# ifndef OPENSSL_NO_STDIO +# include +# include +# endif + +# include +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_FILENAMES +# define ERR_PUT_error(l,f,r,fn,ln) ERR_put_error(l,f,r,fn,ln) +# else +# define ERR_PUT_error(l,f,r,fn,ln) ERR_put_error(l,f,r,NULL,0) +# endif +# endif + +# include +# include + +# define ERR_TXT_MALLOCED 0x01 +# define ERR_TXT_STRING 0x02 + +# if !defined(OPENSSL_NO_DEPRECATED_3_0) || defined(OSSL_FORCE_ERR_STATE) +# define ERR_FLAG_MARK 0x01 +# define ERR_FLAG_CLEAR 0x02 + +# define ERR_NUM_ERRORS 16 +struct err_state_st { + int err_flags[ERR_NUM_ERRORS]; + int err_marks[ERR_NUM_ERRORS]; + unsigned long err_buffer[ERR_NUM_ERRORS]; + char *err_data[ERR_NUM_ERRORS]; + size_t err_data_size[ERR_NUM_ERRORS]; + int err_data_flags[ERR_NUM_ERRORS]; + char *err_file[ERR_NUM_ERRORS]; + int err_line[ERR_NUM_ERRORS]; + char *err_func[ERR_NUM_ERRORS]; + int top, bottom; +}; +# endif + +/* library */ +# define ERR_LIB_NONE 1 +# define ERR_LIB_SYS 2 +# define ERR_LIB_BN 3 +# define ERR_LIB_RSA 4 +# define ERR_LIB_DH 5 +# define ERR_LIB_EVP 6 +# define ERR_LIB_BUF 7 +# define ERR_LIB_OBJ 8 +# define ERR_LIB_PEM 9 +# define ERR_LIB_DSA 10 +# define ERR_LIB_X509 11 +/* #define ERR_LIB_METH 12 */ +# define ERR_LIB_ASN1 13 +# define ERR_LIB_CONF 14 +# define ERR_LIB_CRYPTO 15 +# define ERR_LIB_EC 16 +# define ERR_LIB_SSL 20 +/* #define ERR_LIB_SSL23 21 */ +/* #define ERR_LIB_SSL2 22 */ +/* #define ERR_LIB_SSL3 23 */ +/* #define ERR_LIB_RSAREF 30 */ +/* #define ERR_LIB_PROXY 31 */ +# define ERR_LIB_BIO 32 +# define ERR_LIB_PKCS7 33 +# define ERR_LIB_X509V3 34 +# define ERR_LIB_PKCS12 35 +# define ERR_LIB_RAND 36 +# define ERR_LIB_DSO 37 +# define ERR_LIB_ENGINE 38 +# define ERR_LIB_OCSP 39 +# define ERR_LIB_UI 40 +# define ERR_LIB_COMP 41 +# define ERR_LIB_ECDSA 42 +# define ERR_LIB_ECDH 43 +# define ERR_LIB_OSSL_STORE 44 +# define ERR_LIB_FIPS 45 +# define ERR_LIB_CMS 46 +# define ERR_LIB_TS 47 +# define ERR_LIB_HMAC 48 +/* # define ERR_LIB_JPAKE 49 */ +# define ERR_LIB_CT 50 +# define ERR_LIB_ASYNC 51 +# define ERR_LIB_KDF 52 +# define ERR_LIB_SM2 53 +# define ERR_LIB_ESS 54 +# define ERR_LIB_PROP 55 +# define ERR_LIB_CRMF 56 +# define ERR_LIB_PROV 57 +# define ERR_LIB_CMP 58 +# define ERR_LIB_OSSL_ENCODER 59 +# define ERR_LIB_OSSL_DECODER 60 +# define ERR_LIB_HTTP 61 + +# define ERR_LIB_USER 128 + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define ASN1err(f, r) ERR_raise_data(ERR_LIB_ASN1, (r), NULL) +# define ASYNCerr(f, r) ERR_raise_data(ERR_LIB_ASYNC, (r), NULL) +# define BIOerr(f, r) ERR_raise_data(ERR_LIB_BIO, (r), NULL) +# define BNerr(f, r) ERR_raise_data(ERR_LIB_BN, (r), NULL) +# define BUFerr(f, r) ERR_raise_data(ERR_LIB_BUF, (r), NULL) +# define CMPerr(f, r) ERR_raise_data(ERR_LIB_CMP, (r), NULL) +# define CMSerr(f, r) ERR_raise_data(ERR_LIB_CMS, (r), NULL) +# define COMPerr(f, r) ERR_raise_data(ERR_LIB_COMP, (r), NULL) +# define CONFerr(f, r) ERR_raise_data(ERR_LIB_CONF, (r), NULL) +# define CRMFerr(f, r) ERR_raise_data(ERR_LIB_CRMF, (r), NULL) +# define CRYPTOerr(f, r) ERR_raise_data(ERR_LIB_CRYPTO, (r), NULL) +# define CTerr(f, r) ERR_raise_data(ERR_LIB_CT, (r), NULL) +# define DHerr(f, r) 
ERR_raise_data(ERR_LIB_DH, (r), NULL) +# define DSAerr(f, r) ERR_raise_data(ERR_LIB_DSA, (r), NULL) +# define DSOerr(f, r) ERR_raise_data(ERR_LIB_DSO, (r), NULL) +# define ECDHerr(f, r) ERR_raise_data(ERR_LIB_ECDH, (r), NULL) +# define ECDSAerr(f, r) ERR_raise_data(ERR_LIB_ECDSA, (r), NULL) +# define ECerr(f, r) ERR_raise_data(ERR_LIB_EC, (r), NULL) +# define ENGINEerr(f, r) ERR_raise_data(ERR_LIB_ENGINE, (r), NULL) +# define ESSerr(f, r) ERR_raise_data(ERR_LIB_ESS, (r), NULL) +# define EVPerr(f, r) ERR_raise_data(ERR_LIB_EVP, (r), NULL) +# define FIPSerr(f, r) ERR_raise_data(ERR_LIB_FIPS, (r), NULL) +# define HMACerr(f, r) ERR_raise_data(ERR_LIB_HMAC, (r), NULL) +# define HTTPerr(f, r) ERR_raise_data(ERR_LIB_HTTP, (r), NULL) +# define KDFerr(f, r) ERR_raise_data(ERR_LIB_KDF, (r), NULL) +# define OBJerr(f, r) ERR_raise_data(ERR_LIB_OBJ, (r), NULL) +# define OCSPerr(f, r) ERR_raise_data(ERR_LIB_OCSP, (r), NULL) +# define OSSL_STOREerr(f, r) ERR_raise_data(ERR_LIB_OSSL_STORE, (r), NULL) +# define PEMerr(f, r) ERR_raise_data(ERR_LIB_PEM, (r), NULL) +# define PKCS12err(f, r) ERR_raise_data(ERR_LIB_PKCS12, (r), NULL) +# define PKCS7err(f, r) ERR_raise_data(ERR_LIB_PKCS7, (r), NULL) +# define PROPerr(f, r) ERR_raise_data(ERR_LIB_PROP, (r), NULL) +# define PROVerr(f, r) ERR_raise_data(ERR_LIB_PROV, (r), NULL) +# define RANDerr(f, r) ERR_raise_data(ERR_LIB_RAND, (r), NULL) +# define RSAerr(f, r) ERR_raise_data(ERR_LIB_RSA, (r), NULL) +# define KDFerr(f, r) ERR_raise_data(ERR_LIB_KDF, (r), NULL) +# define SM2err(f, r) ERR_raise_data(ERR_LIB_SM2, (r), NULL) +# define SSLerr(f, r) ERR_raise_data(ERR_LIB_SSL, (r), NULL) +# define SYSerr(f, r) ERR_raise_data(ERR_LIB_SYS, (r), NULL) +# define TSerr(f, r) ERR_raise_data(ERR_LIB_TS, (r), NULL) +# define UIerr(f, r) ERR_raise_data(ERR_LIB_UI, (r), NULL) +# define X509V3err(f, r) ERR_raise_data(ERR_LIB_X509V3, (r), NULL) +# define X509err(f, r) ERR_raise_data(ERR_LIB_X509, (r), NULL) +# endif + +/*- + * The error code packs differently depending on if it records a system + * error or an OpenSSL error. + * + * A system error packs like this (we follow POSIX and only allow positive + * numbers that fit in an |int|): + * + * +-+-------------------------------------------------------------+ + * |1| system error number | + * +-+-------------------------------------------------------------+ + * + * An OpenSSL error packs like this: + * + * <---------------------------- 32 bits --------------------------> + * <--- 8 bits ---><------------------ 23 bits -----------------> + * +-+---------------+---------------------------------------------+ + * |0| library | reason | + * +-+---------------+---------------------------------------------+ + * + * A few of the reason bits are reserved as flags with special meaning: + * + * <5 bits-<>--------- 19 bits -----------------> + * +-------+-+-----------------------------------+ + * | rflags| | reason | + * +-------+-+-----------------------------------+ + * ^ + * | + * ERR_RFLAG_FATAL = ERR_R_FATAL + * + * The reason flags are part of the overall reason code for practical + * reasons, as they provide an easy way to place different types of + * reason codes in different numeric ranges. + * + * The currently known reason flags are: + * + * ERR_RFLAG_FATAL Flags that the reason code is considered fatal. + * For backward compatibility reasons, this flag + * is also the code for ERR_R_FATAL (that reason + * code served the dual purpose of flag and reason + * code in one in pre-3.0 OpenSSL). 
+ * ERR_RFLAG_COMMON Flags that the reason code is common to all + * libraries. All ERR_R_ macros must use this flag, + * and no other _R_ macro is allowed to use it. + */ + +/* Macros to help decode recorded system errors */ +# define ERR_SYSTEM_FLAG ((unsigned int)INT_MAX + 1) +# define ERR_SYSTEM_MASK ((unsigned int)INT_MAX) + +/* + * Macros to help decode recorded OpenSSL errors + * As expressed above, RFLAGS and REASON overlap by one bit to allow + * ERR_R_FATAL to use ERR_RFLAG_FATAL as its reason code. + */ +# define ERR_LIB_OFFSET 23L +# define ERR_LIB_MASK 0xFF +# define ERR_RFLAGS_OFFSET 18L +# define ERR_RFLAGS_MASK 0x1F +# define ERR_REASON_MASK 0X7FFFFF + +/* + * Reason flags are defined pre-shifted to easily combine with the reason + * number. + */ +# define ERR_RFLAG_FATAL (0x1 << ERR_RFLAGS_OFFSET) +# define ERR_RFLAG_COMMON (0x2 << ERR_RFLAGS_OFFSET) + +# define ERR_SYSTEM_ERROR(errcode) (((errcode) & ERR_SYSTEM_FLAG) != 0) + +static ossl_unused ossl_inline int ERR_GET_LIB(unsigned long errcode) +{ + if (ERR_SYSTEM_ERROR(errcode)) + return ERR_LIB_SYS; + return (errcode >> ERR_LIB_OFFSET) & ERR_LIB_MASK; +} + +static ossl_unused ossl_inline int ERR_GET_RFLAGS(unsigned long errcode) +{ + if (ERR_SYSTEM_ERROR(errcode)) + return 0; + return errcode & (ERR_RFLAGS_MASK << ERR_RFLAGS_OFFSET); +} + +static ossl_unused ossl_inline int ERR_GET_REASON(unsigned long errcode) +{ + if (ERR_SYSTEM_ERROR(errcode)) + return errcode & ERR_SYSTEM_MASK; + return errcode & ERR_REASON_MASK; +} + +static ossl_unused ossl_inline int ERR_FATAL_ERROR(unsigned long errcode) +{ + return (ERR_GET_RFLAGS(errcode) & ERR_RFLAG_FATAL) != 0; +} + +static ossl_unused ossl_inline int ERR_COMMON_ERROR(unsigned long errcode) +{ + return (ERR_GET_RFLAGS(errcode) & ERR_RFLAG_COMMON) != 0; +} + +/* + * ERR_PACK is a helper macro to properly pack OpenSSL error codes and may + * only be used for that purpose. System errors are packed internally. + * ERR_PACK takes reason flags and reason code combined in |reason|. + * ERR_PACK ignores |func|, that parameter is just legacy from pre-3.0 OpenSSL. + */ +# define ERR_PACK(lib,func,reason) \ + ( (((unsigned long)(lib) & ERR_LIB_MASK ) << ERR_LIB_OFFSET) | \ + (((unsigned long)(reason) & ERR_REASON_MASK)) ) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SYS_F_FOPEN 0 +# define SYS_F_CONNECT 0 +# define SYS_F_GETSERVBYNAME 0 +# define SYS_F_SOCKET 0 +# define SYS_F_IOCTLSOCKET 0 +# define SYS_F_BIND 0 +# define SYS_F_LISTEN 0 +# define SYS_F_ACCEPT 0 +# define SYS_F_WSASTARTUP 0 +# define SYS_F_OPENDIR 0 +# define SYS_F_FREAD 0 +# define SYS_F_GETADDRINFO 0 +# define SYS_F_GETNAMEINFO 0 +# define SYS_F_SETSOCKOPT 0 +# define SYS_F_GETSOCKOPT 0 +# define SYS_F_GETSOCKNAME 0 +# define SYS_F_GETHOSTBYNAME 0 +# define SYS_F_FFLUSH 0 +# define SYS_F_OPEN 0 +# define SYS_F_CLOSE 0 +# define SYS_F_IOCTL 0 +# define SYS_F_STAT 0 +# define SYS_F_FCNTL 0 +# define SYS_F_FSTAT 0 +# define SYS_F_SENDFILE 0 +# endif + +/* + * All ERR_R_ codes must be combined with ERR_RFLAG_COMMON. 
+ */ + +/* "we came from here" global reason codes, range 1..255 */ +# define ERR_R_SYS_LIB (ERR_LIB_SYS/* 2 */ | ERR_RFLAG_COMMON) +# define ERR_R_BN_LIB (ERR_LIB_BN/* 3 */ | ERR_RFLAG_COMMON) +# define ERR_R_RSA_LIB (ERR_LIB_RSA/* 4 */ | ERR_RFLAG_COMMON) +# define ERR_R_DH_LIB (ERR_LIB_DH/* 5 */ | ERR_RFLAG_COMMON) +# define ERR_R_EVP_LIB (ERR_LIB_EVP/* 6 */ | ERR_RFLAG_COMMON) +# define ERR_R_BUF_LIB (ERR_LIB_BUF/* 7 */ | ERR_RFLAG_COMMON) +# define ERR_R_OBJ_LIB (ERR_LIB_OBJ/* 8 */ | ERR_RFLAG_COMMON) +# define ERR_R_PEM_LIB (ERR_LIB_PEM/* 9 */ | ERR_RFLAG_COMMON) +# define ERR_R_DSA_LIB (ERR_LIB_DSA/* 10 */ | ERR_RFLAG_COMMON) +# define ERR_R_X509_LIB (ERR_LIB_X509/* 11 */ | ERR_RFLAG_COMMON) +# define ERR_R_ASN1_LIB (ERR_LIB_ASN1/* 13 */ | ERR_RFLAG_COMMON) +# define ERR_R_CONF_LIB (ERR_LIB_CONF/* 14 */ | ERR_RFLAG_COMMON) +# define ERR_R_CRYPTO_LIB (ERR_LIB_CRYPTO/* 15 */ | ERR_RFLAG_COMMON) +# define ERR_R_EC_LIB (ERR_LIB_EC/* 16 */ | ERR_RFLAG_COMMON) +# define ERR_R_SSL_LIB (ERR_LIB_SSL/* 20 */ | ERR_RFLAG_COMMON) +# define ERR_R_BIO_LIB (ERR_LIB_BIO/* 32 */ | ERR_RFLAG_COMMON) +# define ERR_R_PKCS7_LIB (ERR_LIB_PKCS7/* 33 */ | ERR_RFLAG_COMMON) +# define ERR_R_X509V3_LIB (ERR_LIB_X509V3/* 34 */ | ERR_RFLAG_COMMON) +# define ERR_R_PKCS12_LIB (ERR_LIB_PKCS12/* 35 */ | ERR_RFLAG_COMMON) +# define ERR_R_RAND_LIB (ERR_LIB_RAND/* 36 */ | ERR_RFLAG_COMMON) +# define ERR_R_DSO_LIB (ERR_LIB_DSO/* 37 */ | ERR_RFLAG_COMMON) +# define ERR_R_ENGINE_LIB (ERR_LIB_ENGINE/* 38 */ | ERR_RFLAG_COMMON) +# define ERR_R_UI_LIB (ERR_LIB_UI/* 40 */ | ERR_RFLAG_COMMON) +# define ERR_R_ECDSA_LIB (ERR_LIB_ECDSA/* 42 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_STORE_LIB (ERR_LIB_OSSL_STORE/* 44 */ | ERR_RFLAG_COMMON) +# define ERR_R_CMS_LIB (ERR_LIB_CMS/* 46 */ | ERR_RFLAG_COMMON) +# define ERR_R_TS_LIB (ERR_LIB_TS/* 47 */ | ERR_RFLAG_COMMON) +# define ERR_R_CT_LIB (ERR_LIB_CT/* 50 */ | ERR_RFLAG_COMMON) +# define ERR_R_PROV_LIB (ERR_LIB_PROV/* 57 */ | ERR_RFLAG_COMMON) +# define ERR_R_ESS_LIB (ERR_LIB_ESS/* 54 */ | ERR_RFLAG_COMMON) +# define ERR_R_CMP_LIB (ERR_LIB_CMP/* 58 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_ENCODER_LIB (ERR_LIB_OSSL_ENCODER/* 59 */ | ERR_RFLAG_COMMON) +# define ERR_R_OSSL_DECODER_LIB (ERR_LIB_OSSL_DECODER/* 60 */ | ERR_RFLAG_COMMON) + +/* Other common error codes, range 256..2^ERR_RFLAGS_OFFSET-1 */ +# define ERR_R_FATAL (ERR_RFLAG_FATAL|ERR_RFLAG_COMMON) +# define ERR_R_MALLOC_FAILURE (256|ERR_R_FATAL) +# define ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED (257|ERR_R_FATAL) +# define ERR_R_PASSED_NULL_PARAMETER (258|ERR_R_FATAL) +# define ERR_R_INTERNAL_ERROR (259|ERR_R_FATAL) +# define ERR_R_DISABLED (260|ERR_R_FATAL) +# define ERR_R_INIT_FAIL (261|ERR_R_FATAL) +# define ERR_R_PASSED_INVALID_ARGUMENT (262|ERR_RFLAG_COMMON) +# define ERR_R_OPERATION_FAIL (263|ERR_R_FATAL) +# define ERR_R_INVALID_PROVIDER_FUNCTIONS (264|ERR_R_FATAL) +# define ERR_R_INTERRUPTED_OR_CANCELLED (265|ERR_RFLAG_COMMON) +# define ERR_R_NESTED_ASN1_ERROR (266|ERR_RFLAG_COMMON) +# define ERR_R_MISSING_ASN1_EOS (267|ERR_RFLAG_COMMON) +# define ERR_R_UNSUPPORTED (268|ERR_RFLAG_COMMON) +# define ERR_R_FETCH_FAILED (269|ERR_RFLAG_COMMON) +# define ERR_R_INVALID_PROPERTY_DEFINITION (270|ERR_RFLAG_COMMON) +# define ERR_R_UNABLE_TO_GET_READ_LOCK (271|ERR_R_FATAL) +# define ERR_R_UNABLE_TO_GET_WRITE_LOCK (272|ERR_R_FATAL) + +typedef struct ERR_string_data_st { + unsigned long error; + const char *string; +} ERR_STRING_DATA; + +DEFINE_LHASH_OF_INTERNAL(ERR_STRING_DATA); +#define lh_ERR_STRING_DATA_new(hfn, cmp) 
((LHASH_OF(ERR_STRING_DATA) *)OPENSSL_LH_set_thunks(OPENSSL_LH_new(ossl_check_ERR_STRING_DATA_lh_hashfunc_type(hfn), ossl_check_ERR_STRING_DATA_lh_compfunc_type(cmp)), lh_ERR_STRING_DATA_hash_thunk, lh_ERR_STRING_DATA_comp_thunk, lh_ERR_STRING_DATA_doall_thunk, lh_ERR_STRING_DATA_doall_arg_thunk)) +#define lh_ERR_STRING_DATA_free(lh) OPENSSL_LH_free(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_flush(lh) OPENSSL_LH_flush(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_insert(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_insert(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_delete(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_delete(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_const_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_retrieve(lh, ptr) ((ERR_STRING_DATA *)OPENSSL_LH_retrieve(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_const_ERR_STRING_DATA_lh_plain_type(ptr))) +#define lh_ERR_STRING_DATA_error(lh) OPENSSL_LH_error(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_num_items(lh) OPENSSL_LH_num_items(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_ERR_STRING_DATA_lh_type(lh), out) +#define lh_ERR_STRING_DATA_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_ERR_STRING_DATA_lh_type(lh)) +#define lh_ERR_STRING_DATA_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_ERR_STRING_DATA_lh_type(lh), dl) +#define lh_ERR_STRING_DATA_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_ERR_STRING_DATA_lh_type(lh), ossl_check_ERR_STRING_DATA_lh_doallfunc_type(dfn)) + + +/* 12 lines and some on an 80 column terminal */ +#define ERR_MAX_DATA_SIZE 1024 + +/* Building blocks */ +void ERR_new(void); +void ERR_set_debug(const char *file, int line, const char *func); +void ERR_set_error(int lib, int reason, const char *fmt, ...); +void ERR_vset_error(int lib, int reason, const char *fmt, va_list args); + +/* Main error raising functions */ +# define ERR_raise(lib, reason) ERR_raise_data((lib),(reason),NULL) +# define ERR_raise_data \ + (ERR_new(), \ + ERR_set_debug(OPENSSL_FILE,OPENSSL_LINE,OPENSSL_FUNC), \ + ERR_set_error) + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* Backward compatibility */ +# define ERR_put_error(lib, func, reason, file, line) \ + (ERR_new(), \ + ERR_set_debug((file), (line), OPENSSL_FUNC), \ + ERR_set_error((lib), (reason), NULL)) +# endif + +void ERR_set_error_data(char *data, int flags); + +unsigned long ERR_get_error(void); +unsigned long ERR_get_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_get_error_line(const char **file, int *line); +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_get_error_line_data(const char **file, int *line, + const char **data, int *flags); +#endif +unsigned long ERR_peek_error(void); +unsigned long ERR_peek_error_line(const char **file, int *line); +unsigned long ERR_peek_error_func(const char **func); +unsigned long ERR_peek_error_data(const char **data, int *flags); +unsigned long ERR_peek_error_all(const char **file, int *line, + const char 
**func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_peek_error_line_data(const char **file, int *line, + const char **data, int *flags); +# endif +unsigned long ERR_peek_last_error(void); +unsigned long ERR_peek_last_error_line(const char **file, int *line); +unsigned long ERR_peek_last_error_func(const char **func); +unsigned long ERR_peek_last_error_data(const char **data, int *flags); +unsigned long ERR_peek_last_error_all(const char **file, int *line, + const char **func, + const char **data, int *flags); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +unsigned long ERR_peek_last_error_line_data(const char **file, int *line, + const char **data, int *flags); +# endif + +void ERR_clear_error(void); + +char *ERR_error_string(unsigned long e, char *buf); +void ERR_error_string_n(unsigned long e, char *buf, size_t len); +const char *ERR_lib_error_string(unsigned long e); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 const char *ERR_func_error_string(unsigned long e); +# endif +const char *ERR_reason_error_string(unsigned long e); + +void ERR_print_errors_cb(int (*cb) (const char *str, size_t len, void *u), + void *u); +# ifndef OPENSSL_NO_STDIO +void ERR_print_errors_fp(FILE *fp); +# endif +void ERR_print_errors(BIO *bp); + +void ERR_add_error_data(int num, ...); +void ERR_add_error_vdata(int num, va_list args); +void ERR_add_error_txt(const char *sepr, const char *txt); +void ERR_add_error_mem_bio(const char *sep, BIO *bio); + +int ERR_load_strings(int lib, ERR_STRING_DATA *str); +int ERR_load_strings_const(const ERR_STRING_DATA *str); +int ERR_unload_strings(int lib, ERR_STRING_DATA *str); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define ERR_load_crypto_strings() \ + OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL) +# define ERR_free_strings() while(0) continue +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void ERR_remove_thread_state(void *); +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_0_0 +OSSL_DEPRECATEDIN_1_0_0 void ERR_remove_state(unsigned long pid); +#endif +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 ERR_STATE *ERR_get_state(void); +#endif + +int ERR_get_next_error_library(void); + +int ERR_set_mark(void); +int ERR_pop_to_mark(void); +int ERR_clear_last_mark(void); +int ERR_count_to_mark(void); +int ERR_pop(void); + +ERR_STATE *OSSL_ERR_STATE_new(void); +void OSSL_ERR_STATE_save(ERR_STATE *es); +void OSSL_ERR_STATE_save_to_mark(ERR_STATE *es); +void OSSL_ERR_STATE_restore(const ERR_STATE *es); +void OSSL_ERR_STATE_free(ERR_STATE *es); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/ess.h b/contrib/openssl-cmake/common/include/openssl/ess.h new file mode 100644 index 000000000000..4055bebbea2f --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/ess.h @@ -0,0 +1,128 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ess.h.in + * + * Copyright 2019-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_ESS_H +# define OPENSSL_ESS_H +# pragma once + +# include + +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + + +typedef struct ESS_issuer_serial ESS_ISSUER_SERIAL; +typedef struct ESS_cert_id ESS_CERT_ID; +typedef struct ESS_signing_cert ESS_SIGNING_CERT; + +SKM_DEFINE_STACK_OF_INTERNAL(ESS_CERT_ID, ESS_CERT_ID, ESS_CERT_ID) +#define sk_ESS_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_value(sk, idx) ((ESS_CERT_ID *)OPENSSL_sk_value(ossl_check_const_ESS_CERT_ID_sk_type(sk), (idx))) +#define sk_ESS_CERT_ID_new(cmp) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new(ossl_check_ESS_CERT_ID_compfunc_type(cmp))) +#define sk_ESS_CERT_ID_new_null() ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new_null()) +#define sk_ESS_CERT_ID_new_reserve(cmp, n) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_ESS_CERT_ID_compfunc_type(cmp), (n))) +#define sk_ESS_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ESS_CERT_ID_sk_type(sk), (n)) +#define sk_ESS_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_delete(sk, i) ((ESS_CERT_ID *)OPENSSL_sk_delete(ossl_check_ESS_CERT_ID_sk_type(sk), (i))) +#define sk_ESS_CERT_ID_delete_ptr(sk, ptr) ((ESS_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr))) +#define sk_ESS_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_pop(sk) ((ESS_CERT_ID *)OPENSSL_sk_pop(ossl_check_ESS_CERT_ID_sk_type(sk))) +#define sk_ESS_CERT_ID_shift(sk) ((ESS_CERT_ID *)OPENSSL_sk_shift(ossl_check_ESS_CERT_ID_sk_type(sk))) +#define sk_ESS_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ESS_CERT_ID_sk_type(sk),ossl_check_ESS_CERT_ID_freefunc_type(freefunc)) +#define sk_ESS_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr), (idx)) +#define sk_ESS_CERT_ID_set(sk, idx, ptr) ((ESS_CERT_ID *)OPENSSL_sk_set(ossl_check_ESS_CERT_ID_sk_type(sk), (idx), ossl_check_ESS_CERT_ID_type(ptr))) +#define sk_ESS_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr)) +#define sk_ESS_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_type(ptr), pnum) +#define sk_ESS_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ESS_CERT_ID_sk_type(sk)) +#define sk_ESS_CERT_ID_dup(sk) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_ESS_CERT_ID_sk_type(sk))) +#define sk_ESS_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ESS_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_copyfunc_type(copyfunc), ossl_check_ESS_CERT_ID_freefunc_type(freefunc))) +#define sk_ESS_CERT_ID_set_cmp_func(sk, cmp) 
((sk_ESS_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ESS_CERT_ID_sk_type(sk), ossl_check_ESS_CERT_ID_compfunc_type(cmp))) + + + +typedef struct ESS_signing_cert_v2_st ESS_SIGNING_CERT_V2; +typedef struct ESS_cert_id_v2_st ESS_CERT_ID_V2; + +SKM_DEFINE_STACK_OF_INTERNAL(ESS_CERT_ID_V2, ESS_CERT_ID_V2, ESS_CERT_ID_V2) +#define sk_ESS_CERT_ID_V2_num(sk) OPENSSL_sk_num(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_value(sk, idx) ((ESS_CERT_ID_V2 *)OPENSSL_sk_value(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk), (idx))) +#define sk_ESS_CERT_ID_V2_new(cmp) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new(ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp))) +#define sk_ESS_CERT_ID_V2_new_null() ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new_null()) +#define sk_ESS_CERT_ID_V2_new_reserve(cmp, n) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_new_reserve(ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp), (n))) +#define sk_ESS_CERT_ID_V2_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (n)) +#define sk_ESS_CERT_ID_V2_free(sk) OPENSSL_sk_free(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_zero(sk) OPENSSL_sk_zero(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_delete(sk, i) ((ESS_CERT_ID_V2 *)OPENSSL_sk_delete(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (i))) +#define sk_ESS_CERT_ID_V2_delete_ptr(sk, ptr) ((ESS_CERT_ID_V2 *)OPENSSL_sk_delete_ptr(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr))) +#define sk_ESS_CERT_ID_V2_push(sk, ptr) OPENSSL_sk_push(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_pop(sk) ((ESS_CERT_ID_V2 *)OPENSSL_sk_pop(ossl_check_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_shift(sk) ((ESS_CERT_ID_V2 *)OPENSSL_sk_shift(ossl_check_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ESS_CERT_ID_V2_sk_type(sk),ossl_check_ESS_CERT_ID_V2_freefunc_type(freefunc)) +#define sk_ESS_CERT_ID_V2_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr), (idx)) +#define sk_ESS_CERT_ID_V2_set(sk, idx, ptr) ((ESS_CERT_ID_V2 *)OPENSSL_sk_set(ossl_check_ESS_CERT_ID_V2_sk_type(sk), (idx), ossl_check_ESS_CERT_ID_V2_type(ptr))) +#define sk_ESS_CERT_ID_V2_find(sk, ptr) OPENSSL_sk_find(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr)) +#define sk_ESS_CERT_ID_V2_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_type(ptr), pnum) +#define sk_ESS_CERT_ID_V2_sort(sk) OPENSSL_sk_sort(ossl_check_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk)) +#define sk_ESS_CERT_ID_V2_dup(sk) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_dup(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk))) +#define sk_ESS_CERT_ID_V2_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ESS_CERT_ID_V2) *)OPENSSL_sk_deep_copy(ossl_check_const_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_copyfunc_type(copyfunc), ossl_check_ESS_CERT_ID_V2_freefunc_type(freefunc))) +#define sk_ESS_CERT_ID_V2_set_cmp_func(sk, cmp) 
((sk_ESS_CERT_ID_V2_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ESS_CERT_ID_V2_sk_type(sk), ossl_check_ESS_CERT_ID_V2_compfunc_type(cmp))) + + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_ISSUER_SERIAL) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_ISSUER_SERIAL, ESS_ISSUER_SERIAL) +DECLARE_ASN1_DUP_FUNCTION(ESS_ISSUER_SERIAL) + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_CERT_ID) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_CERT_ID, ESS_CERT_ID) +DECLARE_ASN1_DUP_FUNCTION(ESS_CERT_ID) + +DECLARE_ASN1_FUNCTIONS(ESS_SIGNING_CERT) +DECLARE_ASN1_DUP_FUNCTION(ESS_SIGNING_CERT) + +DECLARE_ASN1_ALLOC_FUNCTIONS(ESS_CERT_ID_V2) +DECLARE_ASN1_ENCODE_FUNCTIONS_only(ESS_CERT_ID_V2, ESS_CERT_ID_V2) +DECLARE_ASN1_DUP_FUNCTION(ESS_CERT_ID_V2) + +DECLARE_ASN1_FUNCTIONS(ESS_SIGNING_CERT_V2) +DECLARE_ASN1_DUP_FUNCTION(ESS_SIGNING_CERT_V2) + +ESS_SIGNING_CERT *OSSL_ESS_signing_cert_new_init(const X509 *signcert, + const STACK_OF(X509) *certs, + int set_issuer_serial); +ESS_SIGNING_CERT_V2 *OSSL_ESS_signing_cert_v2_new_init(const EVP_MD *hash_alg, + const X509 *signcert, + const + STACK_OF(X509) *certs, + int set_issuer_serial); +int OSSL_ESS_check_signing_certs(const ESS_SIGNING_CERT *ss, + const ESS_SIGNING_CERT_V2 *ssv2, + const STACK_OF(X509) *chain, + int require_signing_cert); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/fipskey.h b/contrib/openssl-cmake/common/include/openssl/fipskey.h new file mode 100644 index 000000000000..620812bf0a5f --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/fipskey.h @@ -0,0 +1,41 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/fipskey.h.in + * + * Copyright 2020-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_FIPSKEY_H +# define OPENSSL_FIPSKEY_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * The FIPS validation HMAC key, usable as an array initializer. + */ +#define FIPS_KEY_ELEMENTS \ + 0xf4, 0x55, 0x66, 0x50, 0xac, 0x31, 0xd3, 0x54, 0x61, 0x61, 0x0b, 0xac, 0x4e, 0xd8, 0x1b, 0x1a, 0x18, 0x1b, 0x2d, 0x8a, 0x43, 0xea, 0x28, 0x54, 0xcb, 0xae, 0x22, 0xca, 0x74, 0x56, 0x08, 0x13 + +/* + * The FIPS validation key, as a string. + */ +#define FIPS_KEY_STRING "f4556650ac31d35461610bac4ed81b1a181b2d8a43ea2854cbae22ca74560813" + +/* + * The FIPS provider vendor name, as a string. + */ +#define FIPS_VENDOR "OpenSSL non-compliant FIPS Provider" + +# ifdef __cplusplus +} +# endif + +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/lhash.h b/contrib/openssl-cmake/common/include/openssl/lhash.h new file mode 100644 index 000000000000..62c55b20fd97 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/lhash.h @@ -0,0 +1,398 @@ +/* + * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +/* + * Header for dynamic hash table routines Author - Eric Young + */ + +#ifndef OPENSSL_LHASH_H +# define OPENSSL_LHASH_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_LHASH_H +# endif + +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct lhash_node_st OPENSSL_LH_NODE; +typedef int (*OPENSSL_LH_COMPFUNC) (const void *, const void *); +typedef int (*OPENSSL_LH_COMPFUNCTHUNK) (const void *, const void *, OPENSSL_LH_COMPFUNC cfn); +typedef unsigned long (*OPENSSL_LH_HASHFUNC) (const void *); +typedef unsigned long (*OPENSSL_LH_HASHFUNCTHUNK) (const void *, OPENSSL_LH_HASHFUNC hfn); +typedef void (*OPENSSL_LH_DOALL_FUNC) (void *); +typedef void (*OPENSSL_LH_DOALL_FUNC_THUNK) (void *, OPENSSL_LH_DOALL_FUNC doall); +typedef void (*OPENSSL_LH_DOALL_FUNCARG) (void *, void *); +typedef void (*OPENSSL_LH_DOALL_FUNCARG_THUNK) (void *, void *, OPENSSL_LH_DOALL_FUNCARG doall); +typedef struct lhash_st OPENSSL_LHASH; + +/* + * Macros for declaring and implementing type-safe wrappers for LHASH + * callbacks. This way, callbacks can be provided to LHASH structures without + * function pointer casting and the macro-defined callbacks provide + * per-variable casting before deferring to the underlying type-specific + * callbacks. NB: It is possible to place a "static" in front of both the + * DECLARE and IMPLEMENT macros if the functions are strictly internal. + */ + +/* First: "hash" functions */ +# define DECLARE_LHASH_HASH_FN(name, o_type) \ + unsigned long name##_LHASH_HASH(const void *); +# define IMPLEMENT_LHASH_HASH_FN(name, o_type) \ + unsigned long name##_LHASH_HASH(const void *arg) { \ + const o_type *a = arg; \ + return name##_hash(a); } +# define LHASH_HASH_FN(name) name##_LHASH_HASH + +/* Second: "compare" functions */ +# define DECLARE_LHASH_COMP_FN(name, o_type) \ + int name##_LHASH_COMP(const void *, const void *); +# define IMPLEMENT_LHASH_COMP_FN(name, o_type) \ + int name##_LHASH_COMP(const void *arg1, const void *arg2) { \ + const o_type *a = arg1; \ + const o_type *b = arg2; \ + return name##_cmp(a,b); } +# define LHASH_COMP_FN(name) name##_LHASH_COMP + +/* Fourth: "doall_arg" functions */ +# define DECLARE_LHASH_DOALL_ARG_FN(name, o_type, a_type) \ + void name##_LHASH_DOALL_ARG(void *, void *); +# define IMPLEMENT_LHASH_DOALL_ARG_FN(name, o_type, a_type) \ + void name##_LHASH_DOALL_ARG(void *arg1, void *arg2) { \ + o_type *a = arg1; \ + a_type *b = arg2; \ + name##_doall_arg(a, b); } +# define LHASH_DOALL_ARG_FN(name) name##_LHASH_DOALL_ARG + + +# define LH_LOAD_MULT 256 + +int OPENSSL_LH_error(OPENSSL_LHASH *lh); +OPENSSL_LHASH *OPENSSL_LH_new(OPENSSL_LH_HASHFUNC h, OPENSSL_LH_COMPFUNC c); +OPENSSL_LHASH *OPENSSL_LH_set_thunks(OPENSSL_LHASH *lh, + OPENSSL_LH_HASHFUNCTHUNK hw, + OPENSSL_LH_COMPFUNCTHUNK cw, + OPENSSL_LH_DOALL_FUNC_THUNK daw, + OPENSSL_LH_DOALL_FUNCARG_THUNK daaw); +void OPENSSL_LH_free(OPENSSL_LHASH *lh); +void OPENSSL_LH_flush(OPENSSL_LHASH *lh); +void *OPENSSL_LH_insert(OPENSSL_LHASH *lh, void *data); +void *OPENSSL_LH_delete(OPENSSL_LHASH *lh, const void *data); +void *OPENSSL_LH_retrieve(OPENSSL_LHASH *lh, const void *data); +void OPENSSL_LH_doall(OPENSSL_LHASH *lh, OPENSSL_LH_DOALL_FUNC func); +void OPENSSL_LH_doall_arg(OPENSSL_LHASH *lh, + OPENSSL_LH_DOALL_FUNCARG func, void *arg); +void 
OPENSSL_LH_doall_arg_thunk(OPENSSL_LHASH *lh, + OPENSSL_LH_DOALL_FUNCARG_THUNK daaw, + OPENSSL_LH_DOALL_FUNCARG fn, void *arg); + +unsigned long OPENSSL_LH_strhash(const char *c); +unsigned long OPENSSL_LH_num_items(const OPENSSL_LHASH *lh); +unsigned long OPENSSL_LH_get_down_load(const OPENSSL_LHASH *lh); +void OPENSSL_LH_set_down_load(OPENSSL_LHASH *lh, unsigned long down_load); + +# ifndef OPENSSL_NO_STDIO +# ifndef OPENSSL_NO_DEPRECATED_3_1 +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats(const OPENSSL_LHASH *lh, FILE *fp); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats(const OPENSSL_LHASH *lh, FILE *fp); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats(const OPENSSL_LHASH *lh, FILE *fp); +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_1 +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats_bio(const OPENSSL_LHASH *lh, BIO *out); +# endif + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define _LHASH OPENSSL_LHASH +# define LHASH_NODE OPENSSL_LH_NODE +# define lh_error OPENSSL_LH_error +# define lh_new OPENSSL_LH_new +# define lh_free OPENSSL_LH_free +# define lh_insert OPENSSL_LH_insert +# define lh_delete OPENSSL_LH_delete +# define lh_retrieve OPENSSL_LH_retrieve +# define lh_doall OPENSSL_LH_doall +# define lh_doall_arg OPENSSL_LH_doall_arg +# define lh_strhash OPENSSL_LH_strhash +# define lh_num_items OPENSSL_LH_num_items +# ifndef OPENSSL_NO_STDIO +# define lh_stats OPENSSL_LH_stats +# define lh_node_stats OPENSSL_LH_node_stats +# define lh_node_usage_stats OPENSSL_LH_node_usage_stats +# endif +# define lh_stats_bio OPENSSL_LH_stats_bio +# define lh_node_stats_bio OPENSSL_LH_node_stats_bio +# define lh_node_usage_stats_bio OPENSSL_LH_node_usage_stats_bio +# endif + +/* Type checking... 
*/ + +# define LHASH_OF(type) struct lhash_st_##type + +/* Helper macro for internal use */ +# define DEFINE_LHASH_OF_INTERNAL(type) \ + LHASH_OF(type) { \ + union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ + }; \ + typedef int (*lh_##type##_compfunc)(const type *a, const type *b); \ + typedef unsigned long (*lh_##type##_hashfunc)(const type *a); \ + typedef void (*lh_##type##_doallfunc)(type *a); \ + static ossl_inline unsigned long lh_##type##_hash_thunk(const void *data, OPENSSL_LH_HASHFUNC hfn) \ + { \ + unsigned long (*hfn_conv)(const type *) = (unsigned long (*)(const type *))hfn; \ + return hfn_conv((const type *)data); \ + } \ + static ossl_inline int lh_##type##_comp_thunk(const void *da, const void *db, OPENSSL_LH_COMPFUNC cfn) \ + { \ + int (*cfn_conv)(const type *, const type *) = (int (*)(const type *, const type *))cfn; \ + return cfn_conv((const type *)da, (const type *)db); \ + } \ + static ossl_inline void lh_##type##_doall_thunk(void *node, OPENSSL_LH_DOALL_FUNC doall) \ + { \ + void (*doall_conv)(type *) = (void (*)(type *))doall; \ + doall_conv((type *)node); \ + } \ + static ossl_inline void lh_##type##_doall_arg_thunk(void *node, void *arg, OPENSSL_LH_DOALL_FUNCARG doall) \ + { \ + void (*doall_conv)(type *, void *) = (void (*)(type *, void *))doall; \ + doall_conv((type *)node, arg); \ + } \ + static ossl_unused ossl_inline type *\ + ossl_check_##type##_lh_plain_type(type *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const type * \ + ossl_check_const_##type##_lh_plain_type(const type *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const OPENSSL_LHASH * \ + ossl_check_const_##type##_lh_type(const LHASH_OF(type) *lh) \ + { \ + return (const OPENSSL_LHASH *)lh; \ + } \ + static ossl_unused ossl_inline OPENSSL_LHASH * \ + ossl_check_##type##_lh_type(LHASH_OF(type) *lh) \ + { \ + return (OPENSSL_LHASH *)lh; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_COMPFUNC \ + ossl_check_##type##_lh_compfunc_type(lh_##type##_compfunc cmp) \ + { \ + return (OPENSSL_LH_COMPFUNC)cmp; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_HASHFUNC \ + ossl_check_##type##_lh_hashfunc_type(lh_##type##_hashfunc hfn) \ + { \ + return (OPENSSL_LH_HASHFUNC)hfn; \ + } \ + static ossl_unused ossl_inline OPENSSL_LH_DOALL_FUNC \ + ossl_check_##type##_lh_doallfunc_type(lh_##type##_doallfunc dfn) \ + { \ + return (OPENSSL_LH_DOALL_FUNC)dfn; \ + } \ + LHASH_OF(type) + +# ifndef OPENSSL_NO_DEPRECATED_3_1 +# define DEFINE_LHASH_OF_DEPRECATED(type) \ + static ossl_unused ossl_inline void \ + lh_##type##_node_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_node_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_node_usage_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_node_usage_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ + { \ + OPENSSL_LH_stats_bio((const OPENSSL_LHASH *)lh, out); \ + } +# else +# define DEFINE_LHASH_OF_DEPRECATED(type) +# endif + +# define DEFINE_LHASH_OF_EX(type) \ + LHASH_OF(type) { \ + union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ + }; \ + static unsigned long \ + lh_##type##_hfn_thunk(const void *data, OPENSSL_LH_HASHFUNC hfn) \ + { \ + unsigned long (*hfn_conv)(const type *) = (unsigned long (*)(const type *))hfn; \ + return hfn_conv((const type *)data); \ + } \ + static int 
lh_##type##_cfn_thunk(const void *da, const void *db, OPENSSL_LH_COMPFUNC cfn) \ + { \ + int (*cfn_conv)(const type *, const type *) = (int (*)(const type *, const type *))cfn; \ + return cfn_conv((const type *)da, (const type *)db); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_free(LHASH_OF(type) *lh) \ + { \ + OPENSSL_LH_free((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_flush(LHASH_OF(type) *lh) \ + { \ + OPENSSL_LH_flush((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_insert(LHASH_OF(type) *lh, type *d) \ + { \ + return (type *)OPENSSL_LH_insert((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_delete(LHASH_OF(type) *lh, const type *d) \ + { \ + return (type *)OPENSSL_LH_delete((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline type * \ + lh_##type##_retrieve(LHASH_OF(type) *lh, const type *d) \ + { \ + return (type *)OPENSSL_LH_retrieve((OPENSSL_LHASH *)lh, d); \ + } \ + static ossl_unused ossl_inline int \ + lh_##type##_error(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_error((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline unsigned long \ + lh_##type##_num_items(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_num_items((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline unsigned long \ + lh_##type##_get_down_load(LHASH_OF(type) *lh) \ + { \ + return OPENSSL_LH_get_down_load((OPENSSL_LHASH *)lh); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_set_down_load(LHASH_OF(type) *lh, unsigned long dl) \ + { \ + OPENSSL_LH_set_down_load((OPENSSL_LHASH *)lh, dl); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_thunk(void *node, OPENSSL_LH_DOALL_FUNC doall) \ + { \ + void (*doall_conv)(type *) = (void (*)(type *))doall; \ + doall_conv((type *)node); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_arg_thunk(void *node, void *arg, OPENSSL_LH_DOALL_FUNCARG doall) \ + { \ + void (*doall_conv)(type *, void *) = (void (*)(type *, void *))doall; \ + doall_conv((type *)node, arg); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall(LHASH_OF(type) *lh, void (*doall)(type *)) \ + { \ + OPENSSL_LH_doall((OPENSSL_LHASH *)lh, (OPENSSL_LH_DOALL_FUNC)doall); \ + } \ + static ossl_unused ossl_inline LHASH_OF(type) * \ + lh_##type##_new(unsigned long (*hfn)(const type *), \ + int (*cfn)(const type *, const type *)) \ + { \ + return (LHASH_OF(type) *)OPENSSL_LH_set_thunks(OPENSSL_LH_new((OPENSSL_LH_HASHFUNC)hfn, (OPENSSL_LH_COMPFUNC)cfn), \ + lh_##type##_hfn_thunk, lh_##type##_cfn_thunk, \ + lh_##type##_doall_thunk, \ + lh_##type##_doall_arg_thunk); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_arg(LHASH_OF(type) *lh, \ + void (*doallarg)(type *, void *), void *arg) \ + { \ + OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ + (OPENSSL_LH_DOALL_FUNCARG)doallarg, arg); \ + } \ + LHASH_OF(type) + +# define DEFINE_LHASH_OF(type) \ + DEFINE_LHASH_OF_EX(type); \ + DEFINE_LHASH_OF_DEPRECATED(type) \ + LHASH_OF(type) + +#define IMPLEMENT_LHASH_DOALL_ARG_CONST(type, argtype) \ + int_implement_lhash_doall(type, argtype, const type) + +#define IMPLEMENT_LHASH_DOALL_ARG(type, argtype) \ + int_implement_lhash_doall(type, argtype, type) + +#define int_implement_lhash_doall(type, argtype, cbargtype) \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_##argtype##_thunk(void *node, void *arg, OPENSSL_LH_DOALL_FUNCARG fn) \ + { \ + void (*fn_conv)(cbargtype *, 
argtype *) = (void (*)(cbargtype *, argtype *))fn; \ + fn_conv((cbargtype *)node, (argtype *)arg); \ + } \ + static ossl_unused ossl_inline void \ + lh_##type##_doall_##argtype(LHASH_OF(type) *lh, \ + void (*fn)(cbargtype *, argtype *), \ + argtype *arg) \ + { \ + OPENSSL_LH_doall_arg_thunk((OPENSSL_LHASH *)lh, \ + lh_##type##_doall_##argtype##_thunk, \ + (OPENSSL_LH_DOALL_FUNCARG)fn, \ + (void *)arg); \ + } \ + LHASH_OF(type) + +DEFINE_LHASH_OF_INTERNAL(OPENSSL_STRING); +#define lh_OPENSSL_STRING_new(hfn, cmp) ((LHASH_OF(OPENSSL_STRING) *)OPENSSL_LH_set_thunks(OPENSSL_LH_new(ossl_check_OPENSSL_STRING_lh_hashfunc_type(hfn), ossl_check_OPENSSL_STRING_lh_compfunc_type(cmp)), lh_OPENSSL_STRING_hash_thunk, lh_OPENSSL_STRING_comp_thunk, lh_OPENSSL_STRING_doall_thunk, lh_OPENSSL_STRING_doall_arg_thunk)) +#define lh_OPENSSL_STRING_free(lh) OPENSSL_LH_free(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_flush(lh) OPENSSL_LH_flush(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_insert(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_insert(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_delete(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_delete(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_const_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_retrieve(lh, ptr) ((OPENSSL_STRING *)OPENSSL_LH_retrieve(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_const_OPENSSL_STRING_lh_plain_type(ptr))) +#define lh_OPENSSL_STRING_error(lh) OPENSSL_LH_error(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_num_items(lh) OPENSSL_LH_num_items(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_OPENSSL_STRING_lh_type(lh), out) +#define lh_OPENSSL_STRING_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_OPENSSL_STRING_lh_type(lh)) +#define lh_OPENSSL_STRING_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_OPENSSL_STRING_lh_type(lh), dl) +#define lh_OPENSSL_STRING_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_OPENSSL_STRING_lh_type(lh), ossl_check_OPENSSL_STRING_lh_doallfunc_type(dfn)) +DEFINE_LHASH_OF_INTERNAL(OPENSSL_CSTRING); +#define lh_OPENSSL_CSTRING_new(hfn, cmp) ((LHASH_OF(OPENSSL_CSTRING) *)OPENSSL_LH_set_thunks(OPENSSL_LH_new(ossl_check_OPENSSL_CSTRING_lh_hashfunc_type(hfn), ossl_check_OPENSSL_CSTRING_lh_compfunc_type(cmp)), lh_OPENSSL_CSTRING_hash_thunk, lh_OPENSSL_CSTRING_comp_thunk, lh_OPENSSL_CSTRING_doall_thunk, lh_OPENSSL_CSTRING_doall_arg_thunk)) +#define lh_OPENSSL_CSTRING_free(lh) OPENSSL_LH_free(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_flush(lh) OPENSSL_LH_flush(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_insert(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_insert(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_delete(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_delete(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_const_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_retrieve(lh, ptr) ((OPENSSL_CSTRING *)OPENSSL_LH_retrieve(ossl_check_OPENSSL_CSTRING_lh_type(lh), 
ossl_check_const_OPENSSL_CSTRING_lh_plain_type(ptr))) +#define lh_OPENSSL_CSTRING_error(lh) OPENSSL_LH_error(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_num_items(lh) OPENSSL_LH_num_items(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_node_stats_bio(lh, out) OPENSSL_LH_node_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_node_usage_stats_bio(lh, out) OPENSSL_LH_node_usage_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_stats_bio(lh, out) OPENSSL_LH_stats_bio(ossl_check_const_OPENSSL_CSTRING_lh_type(lh), out) +#define lh_OPENSSL_CSTRING_get_down_load(lh) OPENSSL_LH_get_down_load(ossl_check_OPENSSL_CSTRING_lh_type(lh)) +#define lh_OPENSSL_CSTRING_set_down_load(lh, dl) OPENSSL_LH_set_down_load(ossl_check_OPENSSL_CSTRING_lh_type(lh), dl) +#define lh_OPENSSL_CSTRING_doall(lh, dfn) OPENSSL_LH_doall(ossl_check_OPENSSL_CSTRING_lh_type(lh), ossl_check_OPENSSL_CSTRING_lh_doallfunc_type(dfn)) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/ocsp.h b/contrib/openssl-cmake/common/include/openssl/ocsp.h new file mode 100644 index 000000000000..142b183140ba --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/ocsp.h @@ -0,0 +1,483 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ocsp.h.in + * + * Copyright 2000-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_OCSP_H +# define OPENSSL_OCSP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_OCSP_H +# endif + +# include +# include +# include + +/* + * These definitions are outside the OPENSSL_NO_OCSP guard because although for + * historical reasons they have OCSP_* names, they can actually be used + * independently of OCSP. E.g. 
see RFC5280 + */ +/*- + * CRLReason ::= ENUMERATED { + * unspecified (0), + * keyCompromise (1), + * cACompromise (2), + * affiliationChanged (3), + * superseded (4), + * cessationOfOperation (5), + * certificateHold (6), + * -- value 7 is not used + * removeFromCRL (8), + * privilegeWithdrawn (9), + * aACompromise (10) } + */ +# define OCSP_REVOKED_STATUS_NOSTATUS -1 +# define OCSP_REVOKED_STATUS_UNSPECIFIED 0 +# define OCSP_REVOKED_STATUS_KEYCOMPROMISE 1 +# define OCSP_REVOKED_STATUS_CACOMPROMISE 2 +# define OCSP_REVOKED_STATUS_AFFILIATIONCHANGED 3 +# define OCSP_REVOKED_STATUS_SUPERSEDED 4 +# define OCSP_REVOKED_STATUS_CESSATIONOFOPERATION 5 +# define OCSP_REVOKED_STATUS_CERTIFICATEHOLD 6 +# define OCSP_REVOKED_STATUS_REMOVEFROMCRL 8 +# define OCSP_REVOKED_STATUS_PRIVILEGEWITHDRAWN 9 +# define OCSP_REVOKED_STATUS_AACOMPROMISE 10 + + +# ifndef OPENSSL_NO_OCSP + +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +/* Various flags and values */ + +# define OCSP_DEFAULT_NONCE_LENGTH 16 + +# define OCSP_NOCERTS 0x1 +# define OCSP_NOINTERN 0x2 +# define OCSP_NOSIGS 0x4 +# define OCSP_NOCHAIN 0x8 +# define OCSP_NOVERIFY 0x10 +# define OCSP_NOEXPLICIT 0x20 +# define OCSP_NOCASIGN 0x40 +# define OCSP_NODELEGATED 0x80 +# define OCSP_NOCHECKS 0x100 +# define OCSP_TRUSTOTHER 0x200 +# define OCSP_RESPID_KEY 0x400 +# define OCSP_NOTIME 0x800 +# define OCSP_PARTIAL_CHAIN 0x1000 + +typedef struct ocsp_cert_id_st OCSP_CERTID; +typedef struct ocsp_one_request_st OCSP_ONEREQ; +typedef struct ocsp_req_info_st OCSP_REQINFO; +typedef struct ocsp_signature_st OCSP_SIGNATURE; +typedef struct ocsp_request_st OCSP_REQUEST; + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_CERTID, OCSP_CERTID, OCSP_CERTID) +#define sk_OCSP_CERTID_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_value(sk, idx) ((OCSP_CERTID *)OPENSSL_sk_value(ossl_check_const_OCSP_CERTID_sk_type(sk), (idx))) +#define sk_OCSP_CERTID_new(cmp) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new(ossl_check_OCSP_CERTID_compfunc_type(cmp))) +#define sk_OCSP_CERTID_new_null() ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new_null()) +#define sk_OCSP_CERTID_new_reserve(cmp, n) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_CERTID_compfunc_type(cmp), (n))) +#define sk_OCSP_CERTID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_CERTID_sk_type(sk), (n)) +#define sk_OCSP_CERTID_free(sk) OPENSSL_sk_free(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_delete(sk, i) ((OCSP_CERTID *)OPENSSL_sk_delete(ossl_check_OCSP_CERTID_sk_type(sk), (i))) +#define sk_OCSP_CERTID_delete_ptr(sk, ptr) ((OCSP_CERTID *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr))) +#define sk_OCSP_CERTID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_pop(sk) ((OCSP_CERTID *)OPENSSL_sk_pop(ossl_check_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_shift(sk) ((OCSP_CERTID *)OPENSSL_sk_shift(ossl_check_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_CERTID_sk_type(sk),ossl_check_OCSP_CERTID_freefunc_type(freefunc)) +#define sk_OCSP_CERTID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_CERTID_sk_type(sk), 
ossl_check_OCSP_CERTID_type(ptr), (idx)) +#define sk_OCSP_CERTID_set(sk, idx, ptr) ((OCSP_CERTID *)OPENSSL_sk_set(ossl_check_OCSP_CERTID_sk_type(sk), (idx), ossl_check_OCSP_CERTID_type(ptr))) +#define sk_OCSP_CERTID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr)) +#define sk_OCSP_CERTID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_type(ptr), pnum) +#define sk_OCSP_CERTID_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_CERTID_sk_type(sk)) +#define sk_OCSP_CERTID_dup(sk) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_dup(ossl_check_const_OCSP_CERTID_sk_type(sk))) +#define sk_OCSP_CERTID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_CERTID) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_copyfunc_type(copyfunc), ossl_check_OCSP_CERTID_freefunc_type(freefunc))) +#define sk_OCSP_CERTID_set_cmp_func(sk, cmp) ((sk_OCSP_CERTID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_CERTID_sk_type(sk), ossl_check_OCSP_CERTID_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_ONEREQ, OCSP_ONEREQ, OCSP_ONEREQ) +#define sk_OCSP_ONEREQ_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_value(sk, idx) ((OCSP_ONEREQ *)OPENSSL_sk_value(ossl_check_const_OCSP_ONEREQ_sk_type(sk), (idx))) +#define sk_OCSP_ONEREQ_new(cmp) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new(ossl_check_OCSP_ONEREQ_compfunc_type(cmp))) +#define sk_OCSP_ONEREQ_new_null() ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new_null()) +#define sk_OCSP_ONEREQ_new_reserve(cmp, n) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_ONEREQ_compfunc_type(cmp), (n))) +#define sk_OCSP_ONEREQ_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_ONEREQ_sk_type(sk), (n)) +#define sk_OCSP_ONEREQ_free(sk) OPENSSL_sk_free(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_delete(sk, i) ((OCSP_ONEREQ *)OPENSSL_sk_delete(ossl_check_OCSP_ONEREQ_sk_type(sk), (i))) +#define sk_OCSP_ONEREQ_delete_ptr(sk, ptr) ((OCSP_ONEREQ *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr))) +#define sk_OCSP_ONEREQ_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_pop(sk) ((OCSP_ONEREQ *)OPENSSL_sk_pop(ossl_check_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_shift(sk) ((OCSP_ONEREQ *)OPENSSL_sk_shift(ossl_check_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_ONEREQ_sk_type(sk),ossl_check_OCSP_ONEREQ_freefunc_type(freefunc)) +#define sk_OCSP_ONEREQ_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr), (idx)) +#define sk_OCSP_ONEREQ_set(sk, idx, ptr) ((OCSP_ONEREQ *)OPENSSL_sk_set(ossl_check_OCSP_ONEREQ_sk_type(sk), (idx), ossl_check_OCSP_ONEREQ_type(ptr))) +#define sk_OCSP_ONEREQ_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_find_ex(sk, ptr) 
OPENSSL_sk_find_ex(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr)) +#define sk_OCSP_ONEREQ_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_type(ptr), pnum) +#define sk_OCSP_ONEREQ_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_ONEREQ_sk_type(sk)) +#define sk_OCSP_ONEREQ_dup(sk) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_dup(ossl_check_const_OCSP_ONEREQ_sk_type(sk))) +#define sk_OCSP_ONEREQ_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_ONEREQ) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_copyfunc_type(copyfunc), ossl_check_OCSP_ONEREQ_freefunc_type(freefunc))) +#define sk_OCSP_ONEREQ_set_cmp_func(sk, cmp) ((sk_OCSP_ONEREQ_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_ONEREQ_sk_type(sk), ossl_check_OCSP_ONEREQ_compfunc_type(cmp))) + + +# define OCSP_RESPONSE_STATUS_SUCCESSFUL 0 +# define OCSP_RESPONSE_STATUS_MALFORMEDREQUEST 1 +# define OCSP_RESPONSE_STATUS_INTERNALERROR 2 +# define OCSP_RESPONSE_STATUS_TRYLATER 3 +# define OCSP_RESPONSE_STATUS_SIGREQUIRED 5 +# define OCSP_RESPONSE_STATUS_UNAUTHORIZED 6 + +typedef struct ocsp_resp_bytes_st OCSP_RESPBYTES; + +# define V_OCSP_RESPID_NAME 0 +# define V_OCSP_RESPID_KEY 1 + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_RESPID, OCSP_RESPID, OCSP_RESPID) +#define sk_OCSP_RESPID_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_value(sk, idx) ((OCSP_RESPID *)OPENSSL_sk_value(ossl_check_const_OCSP_RESPID_sk_type(sk), (idx))) +#define sk_OCSP_RESPID_new(cmp) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new(ossl_check_OCSP_RESPID_compfunc_type(cmp))) +#define sk_OCSP_RESPID_new_null() ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new_null()) +#define sk_OCSP_RESPID_new_reserve(cmp, n) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_RESPID_compfunc_type(cmp), (n))) +#define sk_OCSP_RESPID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_RESPID_sk_type(sk), (n)) +#define sk_OCSP_RESPID_free(sk) OPENSSL_sk_free(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_delete(sk, i) ((OCSP_RESPID *)OPENSSL_sk_delete(ossl_check_OCSP_RESPID_sk_type(sk), (i))) +#define sk_OCSP_RESPID_delete_ptr(sk, ptr) ((OCSP_RESPID *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr))) +#define sk_OCSP_RESPID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_pop(sk) ((OCSP_RESPID *)OPENSSL_sk_pop(ossl_check_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_shift(sk) ((OCSP_RESPID *)OPENSSL_sk_shift(ossl_check_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_RESPID_sk_type(sk),ossl_check_OCSP_RESPID_freefunc_type(freefunc)) +#define sk_OCSP_RESPID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr), (idx)) +#define sk_OCSP_RESPID_set(sk, idx, ptr) ((OCSP_RESPID *)OPENSSL_sk_set(ossl_check_OCSP_RESPID_sk_type(sk), (idx), ossl_check_OCSP_RESPID_type(ptr))) +#define sk_OCSP_RESPID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define 
sk_OCSP_RESPID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr)) +#define sk_OCSP_RESPID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_type(ptr), pnum) +#define sk_OCSP_RESPID_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_RESPID_sk_type(sk)) +#define sk_OCSP_RESPID_dup(sk) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_dup(ossl_check_const_OCSP_RESPID_sk_type(sk))) +#define sk_OCSP_RESPID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_RESPID) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_copyfunc_type(copyfunc), ossl_check_OCSP_RESPID_freefunc_type(freefunc))) +#define sk_OCSP_RESPID_set_cmp_func(sk, cmp) ((sk_OCSP_RESPID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_RESPID_sk_type(sk), ossl_check_OCSP_RESPID_compfunc_type(cmp))) + + +typedef struct ocsp_revoked_info_st OCSP_REVOKEDINFO; + +# define V_OCSP_CERTSTATUS_GOOD 0 +# define V_OCSP_CERTSTATUS_REVOKED 1 +# define V_OCSP_CERTSTATUS_UNKNOWN 2 + +typedef struct ocsp_cert_status_st OCSP_CERTSTATUS; +typedef struct ocsp_single_response_st OCSP_SINGLERESP; + +SKM_DEFINE_STACK_OF_INTERNAL(OCSP_SINGLERESP, OCSP_SINGLERESP, OCSP_SINGLERESP) +#define sk_OCSP_SINGLERESP_num(sk) OPENSSL_sk_num(ossl_check_const_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_value(sk, idx) ((OCSP_SINGLERESP *)OPENSSL_sk_value(ossl_check_const_OCSP_SINGLERESP_sk_type(sk), (idx))) +#define sk_OCSP_SINGLERESP_new(cmp) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new(ossl_check_OCSP_SINGLERESP_compfunc_type(cmp))) +#define sk_OCSP_SINGLERESP_new_null() ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new_null()) +#define sk_OCSP_SINGLERESP_new_reserve(cmp, n) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_new_reserve(ossl_check_OCSP_SINGLERESP_compfunc_type(cmp), (n))) +#define sk_OCSP_SINGLERESP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OCSP_SINGLERESP_sk_type(sk), (n)) +#define sk_OCSP_SINGLERESP_free(sk) OPENSSL_sk_free(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_zero(sk) OPENSSL_sk_zero(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_delete(sk, i) ((OCSP_SINGLERESP *)OPENSSL_sk_delete(ossl_check_OCSP_SINGLERESP_sk_type(sk), (i))) +#define sk_OCSP_SINGLERESP_delete_ptr(sk, ptr) ((OCSP_SINGLERESP *)OPENSSL_sk_delete_ptr(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr))) +#define sk_OCSP_SINGLERESP_push(sk, ptr) OPENSSL_sk_push(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_pop(sk) ((OCSP_SINGLERESP *)OPENSSL_sk_pop(ossl_check_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_shift(sk) ((OCSP_SINGLERESP *)OPENSSL_sk_shift(ossl_check_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OCSP_SINGLERESP_sk_type(sk),ossl_check_OCSP_SINGLERESP_freefunc_type(freefunc)) +#define sk_OCSP_SINGLERESP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr), (idx)) +#define sk_OCSP_SINGLERESP_set(sk, idx, ptr) ((OCSP_SINGLERESP *)OPENSSL_sk_set(ossl_check_OCSP_SINGLERESP_sk_type(sk), (idx), ossl_check_OCSP_SINGLERESP_type(ptr))) +#define 
sk_OCSP_SINGLERESP_find(sk, ptr) OPENSSL_sk_find(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr)) +#define sk_OCSP_SINGLERESP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_type(ptr), pnum) +#define sk_OCSP_SINGLERESP_sort(sk) OPENSSL_sk_sort(ossl_check_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OCSP_SINGLERESP_sk_type(sk)) +#define sk_OCSP_SINGLERESP_dup(sk) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_dup(ossl_check_const_OCSP_SINGLERESP_sk_type(sk))) +#define sk_OCSP_SINGLERESP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OCSP_SINGLERESP) *)OPENSSL_sk_deep_copy(ossl_check_const_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_copyfunc_type(copyfunc), ossl_check_OCSP_SINGLERESP_freefunc_type(freefunc))) +#define sk_OCSP_SINGLERESP_set_cmp_func(sk, cmp) ((sk_OCSP_SINGLERESP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OCSP_SINGLERESP_sk_type(sk), ossl_check_OCSP_SINGLERESP_compfunc_type(cmp))) + + +typedef struct ocsp_response_data_st OCSP_RESPDATA; + +typedef struct ocsp_basic_response_st OCSP_BASICRESP; + +typedef struct ocsp_crl_id_st OCSP_CRLID; +typedef struct ocsp_service_locator_st OCSP_SERVICELOC; + +# define PEM_STRING_OCSP_REQUEST "OCSP REQUEST" +# define PEM_STRING_OCSP_RESPONSE "OCSP RESPONSE" + +# define d2i_OCSP_REQUEST_bio(bp,p) ASN1_d2i_bio_of(OCSP_REQUEST,OCSP_REQUEST_new,d2i_OCSP_REQUEST,bp,p) + +# define d2i_OCSP_RESPONSE_bio(bp,p) ASN1_d2i_bio_of(OCSP_RESPONSE,OCSP_RESPONSE_new,d2i_OCSP_RESPONSE,bp,p) + +# define PEM_read_bio_OCSP_REQUEST(bp,x,cb) (OCSP_REQUEST *)PEM_ASN1_read_bio( \ + (char *(*)())d2i_OCSP_REQUEST,PEM_STRING_OCSP_REQUEST, \ + bp,(char **)(x),cb,NULL) + +# define PEM_read_bio_OCSP_RESPONSE(bp,x,cb) (OCSP_RESPONSE *)PEM_ASN1_read_bio(\ + (char *(*)())d2i_OCSP_RESPONSE,PEM_STRING_OCSP_RESPONSE, \ + bp,(char **)(x),cb,NULL) + +# define PEM_write_bio_OCSP_REQUEST(bp,o) \ + PEM_ASN1_write_bio((int (*)())i2d_OCSP_REQUEST,PEM_STRING_OCSP_REQUEST,\ + bp,(char *)(o), NULL,NULL,0,NULL,NULL) + +# define PEM_write_bio_OCSP_RESPONSE(bp,o) \ + PEM_ASN1_write_bio((int (*)())i2d_OCSP_RESPONSE,PEM_STRING_OCSP_RESPONSE,\ + bp,(char *)(o), NULL,NULL,0,NULL,NULL) + +# define i2d_OCSP_RESPONSE_bio(bp,o) ASN1_i2d_bio_of(OCSP_RESPONSE,i2d_OCSP_RESPONSE,bp,o) + +# define i2d_OCSP_REQUEST_bio(bp,o) ASN1_i2d_bio_of(OCSP_REQUEST,i2d_OCSP_REQUEST,bp,o) + +# define ASN1_BIT_STRING_digest(data,type,md,len) \ + ASN1_item_digest(ASN1_ITEM_rptr(ASN1_BIT_STRING),type,data,md,len) + +# define OCSP_CERTSTATUS_dup(cs)\ + (OCSP_CERTSTATUS*)ASN1_dup((i2d_of_void *)i2d_OCSP_CERTSTATUS,\ + (d2i_of_void *)d2i_OCSP_CERTSTATUS,(char *)(cs)) + +DECLARE_ASN1_DUP_FUNCTION(OCSP_CERTID) + +OSSL_HTTP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, + const OCSP_REQUEST *req, int buf_size); +OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req); + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +typedef OSSL_HTTP_REQ_CTX OCSP_REQ_CTX; +# define OCSP_REQ_CTX_new(io, buf_size) \ + OSSL_HTTP_REQ_CTX_new(io, io, buf_size) +# define OCSP_REQ_CTX_free OSSL_HTTP_REQ_CTX_free +# define OCSP_REQ_CTX_http(rctx, op, path) \ + (OSSL_HTTP_REQ_CTX_set_expected(rctx, NULL, 1 /* asn1 */, 0, 0) && \ + OSSL_HTTP_REQ_CTX_set_request_line(rctx, strcmp(op, "POST") == 0, \ + NULL, NULL, path)) +# define 
OCSP_REQ_CTX_add1_header OSSL_HTTP_REQ_CTX_add1_header +# define OCSP_REQ_CTX_i2d(r, it, req) \ + OSSL_HTTP_REQ_CTX_set1_req(r, "application/ocsp-request", it, req) +# define OCSP_REQ_CTX_set1_req(r, req) \ + OCSP_REQ_CTX_i2d(r, ASN1_ITEM_rptr(OCSP_REQUEST), (ASN1_VALUE *)(req)) +# define OCSP_REQ_CTX_nbio OSSL_HTTP_REQ_CTX_nbio +# define OCSP_REQ_CTX_nbio_d2i OSSL_HTTP_REQ_CTX_nbio_d2i +# define OCSP_sendreq_nbio(p, r) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(r, (ASN1_VALUE **)(p), \ + ASN1_ITEM_rptr(OCSP_RESPONSE)) +# define OCSP_REQ_CTX_get0_mem_bio OSSL_HTTP_REQ_CTX_get0_mem_bio +# define OCSP_set_max_response_length OSSL_HTTP_REQ_CTX_set_max_response_length +# endif + +OCSP_CERTID *OCSP_cert_to_id(const EVP_MD *dgst, const X509 *subject, + const X509 *issuer); + +OCSP_CERTID *OCSP_cert_id_new(const EVP_MD *dgst, + const X509_NAME *issuerName, + const ASN1_BIT_STRING *issuerKey, + const ASN1_INTEGER *serialNumber); + +OCSP_ONEREQ *OCSP_request_add0_id(OCSP_REQUEST *req, OCSP_CERTID *cid); + +int OCSP_request_add1_nonce(OCSP_REQUEST *req, unsigned char *val, int len); +int OCSP_basic_add1_nonce(OCSP_BASICRESP *resp, unsigned char *val, int len); +int OCSP_check_nonce(OCSP_REQUEST *req, OCSP_BASICRESP *bs); +int OCSP_copy_nonce(OCSP_BASICRESP *resp, OCSP_REQUEST *req); + +int OCSP_request_set1_name(OCSP_REQUEST *req, const X509_NAME *nm); +int OCSP_request_add1_cert(OCSP_REQUEST *req, X509 *cert); + +int OCSP_request_sign(OCSP_REQUEST *req, + X509 *signer, + EVP_PKEY *key, + const EVP_MD *dgst, + STACK_OF(X509) *certs, unsigned long flags); + +int OCSP_response_status(OCSP_RESPONSE *resp); +OCSP_BASICRESP *OCSP_response_get1_basic(OCSP_RESPONSE *resp); + +const ASN1_OCTET_STRING *OCSP_resp_get0_signature(const OCSP_BASICRESP *bs); +const X509_ALGOR *OCSP_resp_get0_tbs_sigalg(const OCSP_BASICRESP *bs); +const OCSP_RESPDATA *OCSP_resp_get0_respdata(const OCSP_BASICRESP *bs); +int OCSP_resp_get0_signer(OCSP_BASICRESP *bs, X509 **signer, + STACK_OF(X509) *extra_certs); + +int OCSP_resp_count(OCSP_BASICRESP *bs); +OCSP_SINGLERESP *OCSP_resp_get0(OCSP_BASICRESP *bs, int idx); +const ASN1_GENERALIZEDTIME *OCSP_resp_get0_produced_at(const OCSP_BASICRESP* bs); +const STACK_OF(X509) *OCSP_resp_get0_certs(const OCSP_BASICRESP *bs); +int OCSP_resp_get0_id(const OCSP_BASICRESP *bs, + const ASN1_OCTET_STRING **pid, + const X509_NAME **pname); +int OCSP_resp_get1_id(const OCSP_BASICRESP *bs, + ASN1_OCTET_STRING **pid, + X509_NAME **pname); + +int OCSP_resp_find(OCSP_BASICRESP *bs, OCSP_CERTID *id, int last); +int OCSP_single_get0_status(OCSP_SINGLERESP *single, int *reason, + ASN1_GENERALIZEDTIME **revtime, + ASN1_GENERALIZEDTIME **thisupd, + ASN1_GENERALIZEDTIME **nextupd); +int OCSP_resp_find_status(OCSP_BASICRESP *bs, OCSP_CERTID *id, int *status, + int *reason, + ASN1_GENERALIZEDTIME **revtime, + ASN1_GENERALIZEDTIME **thisupd, + ASN1_GENERALIZEDTIME **nextupd); +int OCSP_check_validity(ASN1_GENERALIZEDTIME *thisupd, + ASN1_GENERALIZEDTIME *nextupd, long sec, long maxsec); + +int OCSP_request_verify(OCSP_REQUEST *req, STACK_OF(X509) *certs, + X509_STORE *store, unsigned long flags); + +# define OCSP_parse_url(url, host, port, path, ssl) \ + OSSL_HTTP_parse_url(url, ssl, NULL, host, port, NULL, path, NULL, NULL) + +int OCSP_id_issuer_cmp(const OCSP_CERTID *a, const OCSP_CERTID *b); +int OCSP_id_cmp(const OCSP_CERTID *a, const OCSP_CERTID *b); + +int OCSP_request_onereq_count(OCSP_REQUEST *req); +OCSP_ONEREQ *OCSP_request_onereq_get0(OCSP_REQUEST *req, int i); +OCSP_CERTID *OCSP_onereq_get0_id(OCSP_ONEREQ 
*one); +int OCSP_id_get0_info(ASN1_OCTET_STRING **piNameHash, ASN1_OBJECT **pmd, + ASN1_OCTET_STRING **pikeyHash, + ASN1_INTEGER **pserial, OCSP_CERTID *cid); +int OCSP_request_is_signed(OCSP_REQUEST *req); +OCSP_RESPONSE *OCSP_response_create(int status, OCSP_BASICRESP *bs); +OCSP_SINGLERESP *OCSP_basic_add1_status(OCSP_BASICRESP *rsp, + OCSP_CERTID *cid, + int status, int reason, + ASN1_TIME *revtime, + ASN1_TIME *thisupd, + ASN1_TIME *nextupd); +int OCSP_basic_add1_cert(OCSP_BASICRESP *resp, X509 *cert); +int OCSP_basic_sign(OCSP_BASICRESP *brsp, + X509 *signer, EVP_PKEY *key, const EVP_MD *dgst, + STACK_OF(X509) *certs, unsigned long flags); +int OCSP_basic_sign_ctx(OCSP_BASICRESP *brsp, + X509 *signer, EVP_MD_CTX *ctx, + STACK_OF(X509) *certs, unsigned long flags); +int OCSP_RESPID_set_by_name(OCSP_RESPID *respid, X509 *cert); +int OCSP_RESPID_set_by_key_ex(OCSP_RESPID *respid, X509 *cert, + OSSL_LIB_CTX *libctx, const char *propq); +int OCSP_RESPID_set_by_key(OCSP_RESPID *respid, X509 *cert); +int OCSP_RESPID_match_ex(OCSP_RESPID *respid, X509 *cert, OSSL_LIB_CTX *libctx, + const char *propq); +int OCSP_RESPID_match(OCSP_RESPID *respid, X509 *cert); + +X509_EXTENSION *OCSP_crlID_new(const char *url, long *n, char *tim); + +X509_EXTENSION *OCSP_accept_responses_new(char **oids); + +X509_EXTENSION *OCSP_archive_cutoff_new(char *tim); + +X509_EXTENSION *OCSP_url_svcloc_new(const X509_NAME *issuer, const char **urls); + +int OCSP_REQUEST_get_ext_count(OCSP_REQUEST *x); +int OCSP_REQUEST_get_ext_by_NID(OCSP_REQUEST *x, int nid, int lastpos); +int OCSP_REQUEST_get_ext_by_OBJ(OCSP_REQUEST *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_REQUEST_get_ext_by_critical(OCSP_REQUEST *x, int crit, int lastpos); +X509_EXTENSION *OCSP_REQUEST_get_ext(OCSP_REQUEST *x, int loc); +X509_EXTENSION *OCSP_REQUEST_delete_ext(OCSP_REQUEST *x, int loc); +void *OCSP_REQUEST_get1_ext_d2i(OCSP_REQUEST *x, int nid, int *crit, + int *idx); +int OCSP_REQUEST_add1_ext_i2d(OCSP_REQUEST *x, int nid, void *value, int crit, + unsigned long flags); +int OCSP_REQUEST_add_ext(OCSP_REQUEST *x, X509_EXTENSION *ex, int loc); + +int OCSP_ONEREQ_get_ext_count(OCSP_ONEREQ *x); +int OCSP_ONEREQ_get_ext_by_NID(OCSP_ONEREQ *x, int nid, int lastpos); +int OCSP_ONEREQ_get_ext_by_OBJ(OCSP_ONEREQ *x, const ASN1_OBJECT *obj, int lastpos); +int OCSP_ONEREQ_get_ext_by_critical(OCSP_ONEREQ *x, int crit, int lastpos); +X509_EXTENSION *OCSP_ONEREQ_get_ext(OCSP_ONEREQ *x, int loc); +X509_EXTENSION *OCSP_ONEREQ_delete_ext(OCSP_ONEREQ *x, int loc); +void *OCSP_ONEREQ_get1_ext_d2i(OCSP_ONEREQ *x, int nid, int *crit, int *idx); +int OCSP_ONEREQ_add1_ext_i2d(OCSP_ONEREQ *x, int nid, void *value, int crit, + unsigned long flags); +int OCSP_ONEREQ_add_ext(OCSP_ONEREQ *x, X509_EXTENSION *ex, int loc); + +int OCSP_BASICRESP_get_ext_count(OCSP_BASICRESP *x); +int OCSP_BASICRESP_get_ext_by_NID(OCSP_BASICRESP *x, int nid, int lastpos); +int OCSP_BASICRESP_get_ext_by_OBJ(OCSP_BASICRESP *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_BASICRESP_get_ext_by_critical(OCSP_BASICRESP *x, int crit, + int lastpos); +X509_EXTENSION *OCSP_BASICRESP_get_ext(OCSP_BASICRESP *x, int loc); +X509_EXTENSION *OCSP_BASICRESP_delete_ext(OCSP_BASICRESP *x, int loc); +void *OCSP_BASICRESP_get1_ext_d2i(OCSP_BASICRESP *x, int nid, int *crit, + int *idx); +int OCSP_BASICRESP_add1_ext_i2d(OCSP_BASICRESP *x, int nid, void *value, + int crit, unsigned long flags); +int OCSP_BASICRESP_add_ext(OCSP_BASICRESP *x, X509_EXTENSION *ex, int loc); + +int 
OCSP_SINGLERESP_get_ext_count(OCSP_SINGLERESP *x); +int OCSP_SINGLERESP_get_ext_by_NID(OCSP_SINGLERESP *x, int nid, int lastpos); +int OCSP_SINGLERESP_get_ext_by_OBJ(OCSP_SINGLERESP *x, const ASN1_OBJECT *obj, + int lastpos); +int OCSP_SINGLERESP_get_ext_by_critical(OCSP_SINGLERESP *x, int crit, + int lastpos); +X509_EXTENSION *OCSP_SINGLERESP_get_ext(OCSP_SINGLERESP *x, int loc); +X509_EXTENSION *OCSP_SINGLERESP_delete_ext(OCSP_SINGLERESP *x, int loc); +void *OCSP_SINGLERESP_get1_ext_d2i(OCSP_SINGLERESP *x, int nid, int *crit, + int *idx); +int OCSP_SINGLERESP_add1_ext_i2d(OCSP_SINGLERESP *x, int nid, void *value, + int crit, unsigned long flags); +int OCSP_SINGLERESP_add_ext(OCSP_SINGLERESP *x, X509_EXTENSION *ex, int loc); +const OCSP_CERTID *OCSP_SINGLERESP_get0_id(const OCSP_SINGLERESP *x); + +DECLARE_ASN1_FUNCTIONS(OCSP_SINGLERESP) +DECLARE_ASN1_FUNCTIONS(OCSP_CERTSTATUS) +DECLARE_ASN1_FUNCTIONS(OCSP_REVOKEDINFO) +DECLARE_ASN1_FUNCTIONS(OCSP_BASICRESP) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPDATA) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPID) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPONSE) +DECLARE_ASN1_FUNCTIONS(OCSP_RESPBYTES) +DECLARE_ASN1_FUNCTIONS(OCSP_ONEREQ) +DECLARE_ASN1_FUNCTIONS(OCSP_CERTID) +DECLARE_ASN1_FUNCTIONS(OCSP_REQUEST) +DECLARE_ASN1_FUNCTIONS(OCSP_SIGNATURE) +DECLARE_ASN1_FUNCTIONS(OCSP_REQINFO) +DECLARE_ASN1_FUNCTIONS(OCSP_CRLID) +DECLARE_ASN1_FUNCTIONS(OCSP_SERVICELOC) + +const char *OCSP_response_status_str(long s); +const char *OCSP_cert_status_str(long s); +const char *OCSP_crl_reason_str(long s); + +int OCSP_REQUEST_print(BIO *bp, OCSP_REQUEST *a, unsigned long flags); +int OCSP_RESPONSE_print(BIO *bp, OCSP_RESPONSE *o, unsigned long flags); + +int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, + X509_STORE *st, unsigned long flags); + + +# ifdef __cplusplus +} +# endif +# endif /* !defined(OPENSSL_NO_OCSP) */ +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/opensslv.h b/contrib/openssl-cmake/common/include/openssl/opensslv.h new file mode 100644 index 000000000000..110c34de0e46 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/opensslv.h @@ -0,0 +1,114 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/opensslv.h.in + * + * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef OPENSSL_OPENSSLV_H +# define OPENSSL_OPENSSLV_H +# pragma once + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * SECTION 1: VERSION DATA. These will change for each release + */ + +/* + * Base version macros + * + * These macros express version number MAJOR.MINOR.PATCH exactly + */ +# define OPENSSL_VERSION_MAJOR 3 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 + +/* + * Additional version information + * + * These are also part of the new version scheme, but aren't part + * of the version number itself. + */ + +/* Could be: #define OPENSSL_VERSION_PRE_RELEASE "-alpha.1" */ +# define OPENSSL_VERSION_PRE_RELEASE "" +/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+fips" */ +/* Could be: #define OPENSSL_VERSION_BUILD_METADATA "+vendor.1" */ +# define OPENSSL_VERSION_BUILD_METADATA "" + +/* + * Note: The OpenSSL Project will never define OPENSSL_VERSION_BUILD_METADATA + * to be anything but the empty string. 
Its use is entirely reserved for + * others + */ + +/* + * Shared library version + * + * This is strictly to express ABI version, which may or may not + * be related to the API version expressed with the macros above. + * This is defined in free form. + */ +# define OPENSSL_SHLIB_VERSION 3 + +/* + * SECTION 2: USEFUL MACROS + */ + +/* For checking general API compatibility when preprocessing */ +# define OPENSSL_VERSION_PREREQ(maj,min) \ + ((OPENSSL_VERSION_MAJOR << 16) + OPENSSL_VERSION_MINOR >= ((maj) << 16) + (min)) + +/* + * Macros to get the version in easily digested string form, both the short + * "MAJOR.MINOR.PATCH" variant (where MAJOR, MINOR and PATCH are replaced + * with the values from the corresponding OPENSSL_VERSION_ macros) and the + * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and + * OPENSSL_VERSION_BUILD_METADATA_STR appended. + */ +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" + +/* + * SECTION 3: ADDITIONAL METADATA + * + * These strings are defined separately to allow them to be parsable. + */ +# define OPENSSL_RELEASE_DATE "7 Apr 2026" + +/* + * SECTION 4: BACKWARD COMPATIBILITY + */ + +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" + +/* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ +# ifdef OPENSSL_VERSION_PRE_RELEASE +# define _OPENSSL_VERSION_PRE_RELEASE 0x0L +# else +# define _OPENSSL_VERSION_PRE_RELEASE 0xfL +# endif +# define OPENSSL_VERSION_NUMBER \ + ( (OPENSSL_VERSION_MAJOR<<28) \ + |(OPENSSL_VERSION_MINOR<<20) \ + |(OPENSSL_VERSION_PATCH<<4) \ + |_OPENSSL_VERSION_PRE_RELEASE ) + +# ifdef __cplusplus +} +# endif + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_OPENSSLV_H +# endif + +#endif /* OPENSSL_OPENSSLV_H */ diff --git a/contrib/openssl-cmake/common/include/openssl/pkcs12.h b/contrib/openssl-cmake/common/include/openssl/pkcs12.h new file mode 100644 index 000000000000..0809645dad0b --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/pkcs12.h @@ -0,0 +1,366 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/pkcs12.h.in + * + * Copyright 1999-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_PKCS12_H +# define OPENSSL_PKCS12_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_PKCS12_H +# endif + +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define PKCS12_KEY_ID 1 +# define PKCS12_IV_ID 2 +# define PKCS12_MAC_ID 3 + +/* Default iteration count */ +# ifndef PKCS12_DEFAULT_ITER +# define PKCS12_DEFAULT_ITER PKCS5_DEFAULT_ITER +# endif + +# define PKCS12_MAC_KEY_LENGTH 20 + +/* The macro is expected to be used only internally. Kept for backwards compatibility. */ +# define PKCS12_SALT_LEN 8 + +/* It's not clear if these are actually needed... 
*/ +# define PKCS12_key_gen PKCS12_key_gen_utf8 +# define PKCS12_add_friendlyname PKCS12_add_friendlyname_utf8 + +/* MS key usage constants */ + +# define KEY_EX 0x10 +# define KEY_SIG 0x80 + +typedef struct PKCS12_MAC_DATA_st PKCS12_MAC_DATA; + +typedef struct PKCS12_st PKCS12; + +typedef struct PKCS12_SAFEBAG_st PKCS12_SAFEBAG; + +SKM_DEFINE_STACK_OF_INTERNAL(PKCS12_SAFEBAG, PKCS12_SAFEBAG, PKCS12_SAFEBAG) +#define sk_PKCS12_SAFEBAG_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_value(sk, idx) ((PKCS12_SAFEBAG *)OPENSSL_sk_value(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk), (idx))) +#define sk_PKCS12_SAFEBAG_new(cmp) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new(ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp))) +#define sk_PKCS12_SAFEBAG_new_null() ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new_null()) +#define sk_PKCS12_SAFEBAG_new_reserve(cmp, n) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_new_reserve(ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp), (n))) +#define sk_PKCS12_SAFEBAG_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (n)) +#define sk_PKCS12_SAFEBAG_free(sk) OPENSSL_sk_free(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_delete(sk, i) ((PKCS12_SAFEBAG *)OPENSSL_sk_delete(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (i))) +#define sk_PKCS12_SAFEBAG_delete_ptr(sk, ptr) ((PKCS12_SAFEBAG *)OPENSSL_sk_delete_ptr(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr))) +#define sk_PKCS12_SAFEBAG_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_pop(sk) ((PKCS12_SAFEBAG *)OPENSSL_sk_pop(ossl_check_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_shift(sk) ((PKCS12_SAFEBAG *)OPENSSL_sk_shift(ossl_check_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PKCS12_SAFEBAG_sk_type(sk),ossl_check_PKCS12_SAFEBAG_freefunc_type(freefunc)) +#define sk_PKCS12_SAFEBAG_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr), (idx)) +#define sk_PKCS12_SAFEBAG_set(sk, idx, ptr) ((PKCS12_SAFEBAG *)OPENSSL_sk_set(ossl_check_PKCS12_SAFEBAG_sk_type(sk), (idx), ossl_check_PKCS12_SAFEBAG_type(ptr))) +#define sk_PKCS12_SAFEBAG_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr)) +#define sk_PKCS12_SAFEBAG_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_type(ptr), pnum) +#define sk_PKCS12_SAFEBAG_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk)) +#define sk_PKCS12_SAFEBAG_dup(sk) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_dup(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk))) +#define sk_PKCS12_SAFEBAG_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS12_SAFEBAG) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_copyfunc_type(copyfunc), ossl_check_PKCS12_SAFEBAG_freefunc_type(freefunc))) 
+#define sk_PKCS12_SAFEBAG_set_cmp_func(sk, cmp) ((sk_PKCS12_SAFEBAG_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS12_SAFEBAG_sk_type(sk), ossl_check_PKCS12_SAFEBAG_compfunc_type(cmp))) + + +typedef struct pkcs12_bag_st PKCS12_BAGS; + +# define PKCS12_ERROR 0 +# define PKCS12_OK 1 + +/* Compatibility macros */ + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 + +# define M_PKCS12_bag_type PKCS12_bag_type +# define M_PKCS12_cert_bag_type PKCS12_cert_bag_type +# define M_PKCS12_crl_bag_type PKCS12_cert_bag_type + +# define PKCS12_certbag2x509 PKCS12_SAFEBAG_get1_cert +# define PKCS12_certbag2scrl PKCS12_SAFEBAG_get1_crl +# define PKCS12_bag_type PKCS12_SAFEBAG_get_nid +# define PKCS12_cert_bag_type PKCS12_SAFEBAG_get_bag_nid +# define PKCS12_x5092certbag PKCS12_SAFEBAG_create_cert +# define PKCS12_x509crl2certbag PKCS12_SAFEBAG_create_crl +# define PKCS12_MAKE_KEYBAG PKCS12_SAFEBAG_create0_p8inf +# define PKCS12_MAKE_SHKEYBAG PKCS12_SAFEBAG_create_pkcs8_encrypt + +#endif +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 ASN1_TYPE *PKCS12_get_attr(const PKCS12_SAFEBAG *bag, + int attr_nid); +#endif + +ASN1_TYPE *PKCS8_get_attr(PKCS8_PRIV_KEY_INFO *p8, int attr_nid); +int PKCS12_mac_present(const PKCS12 *p12); +void PKCS12_get0_mac(const ASN1_OCTET_STRING **pmac, + const X509_ALGOR **pmacalg, + const ASN1_OCTET_STRING **psalt, + const ASN1_INTEGER **piter, + const PKCS12 *p12); + +const ASN1_TYPE *PKCS12_SAFEBAG_get0_attr(const PKCS12_SAFEBAG *bag, + int attr_nid); +const ASN1_OBJECT *PKCS12_SAFEBAG_get0_type(const PKCS12_SAFEBAG *bag); +int PKCS12_SAFEBAG_get_nid(const PKCS12_SAFEBAG *bag); +int PKCS12_SAFEBAG_get_bag_nid(const PKCS12_SAFEBAG *bag); +const ASN1_TYPE *PKCS12_SAFEBAG_get0_bag_obj(const PKCS12_SAFEBAG *bag); +const ASN1_OBJECT *PKCS12_SAFEBAG_get0_bag_type(const PKCS12_SAFEBAG *bag); + +X509 *PKCS12_SAFEBAG_get1_cert_ex(const PKCS12_SAFEBAG *bag, OSSL_LIB_CTX *libctx, const char *propq); +X509 *PKCS12_SAFEBAG_get1_cert(const PKCS12_SAFEBAG *bag); +X509_CRL *PKCS12_SAFEBAG_get1_crl_ex(const PKCS12_SAFEBAG *bag, OSSL_LIB_CTX *libctx, const char *propq); +X509_CRL *PKCS12_SAFEBAG_get1_crl(const PKCS12_SAFEBAG *bag); +const STACK_OF(PKCS12_SAFEBAG) * +PKCS12_SAFEBAG_get0_safes(const PKCS12_SAFEBAG *bag); +const PKCS8_PRIV_KEY_INFO *PKCS12_SAFEBAG_get0_p8inf(const PKCS12_SAFEBAG *bag); +const X509_SIG *PKCS12_SAFEBAG_get0_pkcs8(const PKCS12_SAFEBAG *bag); + +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_cert(X509 *x509); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_crl(X509_CRL *crl); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_secret(int type, int vtype, const unsigned char *value, int len); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create0_p8inf(PKCS8_PRIV_KEY_INFO *p8); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create0_pkcs8(X509_SIG *p8); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_pkcs8_encrypt(int pbe_nid, + const char *pass, + int passlen, + unsigned char *salt, + int saltlen, int iter, + PKCS8_PRIV_KEY_INFO *p8inf); +PKCS12_SAFEBAG *PKCS12_SAFEBAG_create_pkcs8_encrypt_ex(int pbe_nid, + const char *pass, + int passlen, + unsigned char *salt, + int saltlen, int iter, + PKCS8_PRIV_KEY_INFO *p8inf, + OSSL_LIB_CTX *ctx, + const char *propq); + +PKCS12_SAFEBAG *PKCS12_item_pack_safebag(void *obj, const ASN1_ITEM *it, + int nid1, int nid2); +PKCS8_PRIV_KEY_INFO *PKCS8_decrypt(const X509_SIG *p8, const char *pass, + int passlen); +PKCS8_PRIV_KEY_INFO *PKCS8_decrypt_ex(const X509_SIG *p8, const char *pass, + int passlen, OSSL_LIB_CTX *ctx, + const char *propq); +PKCS8_PRIV_KEY_INFO *PKCS12_decrypt_skey(const PKCS12_SAFEBAG 
*bag, + const char *pass, int passlen); +PKCS8_PRIV_KEY_INFO *PKCS12_decrypt_skey_ex(const PKCS12_SAFEBAG *bag, + const char *pass, int passlen, + OSSL_LIB_CTX *ctx, + const char *propq); +X509_SIG *PKCS8_encrypt(int pbe_nid, const EVP_CIPHER *cipher, + const char *pass, int passlen, unsigned char *salt, + int saltlen, int iter, PKCS8_PRIV_KEY_INFO *p8); +X509_SIG *PKCS8_encrypt_ex(int pbe_nid, const EVP_CIPHER *cipher, + const char *pass, int passlen, unsigned char *salt, + int saltlen, int iter, PKCS8_PRIV_KEY_INFO *p8, + OSSL_LIB_CTX *ctx, const char *propq); +X509_SIG *PKCS8_set0_pbe(const char *pass, int passlen, + PKCS8_PRIV_KEY_INFO *p8inf, X509_ALGOR *pbe); +X509_SIG *PKCS8_set0_pbe_ex(const char *pass, int passlen, + PKCS8_PRIV_KEY_INFO *p8inf, X509_ALGOR *pbe, + OSSL_LIB_CTX *ctx, const char *propq); +PKCS7 *PKCS12_pack_p7data(STACK_OF(PKCS12_SAFEBAG) *sk); +STACK_OF(PKCS12_SAFEBAG) *PKCS12_unpack_p7data(PKCS7 *p7); +PKCS7 *PKCS12_pack_p7encdata(int pbe_nid, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + STACK_OF(PKCS12_SAFEBAG) *bags); +PKCS7 *PKCS12_pack_p7encdata_ex(int pbe_nid, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + STACK_OF(PKCS12_SAFEBAG) *bags, + OSSL_LIB_CTX *ctx, const char *propq); + +STACK_OF(PKCS12_SAFEBAG) *PKCS12_unpack_p7encdata(PKCS7 *p7, const char *pass, + int passlen); + +int PKCS12_pack_authsafes(PKCS12 *p12, STACK_OF(PKCS7) *safes); +STACK_OF(PKCS7) *PKCS12_unpack_authsafes(const PKCS12 *p12); + +int PKCS12_add_localkeyid(PKCS12_SAFEBAG *bag, unsigned char *name, + int namelen); +int PKCS12_add_friendlyname_asc(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_friendlyname_utf8(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_CSPName_asc(PKCS12_SAFEBAG *bag, const char *name, + int namelen); +int PKCS12_add_friendlyname_uni(PKCS12_SAFEBAG *bag, + const unsigned char *name, int namelen); +int PKCS12_add1_attr_by_NID(PKCS12_SAFEBAG *bag, int nid, int type, + const unsigned char *bytes, int len); +int PKCS12_add1_attr_by_txt(PKCS12_SAFEBAG *bag, const char *attrname, int type, + const unsigned char *bytes, int len); +int PKCS8_add_keyusage(PKCS8_PRIV_KEY_INFO *p8, int usage); +ASN1_TYPE *PKCS12_get_attr_gen(const STACK_OF(X509_ATTRIBUTE) *attrs, + int attr_nid); +char *PKCS12_get_friendlyname(PKCS12_SAFEBAG *bag); +const STACK_OF(X509_ATTRIBUTE) * +PKCS12_SAFEBAG_get0_attrs(const PKCS12_SAFEBAG *bag); +void PKCS12_SAFEBAG_set0_attrs(PKCS12_SAFEBAG *bag, STACK_OF(X509_ATTRIBUTE) *attrs); +unsigned char *PKCS12_pbe_crypt(const X509_ALGOR *algor, + const char *pass, int passlen, + const unsigned char *in, int inlen, + unsigned char **data, int *datalen, + int en_de); +unsigned char *PKCS12_pbe_crypt_ex(const X509_ALGOR *algor, + const char *pass, int passlen, + const unsigned char *in, int inlen, + unsigned char **data, int *datalen, + int en_de, OSSL_LIB_CTX *libctx, + const char *propq); +void *PKCS12_item_decrypt_d2i(const X509_ALGOR *algor, const ASN1_ITEM *it, + const char *pass, int passlen, + const ASN1_OCTET_STRING *oct, int zbuf); +void *PKCS12_item_decrypt_d2i_ex(const X509_ALGOR *algor, const ASN1_ITEM *it, + const char *pass, int passlen, + const ASN1_OCTET_STRING *oct, int zbuf, + OSSL_LIB_CTX *libctx, + const char *propq); +ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt(X509_ALGOR *algor, + const ASN1_ITEM *it, + const char *pass, int passlen, + void *obj, int zbuf); +ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt_ex(X509_ALGOR *algor, + const 
ASN1_ITEM *it, + const char *pass, int passlen, + void *obj, int zbuf, + OSSL_LIB_CTX *ctx, + const char *propq); +PKCS12 *PKCS12_init(int mode); +PKCS12 *PKCS12_init_ex(int mode, OSSL_LIB_CTX *ctx, const char *propq); + +int PKCS12_key_gen_asc(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_asc_ex(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); +int PKCS12_key_gen_uni(unsigned char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_uni_ex(unsigned char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); +int PKCS12_key_gen_utf8(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type); +int PKCS12_key_gen_utf8_ex(const char *pass, int passlen, unsigned char *salt, + int saltlen, int id, int iter, int n, + unsigned char *out, const EVP_MD *md_type, + OSSL_LIB_CTX *ctx, const char *propq); + +int PKCS12_PBE_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, const EVP_CIPHER *cipher, + const EVP_MD *md_type, int en_de); +int PKCS12_PBE_keyivgen_ex(EVP_CIPHER_CTX *ctx, const char *pass, int passlen, + ASN1_TYPE *param, const EVP_CIPHER *cipher, + const EVP_MD *md_type, int en_de, + OSSL_LIB_CTX *libctx, const char *propq); +int PKCS12_gen_mac(PKCS12 *p12, const char *pass, int passlen, + unsigned char *mac, unsigned int *maclen); +int PKCS12_verify_mac(PKCS12 *p12, const char *pass, int passlen); +int PKCS12_set_mac(PKCS12 *p12, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + const EVP_MD *md_type); +int PKCS12_set_pbmac1_pbkdf2(PKCS12 *p12, const char *pass, int passlen, + unsigned char *salt, int saltlen, int iter, + const EVP_MD *md_type, const char *prf_md_name); +int PKCS12_setup_mac(PKCS12 *p12, int iter, unsigned char *salt, + int saltlen, const EVP_MD *md_type); +unsigned char *OPENSSL_asc2uni(const char *asc, int asclen, + unsigned char **uni, int *unilen); +char *OPENSSL_uni2asc(const unsigned char *uni, int unilen); +unsigned char *OPENSSL_utf82uni(const char *asc, int asclen, + unsigned char **uni, int *unilen); +char *OPENSSL_uni2utf8(const unsigned char *uni, int unilen); + +DECLARE_ASN1_FUNCTIONS(PKCS12) +DECLARE_ASN1_FUNCTIONS(PKCS12_MAC_DATA) +DECLARE_ASN1_FUNCTIONS(PKCS12_SAFEBAG) +DECLARE_ASN1_FUNCTIONS(PKCS12_BAGS) + +DECLARE_ASN1_ITEM(PKCS12_SAFEBAGS) +DECLARE_ASN1_ITEM(PKCS12_AUTHSAFES) + +void PKCS12_PBE_add(void); +int PKCS12_parse(PKCS12 *p12, const char *pass, EVP_PKEY **pkey, X509 **cert, + STACK_OF(X509) **ca); +typedef int PKCS12_create_cb(PKCS12_SAFEBAG *bag, void *cbarg); +PKCS12 *PKCS12_create(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype); +PKCS12 *PKCS12_create_ex(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, int nid_cert, + int iter, int mac_iter, int keytype, + OSSL_LIB_CTX *ctx, const char *propq); +PKCS12 *PKCS12_create_ex2(const char *pass, const char *name, EVP_PKEY *pkey, + X509 *cert, STACK_OF(X509) *ca, int nid_key, 
int nid_cert, + int iter, int mac_iter, int keytype, + OSSL_LIB_CTX *ctx, const char *propq, + PKCS12_create_cb *cb, void *cbarg); + +PKCS12_SAFEBAG *PKCS12_add_cert(STACK_OF(PKCS12_SAFEBAG) **pbags, X509 *cert); +PKCS12_SAFEBAG *PKCS12_add_key(STACK_OF(PKCS12_SAFEBAG) **pbags, + EVP_PKEY *key, int key_usage, int iter, + int key_nid, const char *pass); +PKCS12_SAFEBAG *PKCS12_add_key_ex(STACK_OF(PKCS12_SAFEBAG) **pbags, + EVP_PKEY *key, int key_usage, int iter, + int key_nid, const char *pass, + OSSL_LIB_CTX *ctx, const char *propq); + +PKCS12_SAFEBAG *PKCS12_add_secret(STACK_OF(PKCS12_SAFEBAG) **pbags, + int nid_type, const unsigned char *value, int len); +int PKCS12_add_safe(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, + int safe_nid, int iter, const char *pass); +int PKCS12_add_safe_ex(STACK_OF(PKCS7) **psafes, STACK_OF(PKCS12_SAFEBAG) *bags, + int safe_nid, int iter, const char *pass, + OSSL_LIB_CTX *ctx, const char *propq); + +PKCS12 *PKCS12_add_safes(STACK_OF(PKCS7) *safes, int p7_nid); +PKCS12 *PKCS12_add_safes_ex(STACK_OF(PKCS7) *safes, int p7_nid, + OSSL_LIB_CTX *ctx, const char *propq); + +int i2d_PKCS12_bio(BIO *bp, const PKCS12 *p12); +# ifndef OPENSSL_NO_STDIO +int i2d_PKCS12_fp(FILE *fp, const PKCS12 *p12); +# endif +PKCS12 *d2i_PKCS12_bio(BIO *bp, PKCS12 **p12); +# ifndef OPENSSL_NO_STDIO +PKCS12 *d2i_PKCS12_fp(FILE *fp, PKCS12 **p12); +# endif +int PKCS12_newpass(PKCS12 *p12, const char *oldpass, const char *newpass); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/pkcs7.h b/contrib/openssl-cmake/common/include/openssl/pkcs7.h new file mode 100644 index 000000000000..fa68462aff97 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/pkcs7.h @@ -0,0 +1,430 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/pkcs7.h.in + * + * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_PKCS7_H +# define OPENSSL_PKCS7_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_PKCS7_H +# endif + +# include +# include +# include + +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + + +/*- +Encryption_ID DES-CBC +Digest_ID MD5 +Digest_Encryption_ID rsaEncryption +Key_Encryption_ID rsaEncryption +*/ + +typedef struct PKCS7_CTX_st { + OSSL_LIB_CTX *libctx; + char *propq; +} PKCS7_CTX; + +typedef struct pkcs7_issuer_and_serial_st { + X509_NAME *issuer; + ASN1_INTEGER *serial; +} PKCS7_ISSUER_AND_SERIAL; + +typedef struct pkcs7_signer_info_st { + ASN1_INTEGER *version; /* version 1 */ + PKCS7_ISSUER_AND_SERIAL *issuer_and_serial; + X509_ALGOR *digest_alg; + STACK_OF(X509_ATTRIBUTE) *auth_attr; /* [ 0 ] */ + X509_ALGOR *digest_enc_alg; /* confusing name, actually used for signing */ + ASN1_OCTET_STRING *enc_digest; /* confusing name, actually signature */ + STACK_OF(X509_ATTRIBUTE) *unauth_attr; /* [ 1 ] */ + /* The private key to sign with */ + EVP_PKEY *pkey; + const PKCS7_CTX *ctx; +} PKCS7_SIGNER_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7_SIGNER_INFO, PKCS7_SIGNER_INFO, PKCS7_SIGNER_INFO) +#define sk_PKCS7_SIGNER_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_value(sk, idx) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_value(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk), (idx))) +#define sk_PKCS7_SIGNER_INFO_new(cmp) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new(ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp))) +#define sk_PKCS7_SIGNER_INFO_new_null() ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_SIGNER_INFO_new_reserve(cmp, n) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp), (n))) +#define sk_PKCS7_SIGNER_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (n)) +#define sk_PKCS7_SIGNER_INFO_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_delete(sk, i) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_delete(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (i))) +#define sk_PKCS7_SIGNER_INFO_delete_ptr(sk, ptr) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr))) +#define sk_PKCS7_SIGNER_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_pop(sk) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_pop(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_shift(sk) ((PKCS7_SIGNER_INFO *)OPENSSL_sk_shift(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk),ossl_check_PKCS7_SIGNER_INFO_freefunc_type(freefunc)) +#define sk_PKCS7_SIGNER_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr), (idx)) +#define sk_PKCS7_SIGNER_INFO_set(sk, idx, ptr) ((PKCS7_SIGNER_INFO 
*)OPENSSL_sk_set(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), (idx), ossl_check_PKCS7_SIGNER_INFO_type(ptr))) +#define sk_PKCS7_SIGNER_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr)) +#define sk_PKCS7_SIGNER_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_type(ptr), pnum) +#define sk_PKCS7_SIGNER_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk)) +#define sk_PKCS7_SIGNER_INFO_dup(sk) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk))) +#define sk_PKCS7_SIGNER_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7_SIGNER_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_copyfunc_type(copyfunc), ossl_check_PKCS7_SIGNER_INFO_freefunc_type(freefunc))) +#define sk_PKCS7_SIGNER_INFO_set_cmp_func(sk, cmp) ((sk_PKCS7_SIGNER_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_SIGNER_INFO_sk_type(sk), ossl_check_PKCS7_SIGNER_INFO_compfunc_type(cmp))) + + +typedef struct pkcs7_recip_info_st { + ASN1_INTEGER *version; /* version 0 */ + PKCS7_ISSUER_AND_SERIAL *issuer_and_serial; + X509_ALGOR *key_enc_algor; + ASN1_OCTET_STRING *enc_key; + X509 *cert; /* get the pub-key from this */ + const PKCS7_CTX *ctx; +} PKCS7_RECIP_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7_RECIP_INFO, PKCS7_RECIP_INFO, PKCS7_RECIP_INFO) +#define sk_PKCS7_RECIP_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_value(sk, idx) ((PKCS7_RECIP_INFO *)OPENSSL_sk_value(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk), (idx))) +#define sk_PKCS7_RECIP_INFO_new(cmp) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new(ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp))) +#define sk_PKCS7_RECIP_INFO_new_null() ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_RECIP_INFO_new_reserve(cmp, n) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp), (n))) +#define sk_PKCS7_RECIP_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (n)) +#define sk_PKCS7_RECIP_INFO_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_delete(sk, i) ((PKCS7_RECIP_INFO *)OPENSSL_sk_delete(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (i))) +#define sk_PKCS7_RECIP_INFO_delete_ptr(sk, ptr) ((PKCS7_RECIP_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr))) +#define sk_PKCS7_RECIP_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_pop(sk) ((PKCS7_RECIP_INFO *)OPENSSL_sk_pop(ossl_check_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_shift(sk) ((PKCS7_RECIP_INFO *)OPENSSL_sk_shift(ossl_check_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PKCS7_RECIP_INFO_sk_type(sk),ossl_check_PKCS7_RECIP_INFO_freefunc_type(freefunc)) +#define sk_PKCS7_RECIP_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr), (idx)) +#define sk_PKCS7_RECIP_INFO_set(sk, idx, ptr) ((PKCS7_RECIP_INFO *)OPENSSL_sk_set(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), (idx), ossl_check_PKCS7_RECIP_INFO_type(ptr))) +#define sk_PKCS7_RECIP_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr)) +#define sk_PKCS7_RECIP_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_type(ptr), pnum) +#define sk_PKCS7_RECIP_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk)) +#define sk_PKCS7_RECIP_INFO_dup(sk) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk))) +#define sk_PKCS7_RECIP_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7_RECIP_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_copyfunc_type(copyfunc), ossl_check_PKCS7_RECIP_INFO_freefunc_type(freefunc))) +#define sk_PKCS7_RECIP_INFO_set_cmp_func(sk, cmp) ((sk_PKCS7_RECIP_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_RECIP_INFO_sk_type(sk), ossl_check_PKCS7_RECIP_INFO_compfunc_type(cmp))) + + + +typedef struct pkcs7_signed_st { + ASN1_INTEGER *version; /* version 1 */ + STACK_OF(X509_ALGOR) *md_algs; /* md used */ + STACK_OF(X509) *cert; /* [ 0 ] */ /* name should be 'certificates' */ + STACK_OF(X509_CRL) *crl; /* [ 1 ] */ /* name should be 'crls' */ + STACK_OF(PKCS7_SIGNER_INFO) *signer_info; + struct pkcs7_st *contents; +} PKCS7_SIGNED; +/* + * The above structure is very very similar to PKCS7_SIGN_ENVELOPE. 
How about + * merging the two + */ + +typedef struct pkcs7_enc_content_st { + ASN1_OBJECT *content_type; + X509_ALGOR *algorithm; + ASN1_OCTET_STRING *enc_data; /* [ 0 ] */ + const EVP_CIPHER *cipher; + const PKCS7_CTX *ctx; +} PKCS7_ENC_CONTENT; + +typedef struct pkcs7_enveloped_st { + ASN1_INTEGER *version; /* version 0 */ + STACK_OF(PKCS7_RECIP_INFO) *recipientinfo; + PKCS7_ENC_CONTENT *enc_data; +} PKCS7_ENVELOPE; + +typedef struct pkcs7_signedandenveloped_st { + ASN1_INTEGER *version; /* version 1 */ + STACK_OF(X509_ALGOR) *md_algs; /* md used */ + STACK_OF(X509) *cert; /* [ 0 ] */ /* name should be 'certificates' */ + STACK_OF(X509_CRL) *crl; /* [ 1 ] */ /* name should be 'crls' */ + STACK_OF(PKCS7_SIGNER_INFO) *signer_info; + PKCS7_ENC_CONTENT *enc_data; + STACK_OF(PKCS7_RECIP_INFO) *recipientinfo; +} PKCS7_SIGN_ENVELOPE; + +typedef struct pkcs7_digest_st { + ASN1_INTEGER *version; /* version 0 */ + X509_ALGOR *md; /* md used */ + struct pkcs7_st *contents; + ASN1_OCTET_STRING *digest; +} PKCS7_DIGEST; + +typedef struct pkcs7_encrypted_st { + ASN1_INTEGER *version; /* version 0 */ + PKCS7_ENC_CONTENT *enc_data; +} PKCS7_ENCRYPT; + +typedef struct pkcs7_st { + /* + * The following is non NULL if it contains ASN1 encoding of this + * structure + */ + unsigned char *asn1; + long length; +# define PKCS7_S_HEADER 0 +# define PKCS7_S_BODY 1 +# define PKCS7_S_TAIL 2 + int state; /* used during processing */ + int detached; + ASN1_OBJECT *type; + /* content as defined by the type */ + /* + * all encryption/message digests are applied to the 'contents', leaving + * out the 'type' field. + */ + union { + char *ptr; + /* NID_pkcs7_data */ + ASN1_OCTET_STRING *data; + /* NID_pkcs7_signed */ + PKCS7_SIGNED *sign; /* field name 'signed' would clash with C keyword */ + /* NID_pkcs7_enveloped */ + PKCS7_ENVELOPE *enveloped; + /* NID_pkcs7_signedAndEnveloped */ + PKCS7_SIGN_ENVELOPE *signed_and_enveloped; + /* NID_pkcs7_digest */ + PKCS7_DIGEST *digest; + /* NID_pkcs7_encrypted */ + PKCS7_ENCRYPT *encrypted; + /* Anything else */ + ASN1_TYPE *other; + } d; + PKCS7_CTX ctx; +} PKCS7; +SKM_DEFINE_STACK_OF_INTERNAL(PKCS7, PKCS7, PKCS7) +#define sk_PKCS7_num(sk) OPENSSL_sk_num(ossl_check_const_PKCS7_sk_type(sk)) +#define sk_PKCS7_value(sk, idx) ((PKCS7 *)OPENSSL_sk_value(ossl_check_const_PKCS7_sk_type(sk), (idx))) +#define sk_PKCS7_new(cmp) ((STACK_OF(PKCS7) *)OPENSSL_sk_new(ossl_check_PKCS7_compfunc_type(cmp))) +#define sk_PKCS7_new_null() ((STACK_OF(PKCS7) *)OPENSSL_sk_new_null()) +#define sk_PKCS7_new_reserve(cmp, n) ((STACK_OF(PKCS7) *)OPENSSL_sk_new_reserve(ossl_check_PKCS7_compfunc_type(cmp), (n))) +#define sk_PKCS7_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PKCS7_sk_type(sk), (n)) +#define sk_PKCS7_free(sk) OPENSSL_sk_free(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_zero(sk) OPENSSL_sk_zero(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_delete(sk, i) ((PKCS7 *)OPENSSL_sk_delete(ossl_check_PKCS7_sk_type(sk), (i))) +#define sk_PKCS7_delete_ptr(sk, ptr) ((PKCS7 *)OPENSSL_sk_delete_ptr(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr))) +#define sk_PKCS7_push(sk, ptr) OPENSSL_sk_push(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_pop(sk) ((PKCS7 *)OPENSSL_sk_pop(ossl_check_PKCS7_sk_type(sk))) +#define sk_PKCS7_shift(sk) ((PKCS7 *)OPENSSL_sk_shift(ossl_check_PKCS7_sk_type(sk))) +#define sk_PKCS7_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PKCS7_sk_type(sk),ossl_check_PKCS7_freefunc_type(freefunc)) +#define sk_PKCS7_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr), (idx)) +#define sk_PKCS7_set(sk, idx, ptr) ((PKCS7 *)OPENSSL_sk_set(ossl_check_PKCS7_sk_type(sk), (idx), ossl_check_PKCS7_type(ptr))) +#define sk_PKCS7_find(sk, ptr) OPENSSL_sk_find(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr)) +#define sk_PKCS7_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_type(ptr), pnum) +#define sk_PKCS7_sort(sk) OPENSSL_sk_sort(ossl_check_PKCS7_sk_type(sk)) +#define sk_PKCS7_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PKCS7_sk_type(sk)) +#define sk_PKCS7_dup(sk) ((STACK_OF(PKCS7) *)OPENSSL_sk_dup(ossl_check_const_PKCS7_sk_type(sk))) +#define sk_PKCS7_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PKCS7) *)OPENSSL_sk_deep_copy(ossl_check_const_PKCS7_sk_type(sk), ossl_check_PKCS7_copyfunc_type(copyfunc), ossl_check_PKCS7_freefunc_type(freefunc))) +#define sk_PKCS7_set_cmp_func(sk, cmp) ((sk_PKCS7_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PKCS7_sk_type(sk), ossl_check_PKCS7_compfunc_type(cmp))) + + + +# define PKCS7_OP_SET_DETACHED_SIGNATURE 1 +# define PKCS7_OP_GET_DETACHED_SIGNATURE 2 + +# define PKCS7_get_signed_attributes(si) ((si)->auth_attr) +# define PKCS7_get_attributes(si) ((si)->unauth_attr) + +# define PKCS7_type_is_signed(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_signed) +# define PKCS7_type_is_encrypted(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_encrypted) +# define PKCS7_type_is_enveloped(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_enveloped) +# define PKCS7_type_is_signedAndEnveloped(a) \ + (OBJ_obj2nid((a)->type) == NID_pkcs7_signedAndEnveloped) +# define PKCS7_type_is_data(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_data) +# define PKCS7_type_is_digest(a) (OBJ_obj2nid((a)->type) == NID_pkcs7_digest) + +# define PKCS7_set_detached(p,v) \ + PKCS7_ctrl(p,PKCS7_OP_SET_DETACHED_SIGNATURE,v,NULL) +# define PKCS7_get_detached(p) \ + PKCS7_ctrl(p,PKCS7_OP_GET_DETACHED_SIGNATURE,0,NULL) + +# define PKCS7_is_detached(p7) (PKCS7_type_is_signed(p7) && PKCS7_get_detached(p7)) + +/* S/MIME related flags */ + +# define PKCS7_TEXT 0x1 +# define PKCS7_NOCERTS 0x2 +# define PKCS7_NOSIGS 0x4 +# define PKCS7_NOCHAIN 0x8 +# define PKCS7_NOINTERN 0x10 +# define PKCS7_NOVERIFY 0x20 +# define PKCS7_DETACHED 0x40 +# define PKCS7_BINARY 0x80 +# define PKCS7_NOATTR 0x100 +# define PKCS7_NOSMIMECAP 0x200 +# define PKCS7_NOOLDMIMETYPE 0x400 +# define PKCS7_CRLFEOL 0x800 +# define PKCS7_STREAM 0x1000 +# define PKCS7_NOCRL 0x2000 +# define PKCS7_PARTIAL 0x4000 +# define PKCS7_REUSE_DIGEST 0x8000 +# define PKCS7_NO_DUAL_CONTENT 0x10000 + +/* Flags: for compatibility with older code */ + +# define SMIME_TEXT PKCS7_TEXT +# define SMIME_NOCERTS PKCS7_NOCERTS +# define SMIME_NOSIGS PKCS7_NOSIGS +# define SMIME_NOCHAIN PKCS7_NOCHAIN +# define SMIME_NOINTERN PKCS7_NOINTERN +# define SMIME_NOVERIFY PKCS7_NOVERIFY +# define SMIME_DETACHED PKCS7_DETACHED +# define SMIME_BINARY PKCS7_BINARY +# define SMIME_NOATTR PKCS7_NOATTR + +/* CRLF ASCII canonicalisation */ +# define SMIME_ASCIICRLF 0x80000 + +DECLARE_ASN1_FUNCTIONS(PKCS7_ISSUER_AND_SERIAL) + +int PKCS7_ISSUER_AND_SERIAL_digest(PKCS7_ISSUER_AND_SERIAL *data, + const EVP_MD *type, unsigned char *md, + unsigned int *len); +# ifndef OPENSSL_NO_STDIO +PKCS7 *d2i_PKCS7_fp(FILE *fp, 
PKCS7 **p7); +int i2d_PKCS7_fp(FILE *fp, const PKCS7 *p7); +# endif +DECLARE_ASN1_DUP_FUNCTION(PKCS7) +PKCS7 *d2i_PKCS7_bio(BIO *bp, PKCS7 **p7); +int i2d_PKCS7_bio(BIO *bp, const PKCS7 *p7); +int i2d_PKCS7_bio_stream(BIO *out, PKCS7 *p7, BIO *in, int flags); +int PEM_write_bio_PKCS7_stream(BIO *out, PKCS7 *p7, BIO *in, int flags); + +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGNER_INFO) +DECLARE_ASN1_FUNCTIONS(PKCS7_RECIP_INFO) +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGNED) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENC_CONTENT) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENVELOPE) +DECLARE_ASN1_FUNCTIONS(PKCS7_SIGN_ENVELOPE) +DECLARE_ASN1_FUNCTIONS(PKCS7_DIGEST) +DECLARE_ASN1_FUNCTIONS(PKCS7_ENCRYPT) +DECLARE_ASN1_FUNCTIONS(PKCS7) +PKCS7 *PKCS7_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +DECLARE_ASN1_ITEM(PKCS7_ATTR_SIGN) +DECLARE_ASN1_ITEM(PKCS7_ATTR_VERIFY) + +DECLARE_ASN1_NDEF_FUNCTION(PKCS7) +DECLARE_ASN1_PRINT_FUNCTION(PKCS7) + +long PKCS7_ctrl(PKCS7 *p7, int cmd, long larg, char *parg); + +int PKCS7_type_is_other(PKCS7 *p7); +int PKCS7_set_type(PKCS7 *p7, int type); +int PKCS7_set0_type_other(PKCS7 *p7, int type, ASN1_TYPE *other); +int PKCS7_set_content(PKCS7 *p7, PKCS7 *p7_data); +int PKCS7_SIGNER_INFO_set(PKCS7_SIGNER_INFO *p7i, X509 *x509, EVP_PKEY *pkey, + const EVP_MD *dgst); +int PKCS7_SIGNER_INFO_sign(PKCS7_SIGNER_INFO *si); +int PKCS7_add_signer(PKCS7 *p7, PKCS7_SIGNER_INFO *p7i); +int PKCS7_add_certificate(PKCS7 *p7, X509 *cert); +int PKCS7_add_crl(PKCS7 *p7, X509_CRL *crl); +int PKCS7_content_new(PKCS7 *p7, int nid); +int PKCS7_dataVerify(X509_STORE *cert_store, X509_STORE_CTX *ctx, + BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si); +int PKCS7_signatureVerify(BIO *bio, PKCS7 *p7, PKCS7_SIGNER_INFO *si, + X509 *signer); + +BIO *PKCS7_dataInit(PKCS7 *p7, BIO *bio); +int PKCS7_dataFinal(PKCS7 *p7, BIO *bio); +BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert); + +PKCS7_SIGNER_INFO *PKCS7_add_signature(PKCS7 *p7, X509 *x509, + EVP_PKEY *pkey, const EVP_MD *dgst); +X509 *PKCS7_cert_from_signer_info(PKCS7 *p7, PKCS7_SIGNER_INFO *si); +int PKCS7_set_digest(PKCS7 *p7, const EVP_MD *md); +STACK_OF(PKCS7_SIGNER_INFO) *PKCS7_get_signer_info(PKCS7 *p7); + +PKCS7_RECIP_INFO *PKCS7_add_recipient(PKCS7 *p7, X509 *x509); +void PKCS7_SIGNER_INFO_get0_algs(PKCS7_SIGNER_INFO *si, EVP_PKEY **pk, + X509_ALGOR **pdig, X509_ALGOR **psig); +void PKCS7_RECIP_INFO_get0_alg(PKCS7_RECIP_INFO *ri, X509_ALGOR **penc); +int PKCS7_add_recipient_info(PKCS7 *p7, PKCS7_RECIP_INFO *ri); +int PKCS7_RECIP_INFO_set(PKCS7_RECIP_INFO *p7i, X509 *x509); +int PKCS7_set_cipher(PKCS7 *p7, const EVP_CIPHER *cipher); +int PKCS7_stream(unsigned char ***boundary, PKCS7 *p7); + +PKCS7_ISSUER_AND_SERIAL *PKCS7_get_issuer_and_serial(PKCS7 *p7, int idx); +ASN1_OCTET_STRING *PKCS7_get_octet_string(PKCS7 *p7); +ASN1_OCTET_STRING *PKCS7_digest_from_attributes(STACK_OF(X509_ATTRIBUTE) *sk); +int PKCS7_add_signed_attribute(PKCS7_SIGNER_INFO *p7si, int nid, int type, + void *data); +int PKCS7_add_attribute(PKCS7_SIGNER_INFO *p7si, int nid, int atrtype, + void *value); +ASN1_TYPE *PKCS7_get_attribute(const PKCS7_SIGNER_INFO *si, int nid); +ASN1_TYPE *PKCS7_get_signed_attribute(const PKCS7_SIGNER_INFO *si, int nid); +int PKCS7_set_signed_attributes(PKCS7_SIGNER_INFO *p7si, + STACK_OF(X509_ATTRIBUTE) *sk); +int PKCS7_set_attributes(PKCS7_SIGNER_INFO *p7si, + STACK_OF(X509_ATTRIBUTE) *sk); + +PKCS7 *PKCS7_sign(X509 *signcert, EVP_PKEY *pkey, STACK_OF(X509) *certs, + BIO *data, int flags); +PKCS7 *PKCS7_sign_ex(X509 *signcert, EVP_PKEY *pkey, 
STACK_OF(X509) *certs, + BIO *data, int flags, OSSL_LIB_CTX *libctx, + const char *propq); + +PKCS7_SIGNER_INFO *PKCS7_sign_add_signer(PKCS7 *p7, + X509 *signcert, EVP_PKEY *pkey, + const EVP_MD *md, int flags); + +int PKCS7_final(PKCS7 *p7, BIO *data, int flags); +int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store, + BIO *indata, BIO *out, int flags); +STACK_OF(X509) *PKCS7_get0_signers(PKCS7 *p7, STACK_OF(X509) *certs, + int flags); +PKCS7 *PKCS7_encrypt(STACK_OF(X509) *certs, BIO *in, const EVP_CIPHER *cipher, + int flags); +PKCS7 *PKCS7_encrypt_ex(STACK_OF(X509) *certs, BIO *in, + const EVP_CIPHER *cipher, int flags, + OSSL_LIB_CTX *libctx, const char *propq); +int PKCS7_decrypt(PKCS7 *p7, EVP_PKEY *pkey, X509 *cert, BIO *data, + int flags); + +int PKCS7_add_attrib_smimecap(PKCS7_SIGNER_INFO *si, + STACK_OF(X509_ALGOR) *cap); +STACK_OF(X509_ALGOR) *PKCS7_get_smimecap(PKCS7_SIGNER_INFO *si); +int PKCS7_simple_smimecap(STACK_OF(X509_ALGOR) *sk, int nid, int arg); + +int PKCS7_add_attrib_content_type(PKCS7_SIGNER_INFO *si, ASN1_OBJECT *coid); +int PKCS7_add0_attrib_signing_time(PKCS7_SIGNER_INFO *si, ASN1_TIME *t); +int PKCS7_add1_attrib_digest(PKCS7_SIGNER_INFO *si, + const unsigned char *md, int mdlen); + +int SMIME_write_PKCS7(BIO *bio, PKCS7 *p7, BIO *data, int flags); +PKCS7 *SMIME_read_PKCS7_ex(BIO *bio, BIO **bcont, PKCS7 **p7); +PKCS7 *SMIME_read_PKCS7(BIO *bio, BIO **bcont); + +BIO *BIO_new_PKCS7(BIO *out, PKCS7 *p7); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/safestack.h b/contrib/openssl-cmake/common/include/openssl/safestack.h new file mode 100644 index 000000000000..0499700b5625 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/safestack.h @@ -0,0 +1,297 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/safestack.h.in + * + * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_SAFESTACK_H +# define OPENSSL_SAFESTACK_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SAFESTACK_H +# endif + +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# define STACK_OF(type) struct stack_st_##type + +/* Helper macro for internal use */ +# define SKM_DEFINE_STACK_OF_INTERNAL(t1, t2, t3) \ + STACK_OF(t1); \ + typedef int (*sk_##t1##_compfunc)(const t3 * const *a, const t3 *const *b); \ + typedef void (*sk_##t1##_freefunc)(t3 *a); \ + typedef t3 * (*sk_##t1##_copyfunc)(const t3 *a); \ + static ossl_unused ossl_inline t2 *ossl_check_##t1##_type(t2 *ptr) \ + { \ + return ptr; \ + } \ + static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_##t1##_sk_type(const STACK_OF(t1) *sk) \ + { \ + return (const OPENSSL_STACK *)sk; \ + } \ + static ossl_unused ossl_inline OPENSSL_STACK *ossl_check_##t1##_sk_type(STACK_OF(t1) *sk) \ + { \ + return (OPENSSL_STACK *)sk; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_##t1##_compfunc_type(sk_##t1##_compfunc cmp) \ + { \ + return (OPENSSL_sk_compfunc)cmp; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_##t1##_copyfunc_type(sk_##t1##_copyfunc cpy) \ + { \ + return (OPENSSL_sk_copyfunc)cpy; \ + } \ + static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_##t1##_freefunc_type(sk_##t1##_freefunc fr) \ + { \ + return (OPENSSL_sk_freefunc)fr; \ + } + +# define SKM_DEFINE_STACK_OF(t1, t2, t3) \ + STACK_OF(t1); \ + typedef int (*sk_##t1##_compfunc)(const t3 * const *a, const t3 *const *b); \ + typedef void (*sk_##t1##_freefunc)(t3 *a); \ + typedef t3 * (*sk_##t1##_copyfunc)(const t3 *a); \ + static ossl_unused ossl_inline int sk_##t1##_num(const STACK_OF(t1) *sk) \ + { \ + return OPENSSL_sk_num((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_value(const STACK_OF(t1) *sk, int idx) \ + { \ + return (t2 *)OPENSSL_sk_value((const OPENSSL_STACK *)sk, idx); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new(sk_##t1##_compfunc compare) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new((OPENSSL_sk_compfunc)compare); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new_null(void) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new_null(); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_new_reserve(sk_##t1##_compfunc compare, int n) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_new_reserve((OPENSSL_sk_compfunc)compare, n); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_reserve(STACK_OF(t1) *sk, int n) \ + { \ + return OPENSSL_sk_reserve((OPENSSL_STACK *)sk, n); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_free(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_free((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_zero(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_zero((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_delete(STACK_OF(t1) *sk, int i) \ + { \ + return (t2 *)OPENSSL_sk_delete((OPENSSL_STACK *)sk, i); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_delete_ptr(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return (t2 *)OPENSSL_sk_delete_ptr((OPENSSL_STACK *)sk, \ + (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_push(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_push((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + 
static ossl_unused ossl_inline int sk_##t1##_unshift(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_unshift((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_pop(STACK_OF(t1) *sk) \ + { \ + return (t2 *)OPENSSL_sk_pop((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_shift(STACK_OF(t1) *sk) \ + { \ + return (t2 *)OPENSSL_sk_shift((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_pop_free(STACK_OF(t1) *sk, sk_##t1##_freefunc freefunc) \ + { \ + OPENSSL_sk_pop_free((OPENSSL_STACK *)sk, (OPENSSL_sk_freefunc)freefunc); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_insert(STACK_OF(t1) *sk, t2 *ptr, int idx) \ + { \ + return OPENSSL_sk_insert((OPENSSL_STACK *)sk, (const void *)ptr, idx); \ + } \ + static ossl_unused ossl_inline t2 *sk_##t1##_set(STACK_OF(t1) *sk, int idx, t2 *ptr) \ + { \ + return (t2 *)OPENSSL_sk_set((OPENSSL_STACK *)sk, idx, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_find((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find_ex(STACK_OF(t1) *sk, t2 *ptr) \ + { \ + return OPENSSL_sk_find_ex((OPENSSL_STACK *)sk, (const void *)ptr); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_find_all(STACK_OF(t1) *sk, t2 *ptr, int *pnum) \ + { \ + return OPENSSL_sk_find_all((OPENSSL_STACK *)sk, (const void *)ptr, pnum); \ + } \ + static ossl_unused ossl_inline void sk_##t1##_sort(STACK_OF(t1) *sk) \ + { \ + OPENSSL_sk_sort((OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline int sk_##t1##_is_sorted(const STACK_OF(t1) *sk) \ + { \ + return OPENSSL_sk_is_sorted((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) * sk_##t1##_dup(const STACK_OF(t1) *sk) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_dup((const OPENSSL_STACK *)sk); \ + } \ + static ossl_unused ossl_inline STACK_OF(t1) *sk_##t1##_deep_copy(const STACK_OF(t1) *sk, \ + sk_##t1##_copyfunc copyfunc, \ + sk_##t1##_freefunc freefunc) \ + { \ + return (STACK_OF(t1) *)OPENSSL_sk_deep_copy((const OPENSSL_STACK *)sk, \ + (OPENSSL_sk_copyfunc)copyfunc, \ + (OPENSSL_sk_freefunc)freefunc); \ + } \ + static ossl_unused ossl_inline sk_##t1##_compfunc sk_##t1##_set_cmp_func(STACK_OF(t1) *sk, sk_##t1##_compfunc compare) \ + { \ + return (sk_##t1##_compfunc)OPENSSL_sk_set_cmp_func((OPENSSL_STACK *)sk, (OPENSSL_sk_compfunc)compare); \ + } + +# define DEFINE_STACK_OF(t) SKM_DEFINE_STACK_OF(t, t, t) +# define DEFINE_STACK_OF_CONST(t) SKM_DEFINE_STACK_OF(t, const t, t) +# define DEFINE_SPECIAL_STACK_OF(t1, t2) SKM_DEFINE_STACK_OF(t1, t2, t2) +# define DEFINE_SPECIAL_STACK_OF_CONST(t1, t2) \ + SKM_DEFINE_STACK_OF(t1, const t2, t2) + +/*- + * Strings are special: normally an lhash entry will point to a single + * (somewhat) mutable object. In the case of strings: + * + * a) Instead of a single char, there is an array of chars, NUL-terminated. + * b) The string may have be immutable. + * + * So, they need their own declarations. Especially important for + * type-checking tools, such as Deputy. + * + * In practice, however, it appears to be hard to have a const + * string. For now, I'm settling for dealing with the fact it is a + * string at all. 
+ */ +typedef char *OPENSSL_STRING; +typedef const char *OPENSSL_CSTRING; + +/*- + * Confusingly, LHASH_OF(STRING) deals with char ** throughout, but + * STACK_OF(STRING) is really more like STACK_OF(char), only, as mentioned + * above, instead of a single char each entry is a NUL-terminated array of + * chars. So, we have to implement STRING specially for STACK_OF. This is + * dealt with in the autogenerated macros below. + */ +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_STRING, char, char) +#define sk_OPENSSL_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_value(sk, idx) ((char *)OPENSSL_sk_value(ossl_check_const_OPENSSL_STRING_sk_type(sk), (idx))) +#define sk_OPENSSL_STRING_new(cmp) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new(ossl_check_OPENSSL_STRING_compfunc_type(cmp))) +#define sk_OPENSSL_STRING_new_null() ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_STRING_new_reserve(cmp, n) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_STRING_compfunc_type(cmp), (n))) +#define sk_OPENSSL_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_STRING_sk_type(sk), (n)) +#define sk_OPENSSL_STRING_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_delete(sk, i) ((char *)OPENSSL_sk_delete(ossl_check_OPENSSL_STRING_sk_type(sk), (i))) +#define sk_OPENSSL_STRING_delete_ptr(sk, ptr) ((char *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr))) +#define sk_OPENSSL_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_pop(sk) ((char *)OPENSSL_sk_pop(ossl_check_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_shift(sk) ((char *)OPENSSL_sk_shift(ossl_check_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_STRING_sk_type(sk),ossl_check_OPENSSL_STRING_freefunc_type(freefunc)) +#define sk_OPENSSL_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr), (idx)) +#define sk_OPENSSL_STRING_set(sk, idx, ptr) ((char *)OPENSSL_sk_set(ossl_check_OPENSSL_STRING_sk_type(sk), (idx), ossl_check_OPENSSL_STRING_type(ptr))) +#define sk_OPENSSL_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr)) +#define sk_OPENSSL_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_type(ptr), pnum) +#define sk_OPENSSL_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_STRING_sk_type(sk)) +#define sk_OPENSSL_STRING_dup(sk) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_STRING_sk_type(sk))) +#define sk_OPENSSL_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_copyfunc_type(copyfunc), 
ossl_check_OPENSSL_STRING_freefunc_type(freefunc))) +#define sk_OPENSSL_STRING_set_cmp_func(sk, cmp) ((sk_OPENSSL_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_STRING_sk_type(sk), ossl_check_OPENSSL_STRING_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_CSTRING, const char, char) +#define sk_OPENSSL_CSTRING_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_value(sk, idx) ((const char *)OPENSSL_sk_value(ossl_check_const_OPENSSL_CSTRING_sk_type(sk), (idx))) +#define sk_OPENSSL_CSTRING_new(cmp) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new(ossl_check_OPENSSL_CSTRING_compfunc_type(cmp))) +#define sk_OPENSSL_CSTRING_new_null() ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_CSTRING_new_reserve(cmp, n) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_CSTRING_compfunc_type(cmp), (n))) +#define sk_OPENSSL_CSTRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_CSTRING_sk_type(sk), (n)) +#define sk_OPENSSL_CSTRING_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_delete(sk, i) ((const char *)OPENSSL_sk_delete(ossl_check_OPENSSL_CSTRING_sk_type(sk), (i))) +#define sk_OPENSSL_CSTRING_delete_ptr(sk, ptr) ((const char *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr))) +#define sk_OPENSSL_CSTRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_pop(sk) ((const char *)OPENSSL_sk_pop(ossl_check_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_shift(sk) ((const char *)OPENSSL_sk_shift(ossl_check_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_CSTRING_sk_type(sk),ossl_check_OPENSSL_CSTRING_freefunc_type(freefunc)) +#define sk_OPENSSL_CSTRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr), (idx)) +#define sk_OPENSSL_CSTRING_set(sk, idx, ptr) ((const char *)OPENSSL_sk_set(ossl_check_OPENSSL_CSTRING_sk_type(sk), (idx), ossl_check_OPENSSL_CSTRING_type(ptr))) +#define sk_OPENSSL_CSTRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr)) +#define sk_OPENSSL_CSTRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_type(ptr), pnum) +#define sk_OPENSSL_CSTRING_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_CSTRING_sk_type(sk)) +#define sk_OPENSSL_CSTRING_dup(sk) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_CSTRING_sk_type(sk))) +#define sk_OPENSSL_CSTRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_CSTRING) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_copyfunc_type(copyfunc), ossl_check_OPENSSL_CSTRING_freefunc_type(freefunc))) +#define sk_OPENSSL_CSTRING_set_cmp_func(sk, cmp) 
((sk_OPENSSL_CSTRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_CSTRING_sk_type(sk), ossl_check_OPENSSL_CSTRING_compfunc_type(cmp))) + + +#if !defined(OPENSSL_NO_DEPRECATED_3_0) +/* + * This is not used by OpenSSL. A block of bytes, NOT nul-terminated. + * These should also be distinguished from "normal" stacks. + */ +typedef void *OPENSSL_BLOCK; +SKM_DEFINE_STACK_OF_INTERNAL(OPENSSL_BLOCK, void, void) +#define sk_OPENSSL_BLOCK_num(sk) OPENSSL_sk_num(ossl_check_const_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_value(sk, idx) ((void *)OPENSSL_sk_value(ossl_check_const_OPENSSL_BLOCK_sk_type(sk), (idx))) +#define sk_OPENSSL_BLOCK_new(cmp) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new(ossl_check_OPENSSL_BLOCK_compfunc_type(cmp))) +#define sk_OPENSSL_BLOCK_new_null() ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new_null()) +#define sk_OPENSSL_BLOCK_new_reserve(cmp, n) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_new_reserve(ossl_check_OPENSSL_BLOCK_compfunc_type(cmp), (n))) +#define sk_OPENSSL_BLOCK_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OPENSSL_BLOCK_sk_type(sk), (n)) +#define sk_OPENSSL_BLOCK_free(sk) OPENSSL_sk_free(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_zero(sk) OPENSSL_sk_zero(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_delete(sk, i) ((void *)OPENSSL_sk_delete(ossl_check_OPENSSL_BLOCK_sk_type(sk), (i))) +#define sk_OPENSSL_BLOCK_delete_ptr(sk, ptr) ((void *)OPENSSL_sk_delete_ptr(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr))) +#define sk_OPENSSL_BLOCK_push(sk, ptr) OPENSSL_sk_push(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_pop(sk) ((void *)OPENSSL_sk_pop(ossl_check_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_shift(sk) ((void *)OPENSSL_sk_shift(ossl_check_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OPENSSL_BLOCK_sk_type(sk),ossl_check_OPENSSL_BLOCK_freefunc_type(freefunc)) +#define sk_OPENSSL_BLOCK_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr), (idx)) +#define sk_OPENSSL_BLOCK_set(sk, idx, ptr) ((void *)OPENSSL_sk_set(ossl_check_OPENSSL_BLOCK_sk_type(sk), (idx), ossl_check_OPENSSL_BLOCK_type(ptr))) +#define sk_OPENSSL_BLOCK_find(sk, ptr) OPENSSL_sk_find(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr)) +#define sk_OPENSSL_BLOCK_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_type(ptr), pnum) +#define sk_OPENSSL_BLOCK_sort(sk) OPENSSL_sk_sort(ossl_check_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OPENSSL_BLOCK_sk_type(sk)) +#define sk_OPENSSL_BLOCK_dup(sk) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_dup(ossl_check_const_OPENSSL_BLOCK_sk_type(sk))) +#define sk_OPENSSL_BLOCK_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OPENSSL_BLOCK) *)OPENSSL_sk_deep_copy(ossl_check_const_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_copyfunc_type(copyfunc), ossl_check_OPENSSL_BLOCK_freefunc_type(freefunc))) +#define sk_OPENSSL_BLOCK_set_cmp_func(sk, cmp) 
((sk_OPENSSL_BLOCK_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OPENSSL_BLOCK_sk_type(sk), ossl_check_OPENSSL_BLOCK_compfunc_type(cmp))) + +#endif + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/srp.h b/contrib/openssl-cmake/common/include/openssl/srp.h new file mode 100644 index 000000000000..a48766c6ce8b --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/srp.h @@ -0,0 +1,285 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/srp.h.in + * + * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2004, EdelKey Project. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * Originally written by Christophe Renou and Peter Sylvester, + * for the EdelKey project. + */ + + + +#ifndef OPENSSL_SRP_H +# define OPENSSL_SRP_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SRP_H +# endif + +#include + +#ifndef OPENSSL_NO_SRP +# include +# include +# include +# include +# include + +# ifdef __cplusplus +extern "C" { +# endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 + +typedef struct SRP_gN_cache_st { + char *b64_bn; + BIGNUM *bn; +} SRP_gN_cache; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_gN_cache, SRP_gN_cache, SRP_gN_cache) +#define sk_SRP_gN_cache_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_value(sk, idx) ((SRP_gN_cache *)OPENSSL_sk_value(ossl_check_const_SRP_gN_cache_sk_type(sk), (idx))) +#define sk_SRP_gN_cache_new(cmp) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new(ossl_check_SRP_gN_cache_compfunc_type(cmp))) +#define sk_SRP_gN_cache_new_null() ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new_null()) +#define sk_SRP_gN_cache_new_reserve(cmp, n) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_new_reserve(ossl_check_SRP_gN_cache_compfunc_type(cmp), (n))) +#define sk_SRP_gN_cache_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_gN_cache_sk_type(sk), (n)) +#define sk_SRP_gN_cache_free(sk) OPENSSL_sk_free(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_delete(sk, i) ((SRP_gN_cache *)OPENSSL_sk_delete(ossl_check_SRP_gN_cache_sk_type(sk), (i))) +#define sk_SRP_gN_cache_delete_ptr(sk, ptr) ((SRP_gN_cache *)OPENSSL_sk_delete_ptr(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr))) +#define sk_SRP_gN_cache_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_pop(sk) ((SRP_gN_cache *)OPENSSL_sk_pop(ossl_check_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_shift(sk) ((SRP_gN_cache *)OPENSSL_sk_shift(ossl_check_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_gN_cache_sk_type(sk),ossl_check_SRP_gN_cache_freefunc_type(freefunc)) +#define sk_SRP_gN_cache_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr), (idx)) +#define sk_SRP_gN_cache_set(sk, idx, ptr) ((SRP_gN_cache *)OPENSSL_sk_set(ossl_check_SRP_gN_cache_sk_type(sk), (idx), ossl_check_SRP_gN_cache_type(ptr))) 
+#define sk_SRP_gN_cache_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr)) +#define sk_SRP_gN_cache_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_type(ptr), pnum) +#define sk_SRP_gN_cache_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_gN_cache_sk_type(sk)) +#define sk_SRP_gN_cache_dup(sk) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_dup(ossl_check_const_SRP_gN_cache_sk_type(sk))) +#define sk_SRP_gN_cache_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_gN_cache) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_copyfunc_type(copyfunc), ossl_check_SRP_gN_cache_freefunc_type(freefunc))) +#define sk_SRP_gN_cache_set_cmp_func(sk, cmp) ((sk_SRP_gN_cache_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_gN_cache_sk_type(sk), ossl_check_SRP_gN_cache_compfunc_type(cmp))) + + + +typedef struct SRP_user_pwd_st { + /* Owned by us. */ + char *id; + BIGNUM *s; + BIGNUM *v; + /* Not owned by us. */ + const BIGNUM *g; + const BIGNUM *N; + /* Owned by us. */ + char *info; +} SRP_user_pwd; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_user_pwd, SRP_user_pwd, SRP_user_pwd) +#define sk_SRP_user_pwd_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_value(sk, idx) ((SRP_user_pwd *)OPENSSL_sk_value(ossl_check_const_SRP_user_pwd_sk_type(sk), (idx))) +#define sk_SRP_user_pwd_new(cmp) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new(ossl_check_SRP_user_pwd_compfunc_type(cmp))) +#define sk_SRP_user_pwd_new_null() ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new_null()) +#define sk_SRP_user_pwd_new_reserve(cmp, n) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_new_reserve(ossl_check_SRP_user_pwd_compfunc_type(cmp), (n))) +#define sk_SRP_user_pwd_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_user_pwd_sk_type(sk), (n)) +#define sk_SRP_user_pwd_free(sk) OPENSSL_sk_free(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_delete(sk, i) ((SRP_user_pwd *)OPENSSL_sk_delete(ossl_check_SRP_user_pwd_sk_type(sk), (i))) +#define sk_SRP_user_pwd_delete_ptr(sk, ptr) ((SRP_user_pwd *)OPENSSL_sk_delete_ptr(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr))) +#define sk_SRP_user_pwd_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_pop(sk) ((SRP_user_pwd *)OPENSSL_sk_pop(ossl_check_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_shift(sk) ((SRP_user_pwd *)OPENSSL_sk_shift(ossl_check_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_user_pwd_sk_type(sk),ossl_check_SRP_user_pwd_freefunc_type(freefunc)) +#define sk_SRP_user_pwd_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr), (idx)) +#define sk_SRP_user_pwd_set(sk, idx, ptr) ((SRP_user_pwd *)OPENSSL_sk_set(ossl_check_SRP_user_pwd_sk_type(sk), (idx), ossl_check_SRP_user_pwd_type(ptr))) +#define sk_SRP_user_pwd_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr)) +#define sk_SRP_user_pwd_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_type(ptr), pnum) +#define sk_SRP_user_pwd_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_user_pwd_sk_type(sk)) +#define sk_SRP_user_pwd_dup(sk) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_dup(ossl_check_const_SRP_user_pwd_sk_type(sk))) +#define sk_SRP_user_pwd_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_user_pwd) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_copyfunc_type(copyfunc), ossl_check_SRP_user_pwd_freefunc_type(freefunc))) +#define sk_SRP_user_pwd_set_cmp_func(sk, cmp) ((sk_SRP_user_pwd_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_user_pwd_sk_type(sk), ossl_check_SRP_user_pwd_compfunc_type(cmp))) + + +OSSL_DEPRECATEDIN_3_0 +SRP_user_pwd *SRP_user_pwd_new(void); +OSSL_DEPRECATEDIN_3_0 +void SRP_user_pwd_free(SRP_user_pwd *user_pwd); + +OSSL_DEPRECATEDIN_3_0 +void SRP_user_pwd_set_gN(SRP_user_pwd *user_pwd, const BIGNUM *g, + const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +int SRP_user_pwd_set1_ids(SRP_user_pwd *user_pwd, const char *id, + const char *info); +OSSL_DEPRECATEDIN_3_0 +int SRP_user_pwd_set0_sv(SRP_user_pwd *user_pwd, BIGNUM *s, BIGNUM *v); + +typedef struct SRP_VBASE_st { + STACK_OF(SRP_user_pwd) *users_pwd; + STACK_OF(SRP_gN_cache) *gN_cache; +/* to simulate a user */ + char *seed_key; + const BIGNUM *default_g; + const BIGNUM *default_N; +} SRP_VBASE; + +/* + * Internal structure storing N and g pair + */ +typedef struct SRP_gN_st { + char *id; + const BIGNUM *g; + const BIGNUM *N; +} SRP_gN; +SKM_DEFINE_STACK_OF_INTERNAL(SRP_gN, SRP_gN, SRP_gN) +#define sk_SRP_gN_num(sk) OPENSSL_sk_num(ossl_check_const_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_value(sk, idx) ((SRP_gN *)OPENSSL_sk_value(ossl_check_const_SRP_gN_sk_type(sk), (idx))) +#define sk_SRP_gN_new(cmp) ((STACK_OF(SRP_gN) *)OPENSSL_sk_new(ossl_check_SRP_gN_compfunc_type(cmp))) +#define sk_SRP_gN_new_null() ((STACK_OF(SRP_gN) *)OPENSSL_sk_new_null()) +#define sk_SRP_gN_new_reserve(cmp, n) ((STACK_OF(SRP_gN) *)OPENSSL_sk_new_reserve(ossl_check_SRP_gN_compfunc_type(cmp), (n))) +#define sk_SRP_gN_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRP_gN_sk_type(sk), (n)) +#define sk_SRP_gN_free(sk) OPENSSL_sk_free(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_zero(sk) OPENSSL_sk_zero(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_delete(sk, i) ((SRP_gN *)OPENSSL_sk_delete(ossl_check_SRP_gN_sk_type(sk), (i))) +#define sk_SRP_gN_delete_ptr(sk, ptr) ((SRP_gN *)OPENSSL_sk_delete_ptr(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr))) +#define sk_SRP_gN_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_pop(sk) ((SRP_gN *)OPENSSL_sk_pop(ossl_check_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_shift(sk) ((SRP_gN *)OPENSSL_sk_shift(ossl_check_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRP_gN_sk_type(sk),ossl_check_SRP_gN_freefunc_type(freefunc)) +#define sk_SRP_gN_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr), (idx)) +#define sk_SRP_gN_set(sk, idx, ptr) ((SRP_gN *)OPENSSL_sk_set(ossl_check_SRP_gN_sk_type(sk), (idx), ossl_check_SRP_gN_type(ptr))) +#define sk_SRP_gN_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr)) +#define sk_SRP_gN_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_type(ptr), pnum) +#define sk_SRP_gN_sort(sk) OPENSSL_sk_sort(ossl_check_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRP_gN_sk_type(sk)) +#define sk_SRP_gN_dup(sk) ((STACK_OF(SRP_gN) *)OPENSSL_sk_dup(ossl_check_const_SRP_gN_sk_type(sk))) +#define sk_SRP_gN_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRP_gN) *)OPENSSL_sk_deep_copy(ossl_check_const_SRP_gN_sk_type(sk), ossl_check_SRP_gN_copyfunc_type(copyfunc), ossl_check_SRP_gN_freefunc_type(freefunc))) +#define sk_SRP_gN_set_cmp_func(sk, cmp) ((sk_SRP_gN_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRP_gN_sk_type(sk), ossl_check_SRP_gN_compfunc_type(cmp))) + + + +OSSL_DEPRECATEDIN_3_0 +SRP_VBASE *SRP_VBASE_new(char *seed_key); +OSSL_DEPRECATEDIN_3_0 +void SRP_VBASE_free(SRP_VBASE *vb); +OSSL_DEPRECATEDIN_3_0 +int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file); + +OSSL_DEPRECATEDIN_3_0 +int SRP_VBASE_add0_user(SRP_VBASE *vb, SRP_user_pwd *user_pwd); + +/* NOTE: unlike in SRP_VBASE_get_by_user, caller owns the returned pointer.*/ +OSSL_DEPRECATEDIN_3_0 +SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username); + +OSSL_DEPRECATEDIN_3_0 +char *SRP_create_verifier_ex(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +char *SRP_create_verifier(const char *user, const char *pass, char **salt, + char **verifier, const char *N, const char *g); +OSSL_DEPRECATEDIN_3_0 +int SRP_create_verifier_BN_ex(const char *user, const char *pass, BIGNUM **salt, + BIGNUM **verifier, const BIGNUM *N, + const BIGNUM *g, OSSL_LIB_CTX *libctx, + const char *propq); +OSSL_DEPRECATEDIN_3_0 +int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, + BIGNUM **verifier, const BIGNUM *N, + const BIGNUM *g); + +# define SRP_NO_ERROR 0 +# define SRP_ERR_VBASE_INCOMPLETE_FILE 1 +# define SRP_ERR_VBASE_BN_LIB 2 +# define SRP_ERR_OPEN_FILE 3 +# define SRP_ERR_MEMORY 4 + +# define DB_srptype 0 +# define DB_srpverifier 1 +# define DB_srpsalt 2 +# define DB_srpid 3 +# define DB_srpgN 4 +# define DB_srpinfo 5 +# undef DB_NUMBER +# define DB_NUMBER 6 + +# define DB_SRP_INDEX 'I' +# define DB_SRP_VALID 'V' +# define DB_SRP_REVOKED 'R' +# define DB_SRP_MODIF 'v' + +/* see srp.c */ +OSSL_DEPRECATEDIN_3_0 +char *SRP_check_known_gN_param(const BIGNUM *g, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +SRP_gN *SRP_get_default_gN(const char *id); + +/* server side .... 
*/ +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_server_key(const BIGNUM *A, const BIGNUM *v, const BIGNUM *u, + const BIGNUM *b, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_B_ex(const BIGNUM *b, const BIGNUM *N, const BIGNUM *g, + const BIGNUM *v, OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_B(const BIGNUM *b, const BIGNUM *N, const BIGNUM *g, + const BIGNUM *v); + +OSSL_DEPRECATEDIN_3_0 +int SRP_Verify_A_mod_N(const BIGNUM *A, const BIGNUM *N); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_u_ex(const BIGNUM *A, const BIGNUM *B, const BIGNUM *N, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_u(const BIGNUM *A, const BIGNUM *B, const BIGNUM *N); + +/* client side .... */ + +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_x_ex(const BIGNUM *s, const char *user, const char *pass, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_x(const BIGNUM *s, const char *user, const char *pass); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_A(const BIGNUM *a, const BIGNUM *N, const BIGNUM *g); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_client_key_ex(const BIGNUM *N, const BIGNUM *B, const BIGNUM *g, + const BIGNUM *x, const BIGNUM *a, const BIGNUM *u, + OSSL_LIB_CTX *libctx, const char *propq); +OSSL_DEPRECATEDIN_3_0 +BIGNUM *SRP_Calc_client_key(const BIGNUM *N, const BIGNUM *B, const BIGNUM *g, + const BIGNUM *x, const BIGNUM *a, const BIGNUM *u); +OSSL_DEPRECATEDIN_3_0 +int SRP_Verify_B_mod_N(const BIGNUM *B, const BIGNUM *N); + +# define SRP_MINIMAL_N 1024 + +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ + +/* This method ignores the configured seed and fails for an unknown user. */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 +SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username); +# endif + +# ifdef __cplusplus +} +# endif +# endif + +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/ssl.h b/contrib/openssl-cmake/common/include/openssl/ssl.h new file mode 100644 index 000000000000..aeb28d2b55d5 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/ssl.h @@ -0,0 +1,2933 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ssl.h.in + * + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * Copyright 2005 Nokia. All rights reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_SSL_H +# define OPENSSL_SSL_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_SSL_H +# endif + +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# include +# include +# endif +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* OpenSSL version number for ASN.1 encoding of the session information */ +/*- + * Version 0 - initial version + * Version 1 - added the optional peer certificate + */ +# define SSL_SESSION_ASN1_VERSION 0x0001 + +# define SSL_MAX_SSL_SESSION_ID_LENGTH 32 +# define SSL_MAX_SID_CTX_LENGTH 32 + +# define SSL_MIN_RSA_MODULUS_LENGTH_IN_BYTES (512/8) +# define SSL_MAX_KEY_ARG_LENGTH 8 +/* SSL_MAX_MASTER_KEY_LENGTH is defined in prov_ssl.h */ + +/* The maximum number of encrypt/decrypt pipelines we can support */ +# define SSL_MAX_PIPELINES 32 + +/* text strings for the ciphers */ + +/* These are used to specify which ciphers to use and not to use */ + +# define SSL_TXT_LOW "LOW" +# define SSL_TXT_MEDIUM "MEDIUM" +# define SSL_TXT_HIGH "HIGH" +# define SSL_TXT_FIPS "FIPS" + +# define SSL_TXT_aNULL "aNULL" +# define SSL_TXT_eNULL "eNULL" +# define SSL_TXT_NULL "NULL" + +# define SSL_TXT_kRSA "kRSA" +# define SSL_TXT_kDHr "kDHr"/* this cipher class has been removed */ +# define SSL_TXT_kDHd "kDHd"/* this cipher class has been removed */ +# define SSL_TXT_kDH "kDH"/* this cipher class has been removed */ +# define SSL_TXT_kEDH "kEDH"/* alias for kDHE */ +# define SSL_TXT_kDHE "kDHE" +# define SSL_TXT_kECDHr "kECDHr"/* this cipher class has been removed */ +# define SSL_TXT_kECDHe "kECDHe"/* this cipher class has been removed */ +# define SSL_TXT_kECDH "kECDH"/* this cipher class has been removed */ +# define SSL_TXT_kEECDH "kEECDH"/* alias for kECDHE */ +# define SSL_TXT_kECDHE "kECDHE" +# define SSL_TXT_kPSK "kPSK" +# define SSL_TXT_kRSAPSK "kRSAPSK" +# define SSL_TXT_kECDHEPSK "kECDHEPSK" +# define SSL_TXT_kDHEPSK "kDHEPSK" +# define SSL_TXT_kGOST "kGOST" +# define SSL_TXT_kGOST18 "kGOST18" +# define SSL_TXT_kSRP "kSRP" + +# define SSL_TXT_aRSA "aRSA" +# define SSL_TXT_aDSS "aDSS" +# define SSL_TXT_aDH "aDH"/* this cipher class has been removed */ +# define SSL_TXT_aECDH "aECDH"/* this cipher class has been removed */ +# define SSL_TXT_aECDSA "aECDSA" +# define SSL_TXT_aPSK "aPSK" +# define SSL_TXT_aGOST94 "aGOST94" +# define SSL_TXT_aGOST01 "aGOST01" +# define SSL_TXT_aGOST12 "aGOST12" +# define SSL_TXT_aGOST "aGOST" +# define SSL_TXT_aSRP "aSRP" + +# define SSL_TXT_DSS "DSS" +# define SSL_TXT_DH "DH" +# define SSL_TXT_DHE "DHE"/* same as "kDHE:-ADH" */ +# define SSL_TXT_EDH "EDH"/* alias for DHE */ +# define SSL_TXT_ADH "ADH" +# define SSL_TXT_RSA "RSA" +# define SSL_TXT_ECDH "ECDH" +# define SSL_TXT_EECDH "EECDH"/* alias for ECDHE" */ +# define SSL_TXT_ECDHE "ECDHE"/* same as "kECDHE:-AECDH" */ +# define SSL_TXT_AECDH "AECDH" +# define SSL_TXT_ECDSA "ECDSA" +# define SSL_TXT_PSK "PSK" +# define SSL_TXT_SRP "SRP" + +# define SSL_TXT_DES "DES" +# define SSL_TXT_3DES "3DES" +# define SSL_TXT_RC4 "RC4" +# define SSL_TXT_RC2 "RC2" +# define SSL_TXT_IDEA "IDEA" +# define SSL_TXT_SEED "SEED" +# define SSL_TXT_AES128 "AES128" +# define SSL_TXT_AES256 "AES256" +# define SSL_TXT_AES "AES" +# define 
SSL_TXT_AES_GCM "AESGCM" +# define SSL_TXT_AES_CCM "AESCCM" +# define SSL_TXT_AES_CCM_8 "AESCCM8" +# define SSL_TXT_CAMELLIA128 "CAMELLIA128" +# define SSL_TXT_CAMELLIA256 "CAMELLIA256" +# define SSL_TXT_CAMELLIA "CAMELLIA" +# define SSL_TXT_CHACHA20 "CHACHA20" +# define SSL_TXT_GOST "GOST89" +# define SSL_TXT_ARIA "ARIA" +# define SSL_TXT_ARIA_GCM "ARIAGCM" +# define SSL_TXT_ARIA128 "ARIA128" +# define SSL_TXT_ARIA256 "ARIA256" +# define SSL_TXT_GOST2012_GOST8912_GOST8912 "GOST2012-GOST8912-GOST8912" +# define SSL_TXT_CBC "CBC" + +# define SSL_TXT_MD5 "MD5" +# define SSL_TXT_SHA1 "SHA1" +# define SSL_TXT_SHA "SHA"/* same as "SHA1" */ +# define SSL_TXT_GOST94 "GOST94" +# define SSL_TXT_GOST89MAC "GOST89MAC" +# define SSL_TXT_GOST12 "GOST12" +# define SSL_TXT_GOST89MAC12 "GOST89MAC12" +# define SSL_TXT_SHA256 "SHA256" +# define SSL_TXT_SHA384 "SHA384" + +# define SSL_TXT_SSLV3 "SSLv3" +# define SSL_TXT_TLSV1 "TLSv1" +# define SSL_TXT_TLSV1_1 "TLSv1.1" +# define SSL_TXT_TLSV1_2 "TLSv1.2" + +# define SSL_TXT_ALL "ALL" + +/*- + * COMPLEMENTOF* definitions. These identifiers are used to (de-select) + * ciphers normally not being used. + * Example: "RC4" will activate all ciphers using RC4 including ciphers + * without authentication, which would normally disabled by DEFAULT (due + * the "!ADH" being part of default). Therefore "RC4:!COMPLEMENTOFDEFAULT" + * will make sure that it is also disabled in the specific selection. + * COMPLEMENTOF* identifiers are portable between version, as adjustments + * to the default cipher setup will also be included here. + * + * COMPLEMENTOFDEFAULT does not experience the same special treatment that + * DEFAULT gets, as only selection is being done and no sorting as needed + * for DEFAULT. + */ +# define SSL_TXT_CMPALL "COMPLEMENTOFALL" +# define SSL_TXT_CMPDEF "COMPLEMENTOFDEFAULT" + +/* + * The following cipher list is used by default. It also is substituted when + * an application-defined cipher list string starts with 'DEFAULT'. + * This applies to ciphersuites for TLSv1.2 and below. + * DEPRECATED IN 3.0.0, in favor of OSSL_default_cipher_list() + * Update both macro and function simultaneously + */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_DEFAULT_CIPHER_LIST "ALL:!COMPLEMENTOFDEFAULT:!eNULL" +/* + * This is the default set of TLSv1.3 ciphersuites + * DEPRECATED IN 3.0.0, in favor of OSSL_default_ciphersuites() + * Update both macro and function simultaneously + */ +# define TLS_DEFAULT_CIPHERSUITES "TLS_AES_256_GCM_SHA384:" \ + "TLS_CHACHA20_POLY1305_SHA256:" \ + "TLS_AES_128_GCM_SHA256" +# endif +/* + * As of OpenSSL 1.0.0, ssl_create_cipher_list() in ssl/ssl_ciph.c always + * starts with a reasonable order, and all we have to do for DEFAULT is + * throwing out anonymous and unencrypted ciphersuites! (The latter are not + * actually enabled by ALL, but "ALL:RSA" would enable some of them.) + */ + +/* Used in SSL_set_shutdown()/SSL_get_shutdown(); */ +# define SSL_SENT_SHUTDOWN 1 +# define SSL_RECEIVED_SHUTDOWN 2 + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +# define SSL_FILETYPE_ASN1 X509_FILETYPE_ASN1 +# define SSL_FILETYPE_PEM X509_FILETYPE_PEM + +/* + * This is needed to stop compilers complaining about the 'struct ssl_st *' + * function parameters used to prototype callbacks in SSL_CTX. 
+ */ +typedef struct ssl_st *ssl_crock_st; +typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT; +typedef struct ssl_method_st SSL_METHOD; +typedef struct ssl_cipher_st SSL_CIPHER; +typedef struct ssl_session_st SSL_SESSION; +typedef struct tls_sigalgs_st TLS_SIGALGS; +typedef struct ssl_conf_ctx_st SSL_CONF_CTX; + +STACK_OF(SSL_CIPHER); + +/* SRTP protection profiles for use with the use_srtp extension (RFC 5764)*/ +typedef struct srtp_protection_profile_st { + const char *name; + unsigned long id; +} SRTP_PROTECTION_PROFILE; +SKM_DEFINE_STACK_OF_INTERNAL(SRTP_PROTECTION_PROFILE, SRTP_PROTECTION_PROFILE, SRTP_PROTECTION_PROFILE) +#define sk_SRTP_PROTECTION_PROFILE_num(sk) OPENSSL_sk_num(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_value(sk, idx) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_value(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk), (idx))) +#define sk_SRTP_PROTECTION_PROFILE_new(cmp) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new(ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp))) +#define sk_SRTP_PROTECTION_PROFILE_new_null() ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new_null()) +#define sk_SRTP_PROTECTION_PROFILE_new_reserve(cmp, n) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_new_reserve(ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp), (n))) +#define sk_SRTP_PROTECTION_PROFILE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (n)) +#define sk_SRTP_PROTECTION_PROFILE_free(sk) OPENSSL_sk_free(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_zero(sk) OPENSSL_sk_zero(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_delete(sk, i) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_delete(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (i))) +#define sk_SRTP_PROTECTION_PROFILE_delete_ptr(sk, ptr) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_delete_ptr(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr))) +#define sk_SRTP_PROTECTION_PROFILE_push(sk, ptr) OPENSSL_sk_push(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_pop(sk) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_pop(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_shift(sk) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_shift(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk),ossl_check_SRTP_PROTECTION_PROFILE_freefunc_type(freefunc)) +#define sk_SRTP_PROTECTION_PROFILE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr), (idx)) +#define sk_SRTP_PROTECTION_PROFILE_set(sk, idx, ptr) ((SRTP_PROTECTION_PROFILE *)OPENSSL_sk_set(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), (idx), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr))) +#define sk_SRTP_PROTECTION_PROFILE_find(sk, ptr) OPENSSL_sk_find(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define sk_SRTP_PROTECTION_PROFILE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr)) +#define 
sk_SRTP_PROTECTION_PROFILE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_type(ptr), pnum) +#define sk_SRTP_PROTECTION_PROFILE_sort(sk) OPENSSL_sk_sort(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk)) +#define sk_SRTP_PROTECTION_PROFILE_dup(sk) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_dup(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk))) +#define sk_SRTP_PROTECTION_PROFILE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SRTP_PROTECTION_PROFILE) *)OPENSSL_sk_deep_copy(ossl_check_const_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_copyfunc_type(copyfunc), ossl_check_SRTP_PROTECTION_PROFILE_freefunc_type(freefunc))) +#define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(sk, cmp) ((sk_SRTP_PROTECTION_PROFILE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SRTP_PROTECTION_PROFILE_sk_type(sk), ossl_check_SRTP_PROTECTION_PROFILE_compfunc_type(cmp))) + + + +typedef int (*tls_session_ticket_ext_cb_fn)(SSL *s, const unsigned char *data, + int len, void *arg); +typedef int (*tls_session_secret_cb_fn)(SSL *s, void *secret, int *secret_len, + STACK_OF(SSL_CIPHER) *peer_ciphers, + const SSL_CIPHER **cipher, void *arg); + +/* Extension context codes */ +/* This extension is only allowed in TLS */ +#define SSL_EXT_TLS_ONLY 0x00001 +/* This extension is only allowed in DTLS */ +#define SSL_EXT_DTLS_ONLY 0x00002 +/* Some extensions may be allowed in DTLS but we don't implement them for it */ +#define SSL_EXT_TLS_IMPLEMENTATION_ONLY 0x00004 +/* Most extensions are not defined for SSLv3 but EXT_TYPE_renegotiate is */ +#define SSL_EXT_SSL3_ALLOWED 0x00008 +/* Extension is only defined for TLS1.2 and below */ +#define SSL_EXT_TLS1_2_AND_BELOW_ONLY 0x00010 +/* Extension is only defined for TLS1.3 and above */ +#define SSL_EXT_TLS1_3_ONLY 0x00020 +/* Ignore this extension during parsing if we are resuming */ +#define SSL_EXT_IGNORE_ON_RESUMPTION 0x00040 +#define SSL_EXT_CLIENT_HELLO 0x00080 +/* Really means TLS1.2 or below */ +#define SSL_EXT_TLS1_2_SERVER_HELLO 0x00100 +#define SSL_EXT_TLS1_3_SERVER_HELLO 0x00200 +#define SSL_EXT_TLS1_3_ENCRYPTED_EXTENSIONS 0x00400 +#define SSL_EXT_TLS1_3_HELLO_RETRY_REQUEST 0x00800 +#define SSL_EXT_TLS1_3_CERTIFICATE 0x01000 +#define SSL_EXT_TLS1_3_NEW_SESSION_TICKET 0x02000 +#define SSL_EXT_TLS1_3_CERTIFICATE_REQUEST 0x04000 +#define SSL_EXT_TLS1_3_CERTIFICATE_COMPRESSION 0x08000 +/* When sending a raw public key in a certificate message */ +#define SSL_EXT_TLS1_3_RAW_PUBLIC_KEY 0x10000 + +/* Typedefs for handling custom extensions */ + +typedef int (*custom_ext_add_cb)(SSL *s, unsigned int ext_type, + const unsigned char **out, size_t *outlen, + int *al, void *add_arg); + +typedef void (*custom_ext_free_cb)(SSL *s, unsigned int ext_type, + const unsigned char *out, void *add_arg); + +typedef int (*custom_ext_parse_cb)(SSL *s, unsigned int ext_type, + const unsigned char *in, size_t inlen, + int *al, void *parse_arg); + + +typedef int (*SSL_custom_ext_add_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char **out, + size_t *outlen, X509 *x, + size_t chainidx, + int *al, void *add_arg); + +typedef void (*SSL_custom_ext_free_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int context, + const unsigned char *out, + void *add_arg); + +typedef int (*SSL_custom_ext_parse_cb_ex)(SSL *s, unsigned int ext_type, + unsigned int 
context, + const unsigned char *in, + size_t inlen, X509 *x, + size_t chainidx, + int *al, void *parse_arg); + +/* Typedef for verification callback */ +typedef int (*SSL_verify_cb)(int preverify_ok, X509_STORE_CTX *x509_ctx); + +/* Typedef for SSL async callback */ +typedef int (*SSL_async_callback_fn)(SSL *s, void *arg); + +#define SSL_OP_BIT(n) ((uint64_t)1 << (uint64_t)n) + +/* + * SSL/TLS connection options. + */ + /* Disable Extended master secret */ +# define SSL_OP_NO_EXTENDED_MASTER_SECRET SSL_OP_BIT(0) + /* Cleanse plaintext copies of data delivered to the application */ +# define SSL_OP_CLEANSE_PLAINTEXT SSL_OP_BIT(1) + /* Allow initial connection to servers that don't support RI */ +# define SSL_OP_LEGACY_SERVER_CONNECT SSL_OP_BIT(2) + /* Enable support for Kernel TLS */ +# define SSL_OP_ENABLE_KTLS SSL_OP_BIT(3) +# define SSL_OP_TLSEXT_PADDING SSL_OP_BIT(4) +# define SSL_OP_SAFARI_ECDHE_ECDSA_BUG SSL_OP_BIT(6) +# define SSL_OP_IGNORE_UNEXPECTED_EOF SSL_OP_BIT(7) +# define SSL_OP_ALLOW_CLIENT_RENEGOTIATION SSL_OP_BIT(8) +# define SSL_OP_DISABLE_TLSEXT_CA_NAMES SSL_OP_BIT(9) + /* In TLSv1.3 allow a non-(ec)dhe based kex_mode */ +# define SSL_OP_ALLOW_NO_DHE_KEX SSL_OP_BIT(10) + /* + * Disable SSL 3.0/TLS 1.0 CBC vulnerability workaround that was added + * in OpenSSL 0.9.6d. Usually (depending on the application protocol) + * the workaround is not needed. Unfortunately some broken SSL/TLS + * implementations cannot handle it at all, which is why we include it + * in SSL_OP_ALL. Added in 0.9.6e + */ +# define SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS SSL_OP_BIT(11) + /* DTLS options */ +# define SSL_OP_NO_QUERY_MTU SSL_OP_BIT(12) + /* Turn on Cookie Exchange (on relevant for servers) */ +# define SSL_OP_COOKIE_EXCHANGE SSL_OP_BIT(13) + /* Don't use RFC4507 ticket extension */ +# define SSL_OP_NO_TICKET SSL_OP_BIT(14) +# ifndef OPENSSL_NO_DTLS1_METHOD + /* + * Use Cisco's version identifier of DTLS_BAD_VER + * (only with deprecated DTLSv1_client_method()) + */ +# define SSL_OP_CISCO_ANYCONNECT SSL_OP_BIT(15) +# endif + /* As server, disallow session resumption on renegotiation */ +# define SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION SSL_OP_BIT(16) + /* Don't use compression even if supported */ +# define SSL_OP_NO_COMPRESSION SSL_OP_BIT(17) + /* Permit unsafe legacy renegotiation */ +# define SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION SSL_OP_BIT(18) + /* Disable encrypt-then-mac */ +# define SSL_OP_NO_ENCRYPT_THEN_MAC SSL_OP_BIT(19) + /* + * Enable TLSv1.3 Compatibility mode. This is on by default. A future + * version of OpenSSL may have this disabled by default. + */ +# define SSL_OP_ENABLE_MIDDLEBOX_COMPAT SSL_OP_BIT(20) + /* + * Prioritize Chacha20Poly1305 when client does. + * Modifies SSL_OP_CIPHER_SERVER_PREFERENCE + */ +# define SSL_OP_PRIORITIZE_CHACHA SSL_OP_BIT(21) + /* + * Set on servers to choose the cipher according to server's preferences. + */ +# define SSL_OP_CIPHER_SERVER_PREFERENCE SSL_OP_BIT(22) + /* + * If set, a server will allow a client to issue an SSLv3.0 version + * number as latest version supported in the premaster secret, even when + * TLSv1.0 (version 3.1) was announced in the client hello. Normally + * this is forbidden to prevent version rollback attacks. + */ +# define SSL_OP_TLS_ROLLBACK_BUG SSL_OP_BIT(23) + /* + * Switches off automatic TLSv1.3 anti-replay protection for early data. + * This is a server-side option only (no effect on the client). 
+ */ +# define SSL_OP_NO_ANTI_REPLAY SSL_OP_BIT(24) +# define SSL_OP_NO_SSLv3 SSL_OP_BIT(25) +# define SSL_OP_NO_TLSv1 SSL_OP_BIT(26) +# define SSL_OP_NO_TLSv1_2 SSL_OP_BIT(27) +# define SSL_OP_NO_TLSv1_1 SSL_OP_BIT(28) +# define SSL_OP_NO_TLSv1_3 SSL_OP_BIT(29) +# define SSL_OP_NO_DTLSv1 SSL_OP_BIT(26) +# define SSL_OP_NO_DTLSv1_2 SSL_OP_BIT(27) + /* Disallow all renegotiation */ +# define SSL_OP_NO_RENEGOTIATION SSL_OP_BIT(30) + /* + * Make server add server-hello extension from early version of + * cryptopro draft, when GOST ciphersuite is negotiated. Required for + * interoperability with CryptoPro CSP 3.x + */ +# define SSL_OP_CRYPTOPRO_TLSEXT_BUG SSL_OP_BIT(31) +/* + * Disable RFC8879 certificate compression + * SSL_OP_NO_TX_CERTIFICATE_COMPRESSION: don't send compressed certificates, + * and ignore the extension when received. + * SSL_OP_NO_RX_CERTIFICATE_COMPRESSION: don't send the extension, and + * subsequently indicating that receiving is not supported + */ +# define SSL_OP_NO_TX_CERTIFICATE_COMPRESSION SSL_OP_BIT(32) +# define SSL_OP_NO_RX_CERTIFICATE_COMPRESSION SSL_OP_BIT(33) + /* Enable KTLS TX zerocopy on Linux */ +# define SSL_OP_ENABLE_KTLS_TX_ZEROCOPY_SENDFILE SSL_OP_BIT(34) + +#define SSL_OP_PREFER_NO_DHE_KEX SSL_OP_BIT(35) + +/* + * Option "collections." + */ +# define SSL_OP_NO_SSL_MASK \ + ( SSL_OP_NO_SSLv3 | SSL_OP_NO_TLSv1 | SSL_OP_NO_TLSv1_1 \ + | SSL_OP_NO_TLSv1_2 | SSL_OP_NO_TLSv1_3 ) +# define SSL_OP_NO_DTLS_MASK \ + ( SSL_OP_NO_DTLSv1 | SSL_OP_NO_DTLSv1_2 ) + +/* Various bug workarounds that should be rather harmless. */ +# define SSL_OP_ALL \ + ( SSL_OP_CRYPTOPRO_TLSEXT_BUG | SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS \ + | SSL_OP_TLSEXT_PADDING | SSL_OP_SAFARI_ECDHE_ECDSA_BUG ) + +/* + * OBSOLETE OPTIONS retained for compatibility + */ + +# define SSL_OP_MICROSOFT_SESS_ID_BUG 0x0 +# define SSL_OP_NETSCAPE_CHALLENGE_BUG 0x0 +# define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x0 +# define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x0 +# define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x0 +# define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x0 +# define SSL_OP_SSLEAY_080_CLIENT_DH_BUG 0x0 +# define SSL_OP_TLS_D5_BUG 0x0 +# define SSL_OP_TLS_BLOCK_PADDING_BUG 0x0 +# define SSL_OP_SINGLE_ECDH_USE 0x0 +# define SSL_OP_SINGLE_DH_USE 0x0 +# define SSL_OP_EPHEMERAL_RSA 0x0 +# define SSL_OP_NO_SSLv2 0x0 +# define SSL_OP_PKCS1_CHECK_1 0x0 +# define SSL_OP_PKCS1_CHECK_2 0x0 +# define SSL_OP_NETSCAPE_CA_DN_BUG 0x0 +# define SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG 0x0 + +/* + * Allow SSL_write(..., n) to return r with 0 < r < n (i.e. report success + * when just a single record has been written): + */ +# define SSL_MODE_ENABLE_PARTIAL_WRITE 0x00000001U +/* + * Make it possible to retry SSL_write() with changed buffer location (buffer + * contents must stay the same!); this is not the default to avoid the + * misconception that non-blocking SSL_write() behaves like non-blocking + * write(): + */ +# define SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER 0x00000002U +/* + * Never bother the application with retries if the transport is blocking: + */ +# define SSL_MODE_AUTO_RETRY 0x00000004U +/* Don't attempt to automatically build certificate chain */ +# define SSL_MODE_NO_AUTO_CHAIN 0x00000008U +/* + * Save RAM by releasing read and write buffers when they're empty. (SSL3 and + * TLS only.) Released buffers are freed. 
+ */ +# define SSL_MODE_RELEASE_BUFFERS 0x00000010U +/* + * Send the current time in the Random fields of the ClientHello and + * ServerHello records for compatibility with hypothetical implementations + * that require it. + */ +# define SSL_MODE_SEND_CLIENTHELLO_TIME 0x00000020U +# define SSL_MODE_SEND_SERVERHELLO_TIME 0x00000040U +/* + * Send TLS_FALLBACK_SCSV in the ClientHello. To be set only by applications + * that reconnect with a downgraded protocol version; see + * draft-ietf-tls-downgrade-scsv-00 for details. DO NOT ENABLE THIS if your + * application attempts a normal handshake. Only use this in explicit + * fallback retries, following the guidance in + * draft-ietf-tls-downgrade-scsv-00. + */ +# define SSL_MODE_SEND_FALLBACK_SCSV 0x00000080U +/* + * Support Asynchronous operation + */ +# define SSL_MODE_ASYNC 0x00000100U + +/* + * When using DTLS/SCTP, include the terminating zero in the label + * used for computing the endpoint-pair shared secret. Required for + * interoperability with implementations having this bug like these + * older version of OpenSSL: + * - OpenSSL 1.0.0 series + * - OpenSSL 1.0.1 series + * - OpenSSL 1.0.2 series + * - OpenSSL 1.1.0 series + * - OpenSSL 1.1.1 and 1.1.1a + */ +# define SSL_MODE_DTLS_SCTP_LABEL_LENGTH_BUG 0x00000400U + +/* Cert related flags */ +/* + * Many implementations ignore some aspects of the TLS standards such as + * enforcing certificate chain algorithms. When this is set we enforce them. + */ +# define SSL_CERT_FLAG_TLS_STRICT 0x00000001U + +/* Suite B modes, takes same values as certificate verify flags */ +# define SSL_CERT_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define SSL_CERT_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define SSL_CERT_FLAG_SUITEB_128_LOS 0x30000 + +/* Perform all sorts of protocol violations for testing purposes */ +# define SSL_CERT_FLAG_BROKEN_PROTOCOL 0x10000000 + +/* Flags for building certificate chains */ +/* Treat any existing certificates as untrusted CAs */ +# define SSL_BUILD_CHAIN_FLAG_UNTRUSTED 0x1 +/* Don't include root CA in chain */ +# define SSL_BUILD_CHAIN_FLAG_NO_ROOT 0x2 +/* Just check certificates already there */ +# define SSL_BUILD_CHAIN_FLAG_CHECK 0x4 +/* Ignore verification errors */ +# define SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR 0x8 +/* Clear verification errors from queue */ +# define SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR 0x10 + +/* Flags returned by SSL_check_chain */ +/* Certificate can be used with this session */ +# define CERT_PKEY_VALID 0x1 +/* Certificate can also be used for signing */ +# define CERT_PKEY_SIGN 0x2 +/* EE certificate signing algorithm OK */ +# define CERT_PKEY_EE_SIGNATURE 0x10 +/* CA signature algorithms OK */ +# define CERT_PKEY_CA_SIGNATURE 0x20 +/* EE certificate parameters OK */ +# define CERT_PKEY_EE_PARAM 0x40 +/* CA certificate parameters OK */ +# define CERT_PKEY_CA_PARAM 0x80 +/* Signing explicitly allowed as opposed to SHA1 fallback */ +# define CERT_PKEY_EXPLICIT_SIGN 0x100 +/* Client CA issuer names match (always set for server cert) */ +# define CERT_PKEY_ISSUER_NAME 0x200 +/* Cert type matches client types (always set for server cert) */ +# define CERT_PKEY_CERT_TYPE 0x400 +/* Cert chain suitable to Suite B */ +# define CERT_PKEY_SUITEB 0x800 +/* Cert pkey valid for raw public key use */ +# define CERT_PKEY_RPK 0x1000 + +# define SSL_CONF_FLAG_CMDLINE 0x1 +# define SSL_CONF_FLAG_FILE 0x2 +# define SSL_CONF_FLAG_CLIENT 0x4 +# define SSL_CONF_FLAG_SERVER 0x8 +# define 
SSL_CONF_FLAG_SHOW_ERRORS 0x10 +# define SSL_CONF_FLAG_CERTIFICATE 0x20 +# define SSL_CONF_FLAG_REQUIRE_PRIVATE 0x40 +/* Configuration value types */ +# define SSL_CONF_TYPE_UNKNOWN 0x0 +# define SSL_CONF_TYPE_STRING 0x1 +# define SSL_CONF_TYPE_FILE 0x2 +# define SSL_CONF_TYPE_DIR 0x3 +# define SSL_CONF_TYPE_NONE 0x4 +# define SSL_CONF_TYPE_STORE 0x5 + +/* Maximum length of the application-controlled segment of a a TLSv1.3 cookie */ +# define SSL_COOKIE_LENGTH 4096 + +/* + * Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, they + * cannot be used to clear bits. + */ + +uint64_t SSL_CTX_get_options(const SSL_CTX *ctx); +uint64_t SSL_get_options(const SSL *s); +uint64_t SSL_CTX_clear_options(SSL_CTX *ctx, uint64_t op); +uint64_t SSL_clear_options(SSL *s, uint64_t op); +uint64_t SSL_CTX_set_options(SSL_CTX *ctx, uint64_t op); +uint64_t SSL_set_options(SSL *s, uint64_t op); + +# define SSL_CTX_set_mode(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_MODE,(op),NULL) +# define SSL_CTX_clear_mode(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_MODE,(op),NULL) +# define SSL_CTX_get_mode(ctx) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_MODE,0,NULL) +# define SSL_clear_mode(ssl,op) \ + SSL_ctrl((ssl),SSL_CTRL_CLEAR_MODE,(op),NULL) +# define SSL_set_mode(ssl,op) \ + SSL_ctrl((ssl),SSL_CTRL_MODE,(op),NULL) +# define SSL_get_mode(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_MODE,0,NULL) +# define SSL_set_mtu(ssl, mtu) \ + SSL_ctrl((ssl),SSL_CTRL_SET_MTU,(mtu),NULL) +# define DTLS_set_link_mtu(ssl, mtu) \ + SSL_ctrl((ssl),DTLS_CTRL_SET_LINK_MTU,(mtu),NULL) +# define DTLS_get_link_min_mtu(ssl) \ + SSL_ctrl((ssl),DTLS_CTRL_GET_LINK_MIN_MTU,0,NULL) + +# define SSL_get_secure_renegotiation_support(ssl) \ + SSL_ctrl((ssl), SSL_CTRL_GET_RI_SUPPORT, 0, NULL) + +# define SSL_CTX_set_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_set_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_CTX_clear_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) +# define SSL_clear_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) + +void SSL_CTX_set_msg_callback(SSL_CTX *ctx, + void (*cb) (int write_p, int version, + int content_type, const void *buf, + size_t len, SSL *ssl, void *arg)); +void SSL_set_msg_callback(SSL *ssl, + void (*cb) (int write_p, int version, + int content_type, const void *buf, + size_t len, SSL *ssl, void *arg)); +# define SSL_CTX_set_msg_callback_arg(ctx, arg) SSL_CTX_ctrl((ctx), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) +# define SSL_set_msg_callback_arg(ssl, arg) SSL_ctrl((ssl), SSL_CTRL_SET_MSG_CALLBACK_ARG, 0, (arg)) + +# define SSL_get_extms_support(s) \ + SSL_ctrl((s),SSL_CTRL_GET_EXTMS_SUPPORT,0,NULL) + +# ifndef OPENSSL_NO_SRP +/* see tls_srp.c */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 __owur int SSL_SRP_CTX_init(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur int SSL_CTX_SRP_CTX_init(SSL_CTX *ctx); +OSSL_DEPRECATEDIN_3_0 int SSL_SRP_CTX_free(SSL *ctx); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_SRP_CTX_free(SSL_CTX *ctx); +OSSL_DEPRECATEDIN_3_0 __owur int SSL_srp_server_param_with_username(SSL *s, + int *ad); +OSSL_DEPRECATEDIN_3_0 __owur int SRP_Calc_A_param(SSL *s); +# endif +# endif + +/* 100k max cert list */ +# define SSL_MAX_CERT_LIST_DEFAULT (1024*100) + +# define SSL_SESSION_CACHE_MAX_SIZE_DEFAULT (1024*20) + +/* + * This callback type is used inside SSL_CTX, SSL, and in the functions that + * set them. It is used to override the generation of SSL/TLS session IDs in + * a server. 
Return value should be zero on an error, non-zero to proceed. + * Also, callbacks should themselves check if the id they generate is unique + * otherwise the SSL handshake will fail with an error - callbacks can do + * this using the 'ssl' value they're passed by; + * SSL_has_matching_session_id(ssl, id, *id_len) The length value passed in + * is set at the maximum size the session ID can be. In SSLv3/TLSv1 it is 32 + * bytes. The callback can alter this length to be less if desired. It is + * also an error for the callback to set the size to zero. + */ +typedef int (*GEN_SESSION_CB) (SSL *ssl, unsigned char *id, + unsigned int *id_len); + +# define SSL_SESS_CACHE_OFF 0x0000 +# define SSL_SESS_CACHE_CLIENT 0x0001 +# define SSL_SESS_CACHE_SERVER 0x0002 +# define SSL_SESS_CACHE_BOTH (SSL_SESS_CACHE_CLIENT|SSL_SESS_CACHE_SERVER) +# define SSL_SESS_CACHE_NO_AUTO_CLEAR 0x0080 +/* enough comments already ... see SSL_CTX_set_session_cache_mode(3) */ +# define SSL_SESS_CACHE_NO_INTERNAL_LOOKUP 0x0100 +# define SSL_SESS_CACHE_NO_INTERNAL_STORE 0x0200 +# define SSL_SESS_CACHE_NO_INTERNAL \ + (SSL_SESS_CACHE_NO_INTERNAL_LOOKUP|SSL_SESS_CACHE_NO_INTERNAL_STORE) +# define SSL_SESS_CACHE_UPDATE_TIME 0x0400 + +LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx); +# define SSL_CTX_sess_number(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_NUMBER,0,NULL) +# define SSL_CTX_sess_connect(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT,0,NULL) +# define SSL_CTX_sess_connect_good(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT_GOOD,0,NULL) +# define SSL_CTX_sess_connect_renegotiate(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CONNECT_RENEGOTIATE,0,NULL) +# define SSL_CTX_sess_accept(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT,0,NULL) +# define SSL_CTX_sess_accept_renegotiate(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT_RENEGOTIATE,0,NULL) +# define SSL_CTX_sess_accept_good(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_ACCEPT_GOOD,0,NULL) +# define SSL_CTX_sess_hits(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_HIT,0,NULL) +# define SSL_CTX_sess_cb_hits(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CB_HIT,0,NULL) +# define SSL_CTX_sess_misses(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_MISSES,0,NULL) +# define SSL_CTX_sess_timeouts(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_TIMEOUTS,0,NULL) +# define SSL_CTX_sess_cache_full(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SESS_CACHE_FULL,0,NULL) + +void SSL_CTX_sess_set_new_cb(SSL_CTX *ctx, + int (*new_session_cb) (struct ssl_st *ssl, + SSL_SESSION *sess)); +int (*SSL_CTX_sess_get_new_cb(SSL_CTX *ctx)) (struct ssl_st *ssl, + SSL_SESSION *sess); +void SSL_CTX_sess_set_remove_cb(SSL_CTX *ctx, + void (*remove_session_cb) (struct ssl_ctx_st + *ctx, + SSL_SESSION *sess)); +void (*SSL_CTX_sess_get_remove_cb(SSL_CTX *ctx)) (struct ssl_ctx_st *ctx, + SSL_SESSION *sess); +void SSL_CTX_sess_set_get_cb(SSL_CTX *ctx, + SSL_SESSION *(*get_session_cb) (struct ssl_st + *ssl, + const unsigned char + *data, int len, + int *copy)); +SSL_SESSION *(*SSL_CTX_sess_get_get_cb(SSL_CTX *ctx)) (struct ssl_st *ssl, + const unsigned char *data, + int len, int *copy); +void SSL_CTX_set_info_callback(SSL_CTX *ctx, + void (*cb) (const SSL *ssl, int type, int val)); +void (*SSL_CTX_get_info_callback(SSL_CTX *ctx)) (const SSL *ssl, int type, + int val); +void SSL_CTX_set_client_cert_cb(SSL_CTX *ctx, + int (*client_cert_cb) (SSL *ssl, X509 **x509, + EVP_PKEY **pkey)); +int (*SSL_CTX_get_client_cert_cb(SSL_CTX *ctx)) (SSL *ssl, X509 **x509, + EVP_PKEY **pkey); +# ifndef OPENSSL_NO_ENGINE +__owur int SSL_CTX_set_client_cert_engine(SSL_CTX 
*ctx, ENGINE *e); +# endif +void SSL_CTX_set_cookie_generate_cb(SSL_CTX *ctx, + int (*app_gen_cookie_cb) (SSL *ssl, + unsigned char + *cookie, + unsigned int + *cookie_len)); +void SSL_CTX_set_cookie_verify_cb(SSL_CTX *ctx, + int (*app_verify_cookie_cb) (SSL *ssl, + const unsigned + char *cookie, + unsigned int + cookie_len)); + +void SSL_CTX_set_stateless_cookie_generate_cb( + SSL_CTX *ctx, + int (*gen_stateless_cookie_cb) (SSL *ssl, + unsigned char *cookie, + size_t *cookie_len)); +void SSL_CTX_set_stateless_cookie_verify_cb( + SSL_CTX *ctx, + int (*verify_stateless_cookie_cb) (SSL *ssl, + const unsigned char *cookie, + size_t cookie_len)); +# ifndef OPENSSL_NO_NEXTPROTONEG + +typedef int (*SSL_CTX_npn_advertised_cb_func)(SSL *ssl, + const unsigned char **out, + unsigned int *outlen, + void *arg); +void SSL_CTX_set_next_protos_advertised_cb(SSL_CTX *s, + SSL_CTX_npn_advertised_cb_func cb, + void *arg); +# define SSL_CTX_set_npn_advertised_cb SSL_CTX_set_next_protos_advertised_cb + +typedef int (*SSL_CTX_npn_select_cb_func)(SSL *s, + unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg); +void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s, + SSL_CTX_npn_select_cb_func cb, + void *arg); +# define SSL_CTX_set_npn_select_cb SSL_CTX_set_next_proto_select_cb + +void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, + unsigned *len); +# define SSL_get0_npn_negotiated SSL_get0_next_proto_negotiated +# endif + +__owur int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, + const unsigned char *in, unsigned int inlen, + const unsigned char *client, + unsigned int client_len); + +# define OPENSSL_NPN_UNSUPPORTED 0 +# define OPENSSL_NPN_NEGOTIATED 1 +# define OPENSSL_NPN_NO_OVERLAP 2 + +__owur int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos, + unsigned int protos_len); +__owur int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos, + unsigned int protos_len); +typedef int (*SSL_CTX_alpn_select_cb_func)(SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg); +void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx, + SSL_CTX_alpn_select_cb_func cb, + void *arg); +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned int *len); + +# ifndef OPENSSL_NO_PSK +/* + * the maximum length of the buffer given to callbacks containing the + * resulting identity/psk + */ +# define PSK_MAX_IDENTITY_LEN 256 +# define PSK_MAX_PSK_LEN 512 +typedef unsigned int (*SSL_psk_client_cb_func)(SSL *ssl, + const char *hint, + char *identity, + unsigned int max_identity_len, + unsigned char *psk, + unsigned int max_psk_len); +void SSL_CTX_set_psk_client_callback(SSL_CTX *ctx, SSL_psk_client_cb_func cb); +void SSL_set_psk_client_callback(SSL *ssl, SSL_psk_client_cb_func cb); + +typedef unsigned int (*SSL_psk_server_cb_func)(SSL *ssl, + const char *identity, + unsigned char *psk, + unsigned int max_psk_len); +void SSL_CTX_set_psk_server_callback(SSL_CTX *ctx, SSL_psk_server_cb_func cb); +void SSL_set_psk_server_callback(SSL *ssl, SSL_psk_server_cb_func cb); + +__owur int SSL_CTX_use_psk_identity_hint(SSL_CTX *ctx, const char *identity_hint); +__owur int SSL_use_psk_identity_hint(SSL *s, const char *identity_hint); +const char *SSL_get_psk_identity_hint(const SSL *s); +const char *SSL_get_psk_identity(const SSL *s); +# endif + +typedef int (*SSL_psk_find_session_cb_func)(SSL *ssl, + const unsigned char *identity, + size_t 
identity_len, + SSL_SESSION **sess); +typedef int (*SSL_psk_use_session_cb_func)(SSL *ssl, const EVP_MD *md, + const unsigned char **id, + size_t *idlen, + SSL_SESSION **sess); + +void SSL_set_psk_find_session_callback(SSL *s, SSL_psk_find_session_cb_func cb); +void SSL_CTX_set_psk_find_session_callback(SSL_CTX *ctx, + SSL_psk_find_session_cb_func cb); +void SSL_set_psk_use_session_callback(SSL *s, SSL_psk_use_session_cb_func cb); +void SSL_CTX_set_psk_use_session_callback(SSL_CTX *ctx, + SSL_psk_use_session_cb_func cb); + +/* Register callbacks to handle custom TLS Extensions for client or server. */ + +__owur int SSL_CTX_has_client_custom_ext(const SSL_CTX *ctx, + unsigned int ext_type); + +__owur int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +__owur int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +__owur int SSL_CTX_add_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + unsigned int context, + SSL_custom_ext_add_cb_ex add_cb, + SSL_custom_ext_free_cb_ex free_cb, + void *add_arg, + SSL_custom_ext_parse_cb_ex parse_cb, + void *parse_arg); + +__owur int SSL_extension_supported(unsigned int ext_type); + +# define SSL_NOTHING 1 +# define SSL_WRITING 2 +# define SSL_READING 3 +# define SSL_X509_LOOKUP 4 +# define SSL_ASYNC_PAUSED 5 +# define SSL_ASYNC_NO_JOBS 6 +# define SSL_CLIENT_HELLO_CB 7 +# define SSL_RETRY_VERIFY 8 + +/* These will only be used when doing non-blocking IO */ +# define SSL_want_nothing(s) (SSL_want(s) == SSL_NOTHING) +# define SSL_want_read(s) (SSL_want(s) == SSL_READING) +# define SSL_want_write(s) (SSL_want(s) == SSL_WRITING) +# define SSL_want_x509_lookup(s) (SSL_want(s) == SSL_X509_LOOKUP) +# define SSL_want_retry_verify(s) (SSL_want(s) == SSL_RETRY_VERIFY) +# define SSL_want_async(s) (SSL_want(s) == SSL_ASYNC_PAUSED) +# define SSL_want_async_job(s) (SSL_want(s) == SSL_ASYNC_NO_JOBS) +# define SSL_want_client_hello_cb(s) (SSL_want(s) == SSL_CLIENT_HELLO_CB) + +# define SSL_MAC_FLAG_READ_MAC_STREAM 1 +# define SSL_MAC_FLAG_WRITE_MAC_STREAM 2 +# define SSL_MAC_FLAG_READ_MAC_TLSTREE 4 +# define SSL_MAC_FLAG_WRITE_MAC_TLSTREE 8 + +/* + * A callback for logging out TLS key material. This callback should log out + * |line| followed by a newline. + */ +typedef void (*SSL_CTX_keylog_cb_func)(const SSL *ssl, const char *line); + +/* + * SSL_CTX_set_keylog_callback configures a callback to log key material. This + * is intended for debugging use with tools like Wireshark. The cb function + * should log line followed by a newline. + */ +void SSL_CTX_set_keylog_callback(SSL_CTX *ctx, SSL_CTX_keylog_cb_func cb); + +/* + * SSL_CTX_get_keylog_callback returns the callback configured by + * SSL_CTX_set_keylog_callback. 
+ */ +SSL_CTX_keylog_cb_func SSL_CTX_get_keylog_callback(const SSL_CTX *ctx); + +int SSL_CTX_set_max_early_data(SSL_CTX *ctx, uint32_t max_early_data); +uint32_t SSL_CTX_get_max_early_data(const SSL_CTX *ctx); +int SSL_set_max_early_data(SSL *s, uint32_t max_early_data); +uint32_t SSL_get_max_early_data(const SSL *s); +int SSL_CTX_set_recv_max_early_data(SSL_CTX *ctx, uint32_t recv_max_early_data); +uint32_t SSL_CTX_get_recv_max_early_data(const SSL_CTX *ctx); +int SSL_set_recv_max_early_data(SSL *s, uint32_t recv_max_early_data); +uint32_t SSL_get_recv_max_early_data(const SSL *s); + +#ifdef __cplusplus +} +#endif + +# include +# include +# include /* This is mostly sslv3 with a few tweaks */ +# include /* Datagram TLS */ +# include /* Support for the use_srtp extension */ +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * These need to be after the above set of includes due to a compiler bug + * in VisualStudio 2015 + */ +SKM_DEFINE_STACK_OF_INTERNAL(SSL_CIPHER, const SSL_CIPHER, SSL_CIPHER) +#define sk_SSL_CIPHER_num(sk) OPENSSL_sk_num(ossl_check_const_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_value(sk, idx) ((const SSL_CIPHER *)OPENSSL_sk_value(ossl_check_const_SSL_CIPHER_sk_type(sk), (idx))) +#define sk_SSL_CIPHER_new(cmp) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new(ossl_check_SSL_CIPHER_compfunc_type(cmp))) +#define sk_SSL_CIPHER_new_null() ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new_null()) +#define sk_SSL_CIPHER_new_reserve(cmp, n) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_new_reserve(ossl_check_SSL_CIPHER_compfunc_type(cmp), (n))) +#define sk_SSL_CIPHER_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SSL_CIPHER_sk_type(sk), (n)) +#define sk_SSL_CIPHER_free(sk) OPENSSL_sk_free(ossl_check_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_zero(sk) OPENSSL_sk_zero(ossl_check_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_delete(sk, i) ((const SSL_CIPHER *)OPENSSL_sk_delete(ossl_check_SSL_CIPHER_sk_type(sk), (i))) +#define sk_SSL_CIPHER_delete_ptr(sk, ptr) ((const SSL_CIPHER *)OPENSSL_sk_delete_ptr(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr))) +#define sk_SSL_CIPHER_push(sk, ptr) OPENSSL_sk_push(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_pop(sk) ((const SSL_CIPHER *)OPENSSL_sk_pop(ossl_check_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_shift(sk) ((const SSL_CIPHER *)OPENSSL_sk_shift(ossl_check_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SSL_CIPHER_sk_type(sk),ossl_check_SSL_CIPHER_freefunc_type(freefunc)) +#define sk_SSL_CIPHER_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr), (idx)) +#define sk_SSL_CIPHER_set(sk, idx, ptr) ((const SSL_CIPHER *)OPENSSL_sk_set(ossl_check_SSL_CIPHER_sk_type(sk), (idx), ossl_check_SSL_CIPHER_type(ptr))) +#define sk_SSL_CIPHER_find(sk, ptr) OPENSSL_sk_find(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr)) +#define sk_SSL_CIPHER_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_type(ptr), pnum) +#define sk_SSL_CIPHER_sort(sk) OPENSSL_sk_sort(ossl_check_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_is_sorted(sk) 
OPENSSL_sk_is_sorted(ossl_check_const_SSL_CIPHER_sk_type(sk)) +#define sk_SSL_CIPHER_dup(sk) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_dup(ossl_check_const_SSL_CIPHER_sk_type(sk))) +#define sk_SSL_CIPHER_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SSL_CIPHER) *)OPENSSL_sk_deep_copy(ossl_check_const_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_copyfunc_type(copyfunc), ossl_check_SSL_CIPHER_freefunc_type(freefunc))) +#define sk_SSL_CIPHER_set_cmp_func(sk, cmp) ((sk_SSL_CIPHER_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SSL_CIPHER_sk_type(sk), ossl_check_SSL_CIPHER_compfunc_type(cmp))) + + +/* compatibility */ +# define SSL_set_app_data(s,arg) (SSL_set_ex_data(s,0,(char *)(arg))) +# define SSL_get_app_data(s) (SSL_get_ex_data(s,0)) +# define SSL_SESSION_set_app_data(s,a) (SSL_SESSION_set_ex_data(s,0, \ + (char *)(a))) +# define SSL_SESSION_get_app_data(s) (SSL_SESSION_get_ex_data(s,0)) +# define SSL_CTX_get_app_data(ctx) (SSL_CTX_get_ex_data(ctx,0)) +# define SSL_CTX_set_app_data(ctx,arg) (SSL_CTX_set_ex_data(ctx,0, \ + (char *)(arg))) +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 void SSL_set_debug(SSL *s, int debug); +# endif + +/* TLSv1.3 KeyUpdate message types */ +/* -1 used so that this is an invalid value for the on-the-wire protocol */ +#define SSL_KEY_UPDATE_NONE -1 +/* Values as defined for the on-the-wire protocol */ +#define SSL_KEY_UPDATE_NOT_REQUESTED 0 +#define SSL_KEY_UPDATE_REQUESTED 1 + +/* + * The valid handshake states (one for each type message sent and one for each + * type of message received). There are also two "special" states: + * TLS = TLS or DTLS state + * DTLS = DTLS specific state + * CR/SR = Client Read/Server Read + * CW/SW = Client Write/Server Write + * + * The "special" states are: + * TLS_ST_BEFORE = No handshake has been initiated yet + * TLS_ST_OK = A handshake has been successfully completed + */ +typedef enum { + TLS_ST_BEFORE, + TLS_ST_OK, + DTLS_ST_CR_HELLO_VERIFY_REQUEST, + TLS_ST_CR_SRVR_HELLO, + TLS_ST_CR_CERT, + TLS_ST_CR_COMP_CERT, + TLS_ST_CR_CERT_STATUS, + TLS_ST_CR_KEY_EXCH, + TLS_ST_CR_CERT_REQ, + TLS_ST_CR_SRVR_DONE, + TLS_ST_CR_SESSION_TICKET, + TLS_ST_CR_CHANGE, + TLS_ST_CR_FINISHED, + TLS_ST_CW_CLNT_HELLO, + TLS_ST_CW_CERT, + TLS_ST_CW_COMP_CERT, + TLS_ST_CW_KEY_EXCH, + TLS_ST_CW_CERT_VRFY, + TLS_ST_CW_CHANGE, + TLS_ST_CW_NEXT_PROTO, + TLS_ST_CW_FINISHED, + TLS_ST_SW_HELLO_REQ, + TLS_ST_SR_CLNT_HELLO, + DTLS_ST_SW_HELLO_VERIFY_REQUEST, + TLS_ST_SW_SRVR_HELLO, + TLS_ST_SW_CERT, + TLS_ST_SW_COMP_CERT, + TLS_ST_SW_KEY_EXCH, + TLS_ST_SW_CERT_REQ, + TLS_ST_SW_SRVR_DONE, + TLS_ST_SR_CERT, + TLS_ST_SR_COMP_CERT, + TLS_ST_SR_KEY_EXCH, + TLS_ST_SR_CERT_VRFY, + TLS_ST_SR_NEXT_PROTO, + TLS_ST_SR_CHANGE, + TLS_ST_SR_FINISHED, + TLS_ST_SW_SESSION_TICKET, + TLS_ST_SW_CERT_STATUS, + TLS_ST_SW_CHANGE, + TLS_ST_SW_FINISHED, + TLS_ST_SW_ENCRYPTED_EXTENSIONS, + TLS_ST_CR_ENCRYPTED_EXTENSIONS, + TLS_ST_CR_CERT_VRFY, + TLS_ST_SW_CERT_VRFY, + TLS_ST_CR_HELLO_REQ, + TLS_ST_SW_KEY_UPDATE, + TLS_ST_CW_KEY_UPDATE, + TLS_ST_SR_KEY_UPDATE, + TLS_ST_CR_KEY_UPDATE, + TLS_ST_EARLY_DATA, + TLS_ST_PENDING_EARLY_DATA_END, + TLS_ST_CW_END_OF_EARLY_DATA, + TLS_ST_SR_END_OF_EARLY_DATA +} OSSL_HANDSHAKE_STATE; + +/* + * Most of the following state values are no longer used and are defined to be + * the closest equivalent value in the current state machine code. Not all + * defines have an equivalent and are set to a dummy value (-1). 
SSL_ST_CONNECT + * and SSL_ST_ACCEPT are still in use in the definition of SSL_CB_ACCEPT_LOOP, + * SSL_CB_ACCEPT_EXIT, SSL_CB_CONNECT_LOOP and SSL_CB_CONNECT_EXIT. + */ + +# define SSL_ST_CONNECT 0x1000 +# define SSL_ST_ACCEPT 0x2000 + +# define SSL_ST_MASK 0x0FFF + +# define SSL_CB_LOOP 0x01 +# define SSL_CB_EXIT 0x02 +# define SSL_CB_READ 0x04 +# define SSL_CB_WRITE 0x08 +# define SSL_CB_ALERT 0x4000/* used in callback */ +# define SSL_CB_READ_ALERT (SSL_CB_ALERT|SSL_CB_READ) +# define SSL_CB_WRITE_ALERT (SSL_CB_ALERT|SSL_CB_WRITE) +# define SSL_CB_ACCEPT_LOOP (SSL_ST_ACCEPT|SSL_CB_LOOP) +# define SSL_CB_ACCEPT_EXIT (SSL_ST_ACCEPT|SSL_CB_EXIT) +# define SSL_CB_CONNECT_LOOP (SSL_ST_CONNECT|SSL_CB_LOOP) +# define SSL_CB_CONNECT_EXIT (SSL_ST_CONNECT|SSL_CB_EXIT) +# define SSL_CB_HANDSHAKE_START 0x10 +# define SSL_CB_HANDSHAKE_DONE 0x20 + +/* Is the SSL_connection established? */ +# define SSL_in_connect_init(a) (SSL_in_init(a) && !SSL_is_server(a)) +# define SSL_in_accept_init(a) (SSL_in_init(a) && SSL_is_server(a)) +int SSL_in_init(const SSL *s); +int SSL_in_before(const SSL *s); +int SSL_is_init_finished(const SSL *s); + +/* + * The following 3 states are kept in ssl->rlayer.rstate when reads fail, you + * should not need these + */ +# define SSL_ST_READ_HEADER 0xF0 +# define SSL_ST_READ_BODY 0xF1 +# define SSL_ST_READ_DONE 0xF2 + +/*- + * Obtain latest Finished message + * -- that we sent (SSL_get_finished) + * -- that we expected from peer (SSL_get_peer_finished). + * Returns length (0 == no Finished so far), copies up to 'count' bytes. + */ +size_t SSL_get_finished(const SSL *s, void *buf, size_t count); +size_t SSL_get_peer_finished(const SSL *s, void *buf, size_t count); + +/* + * use either SSL_VERIFY_NONE or SSL_VERIFY_PEER, the last 3 options are + * 'ored' with SSL_VERIFY_PEER if they are desired + */ +# define SSL_VERIFY_NONE 0x00 +# define SSL_VERIFY_PEER 0x01 +# define SSL_VERIFY_FAIL_IF_NO_PEER_CERT 0x02 +# define SSL_VERIFY_CLIENT_ONCE 0x04 +# define SSL_VERIFY_POST_HANDSHAKE 0x08 + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define OpenSSL_add_ssl_algorithms() SSL_library_init() +# define SSLeay_add_ssl_algorithms() SSL_library_init() +# endif + +/* More backward compatibility */ +# define SSL_get_cipher(s) \ + SSL_CIPHER_get_name(SSL_get_current_cipher(s)) +# define SSL_get_cipher_bits(s,np) \ + SSL_CIPHER_get_bits(SSL_get_current_cipher(s),np) +# define SSL_get_cipher_version(s) \ + SSL_CIPHER_get_version(SSL_get_current_cipher(s)) +# define SSL_get_cipher_name(s) \ + SSL_CIPHER_get_name(SSL_get_current_cipher(s)) +# define SSL_get_time(a) SSL_SESSION_get_time(a) +# define SSL_set_time(a,b) SSL_SESSION_set_time((a),(b)) +# define SSL_get_timeout(a) SSL_SESSION_get_timeout(a) +# define SSL_set_timeout(a,b) SSL_SESSION_set_timeout((a),(b)) + +# define d2i_SSL_SESSION_bio(bp,s_id) ASN1_d2i_bio_of(SSL_SESSION,SSL_SESSION_new,d2i_SSL_SESSION,bp,s_id) +# define i2d_SSL_SESSION_bio(bp,s_id) ASN1_i2d_bio_of(SSL_SESSION,i2d_SSL_SESSION,bp,s_id) + +DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) +# define SSL_AD_REASON_OFFSET 1000/* offset to get SSL_R_... value + * from SSL_AD_... 
*/ +/* These alert types are for SSLv3 and TLSv1 */ +# define SSL_AD_CLOSE_NOTIFY SSL3_AD_CLOSE_NOTIFY +/* fatal */ +# define SSL_AD_UNEXPECTED_MESSAGE SSL3_AD_UNEXPECTED_MESSAGE +/* fatal */ +# define SSL_AD_BAD_RECORD_MAC SSL3_AD_BAD_RECORD_MAC +# define SSL_AD_DECRYPTION_FAILED TLS1_AD_DECRYPTION_FAILED +# define SSL_AD_RECORD_OVERFLOW TLS1_AD_RECORD_OVERFLOW +/* fatal */ +# define SSL_AD_DECOMPRESSION_FAILURE SSL3_AD_DECOMPRESSION_FAILURE +/* fatal */ +# define SSL_AD_HANDSHAKE_FAILURE SSL3_AD_HANDSHAKE_FAILURE +/* Not for TLS */ +# define SSL_AD_NO_CERTIFICATE SSL3_AD_NO_CERTIFICATE +# define SSL_AD_BAD_CERTIFICATE SSL3_AD_BAD_CERTIFICATE +# define SSL_AD_UNSUPPORTED_CERTIFICATE SSL3_AD_UNSUPPORTED_CERTIFICATE +# define SSL_AD_CERTIFICATE_REVOKED SSL3_AD_CERTIFICATE_REVOKED +# define SSL_AD_CERTIFICATE_EXPIRED SSL3_AD_CERTIFICATE_EXPIRED +# define SSL_AD_CERTIFICATE_UNKNOWN SSL3_AD_CERTIFICATE_UNKNOWN +/* fatal */ +# define SSL_AD_ILLEGAL_PARAMETER SSL3_AD_ILLEGAL_PARAMETER +/* fatal */ +# define SSL_AD_UNKNOWN_CA TLS1_AD_UNKNOWN_CA +/* fatal */ +# define SSL_AD_ACCESS_DENIED TLS1_AD_ACCESS_DENIED +/* fatal */ +# define SSL_AD_DECODE_ERROR TLS1_AD_DECODE_ERROR +# define SSL_AD_DECRYPT_ERROR TLS1_AD_DECRYPT_ERROR +/* fatal */ +# define SSL_AD_EXPORT_RESTRICTION TLS1_AD_EXPORT_RESTRICTION +/* fatal */ +# define SSL_AD_PROTOCOL_VERSION TLS1_AD_PROTOCOL_VERSION +/* fatal */ +# define SSL_AD_INSUFFICIENT_SECURITY TLS1_AD_INSUFFICIENT_SECURITY +/* fatal */ +# define SSL_AD_INTERNAL_ERROR TLS1_AD_INTERNAL_ERROR +# define SSL_AD_USER_CANCELLED TLS1_AD_USER_CANCELLED +# define SSL_AD_NO_RENEGOTIATION TLS1_AD_NO_RENEGOTIATION +# define SSL_AD_MISSING_EXTENSION TLS13_AD_MISSING_EXTENSION +# define SSL_AD_CERTIFICATE_REQUIRED TLS13_AD_CERTIFICATE_REQUIRED +# define SSL_AD_UNSUPPORTED_EXTENSION TLS1_AD_UNSUPPORTED_EXTENSION +# define SSL_AD_CERTIFICATE_UNOBTAINABLE TLS1_AD_CERTIFICATE_UNOBTAINABLE +# define SSL_AD_UNRECOGNIZED_NAME TLS1_AD_UNRECOGNIZED_NAME +# define SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE TLS1_AD_BAD_CERTIFICATE_STATUS_RESPONSE +# define SSL_AD_BAD_CERTIFICATE_HASH_VALUE TLS1_AD_BAD_CERTIFICATE_HASH_VALUE +/* fatal */ +# define SSL_AD_UNKNOWN_PSK_IDENTITY TLS1_AD_UNKNOWN_PSK_IDENTITY +/* fatal */ +# define SSL_AD_INAPPROPRIATE_FALLBACK TLS1_AD_INAPPROPRIATE_FALLBACK +# define SSL_AD_NO_APPLICATION_PROTOCOL TLS1_AD_NO_APPLICATION_PROTOCOL +# define SSL_ERROR_NONE 0 +# define SSL_ERROR_SSL 1 +# define SSL_ERROR_WANT_READ 2 +# define SSL_ERROR_WANT_WRITE 3 +# define SSL_ERROR_WANT_X509_LOOKUP 4 +# define SSL_ERROR_SYSCALL 5/* look at error stack/return + * value/errno */ +# define SSL_ERROR_ZERO_RETURN 6 +# define SSL_ERROR_WANT_CONNECT 7 +# define SSL_ERROR_WANT_ACCEPT 8 +# define SSL_ERROR_WANT_ASYNC 9 +# define SSL_ERROR_WANT_ASYNC_JOB 10 +# define SSL_ERROR_WANT_CLIENT_HELLO_CB 11 +# define SSL_ERROR_WANT_RETRY_VERIFY 12 + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTRL_SET_TMP_DH 3 +# define SSL_CTRL_SET_TMP_ECDH 4 +# define SSL_CTRL_SET_TMP_DH_CB 6 +# endif + +# define SSL_CTRL_GET_CLIENT_CERT_REQUEST 9 +# define SSL_CTRL_GET_NUM_RENEGOTIATIONS 10 +# define SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS 11 +# define SSL_CTRL_GET_TOTAL_RENEGOTIATIONS 12 +# define SSL_CTRL_GET_FLAGS 13 +# define SSL_CTRL_EXTRA_CHAIN_CERT 14 +# define SSL_CTRL_SET_MSG_CALLBACK 15 +# define SSL_CTRL_SET_MSG_CALLBACK_ARG 16 +/* only applies to datagram connections */ +# define SSL_CTRL_SET_MTU 17 +/* Stats */ +# define SSL_CTRL_SESS_NUMBER 20 +# define SSL_CTRL_SESS_CONNECT 21 +# define 
SSL_CTRL_SESS_CONNECT_GOOD 22 +# define SSL_CTRL_SESS_CONNECT_RENEGOTIATE 23 +# define SSL_CTRL_SESS_ACCEPT 24 +# define SSL_CTRL_SESS_ACCEPT_GOOD 25 +# define SSL_CTRL_SESS_ACCEPT_RENEGOTIATE 26 +# define SSL_CTRL_SESS_HIT 27 +# define SSL_CTRL_SESS_CB_HIT 28 +# define SSL_CTRL_SESS_MISSES 29 +# define SSL_CTRL_SESS_TIMEOUTS 30 +# define SSL_CTRL_SESS_CACHE_FULL 31 +# define SSL_CTRL_MODE 33 +# define SSL_CTRL_GET_READ_AHEAD 40 +# define SSL_CTRL_SET_READ_AHEAD 41 +# define SSL_CTRL_SET_SESS_CACHE_SIZE 42 +# define SSL_CTRL_GET_SESS_CACHE_SIZE 43 +# define SSL_CTRL_SET_SESS_CACHE_MODE 44 +# define SSL_CTRL_GET_SESS_CACHE_MODE 45 +# define SSL_CTRL_GET_MAX_CERT_LIST 50 +# define SSL_CTRL_SET_MAX_CERT_LIST 51 +# define SSL_CTRL_SET_MAX_SEND_FRAGMENT 52 +/* see tls1.h for macros based on these */ +# define SSL_CTRL_SET_TLSEXT_SERVERNAME_CB 53 +# define SSL_CTRL_SET_TLSEXT_SERVERNAME_ARG 54 +# define SSL_CTRL_SET_TLSEXT_HOSTNAME 55 +# define SSL_CTRL_SET_TLSEXT_DEBUG_CB 56 +# define SSL_CTRL_SET_TLSEXT_DEBUG_ARG 57 +# define SSL_CTRL_GET_TLSEXT_TICKET_KEYS 58 +# define SSL_CTRL_SET_TLSEXT_TICKET_KEYS 59 +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT 60 */ +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB 61 */ +/*# define SSL_CTRL_SET_TLSEXT_OPAQUE_PRF_INPUT_CB_ARG 62 */ +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB 63 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB_ARG 64 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_TYPE 65 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_EXTS 66 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_EXTS 67 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_IDS 68 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_IDS 69 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_OCSP_RESP 70 +# define SSL_CTRL_SET_TLSEXT_STATUS_REQ_OCSP_RESP 71 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB 72 +# endif +# define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME_CB 75 +# define SSL_CTRL_SET_SRP_VERIFY_PARAM_CB 76 +# define SSL_CTRL_SET_SRP_GIVE_CLIENT_PWD_CB 77 +# define SSL_CTRL_SET_SRP_ARG 78 +# define SSL_CTRL_SET_TLS_EXT_SRP_USERNAME 79 +# define SSL_CTRL_SET_TLS_EXT_SRP_STRENGTH 80 +# define SSL_CTRL_SET_TLS_EXT_SRP_PASSWORD 81 +# define DTLS_CTRL_GET_TIMEOUT 73 +# define DTLS_CTRL_HANDLE_TIMEOUT 74 +# define SSL_CTRL_GET_RI_SUPPORT 76 +# define SSL_CTRL_CLEAR_MODE 78 +# define SSL_CTRL_SET_NOT_RESUMABLE_SESS_CB 79 +# define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 +# define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 +# define SSL_CTRL_CHAIN 88 +# define SSL_CTRL_CHAIN_CERT 89 +# define SSL_CTRL_GET_GROUPS 90 +# define SSL_CTRL_SET_GROUPS 91 +# define SSL_CTRL_SET_GROUPS_LIST 92 +# define SSL_CTRL_GET_SHARED_GROUP 93 +# define SSL_CTRL_SET_SIGALGS 97 +# define SSL_CTRL_SET_SIGALGS_LIST 98 +# define SSL_CTRL_CERT_FLAGS 99 +# define SSL_CTRL_CLEAR_CERT_FLAGS 100 +# define SSL_CTRL_SET_CLIENT_SIGALGS 101 +# define SSL_CTRL_SET_CLIENT_SIGALGS_LIST 102 +# define SSL_CTRL_GET_CLIENT_CERT_TYPES 103 +# define SSL_CTRL_SET_CLIENT_CERT_TYPES 104 +# define SSL_CTRL_BUILD_CERT_CHAIN 105 +# define SSL_CTRL_SET_VERIFY_CERT_STORE 106 +# define SSL_CTRL_SET_CHAIN_CERT_STORE 107 +# define SSL_CTRL_GET_PEER_SIGNATURE_NID 108 +# define SSL_CTRL_GET_PEER_TMP_KEY 109 +# define SSL_CTRL_GET_RAW_CIPHERLIST 110 +# define SSL_CTRL_GET_EC_POINT_FORMATS 111 +# define SSL_CTRL_GET_CHAIN_CERTS 115 +# define SSL_CTRL_SELECT_CURRENT_CERT 116 +# define SSL_CTRL_SET_CURRENT_CERT 117 +# define SSL_CTRL_SET_DH_AUTO 118 +# define DTLS_CTRL_SET_LINK_MTU 120 +# define DTLS_CTRL_GET_LINK_MIN_MTU 121 +# define SSL_CTRL_GET_EXTMS_SUPPORT 122 +# define 
SSL_CTRL_SET_MIN_PROTO_VERSION 123 +# define SSL_CTRL_SET_MAX_PROTO_VERSION 124 +# define SSL_CTRL_SET_SPLIT_SEND_FRAGMENT 125 +# define SSL_CTRL_SET_MAX_PIPELINES 126 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_TYPE 127 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_CB 128 +# define SSL_CTRL_GET_TLSEXT_STATUS_REQ_CB_ARG 129 +# define SSL_CTRL_GET_MIN_PROTO_VERSION 130 +# define SSL_CTRL_GET_MAX_PROTO_VERSION 131 +# define SSL_CTRL_GET_SIGNATURE_NID 132 +# define SSL_CTRL_GET_TMP_KEY 133 +# define SSL_CTRL_GET_NEGOTIATED_GROUP 134 +# define SSL_CTRL_GET_IANA_GROUPS 135 +# define SSL_CTRL_SET_RETRY_VERIFY 136 +# define SSL_CTRL_GET_VERIFY_CERT_STORE 137 +# define SSL_CTRL_GET_CHAIN_CERT_STORE 138 +# define SSL_CTRL_GET0_IMPLEMENTED_GROUPS 139 +# define SSL_CTRL_GET_SIGNATURE_NAME 140 +# define SSL_CTRL_GET_PEER_SIGNATURE_NAME 141 +# define SSL_CERT_SET_FIRST 1 +# define SSL_CERT_SET_NEXT 2 +# define SSL_CERT_SET_SERVER 3 +# define DTLSv1_get_timeout(ssl, arg) \ + SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)(arg)) +# define DTLSv1_handle_timeout(ssl) \ + SSL_ctrl(ssl,DTLS_CTRL_HANDLE_TIMEOUT,0, NULL) +# define SSL_num_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_NUM_RENEGOTIATIONS,0,NULL) +# define SSL_clear_num_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_CLEAR_NUM_RENEGOTIATIONS,0,NULL) +# define SSL_total_renegotiations(ssl) \ + SSL_ctrl((ssl),SSL_CTRL_GET_TOTAL_RENEGOTIATIONS,0,NULL) +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTX_set_tmp_dh(ctx,dh) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TMP_DH,0,(char *)(dh)) +# endif +# define SSL_CTX_set_dh_auto(ctx, onoff) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_DH_AUTO,onoff,NULL) +# define SSL_set_dh_auto(s, onoff) \ + SSL_ctrl(s,SSL_CTRL_SET_DH_AUTO,onoff,NULL) +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_set_tmp_dh(ssl,dh) \ + SSL_ctrl(ssl,SSL_CTRL_SET_TMP_DH,0,(char *)(dh)) +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_CTX_set_tmp_ecdh(ctx,ecdh) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_TMP_ECDH,0,(char *)(ecdh)) +# define SSL_set_tmp_ecdh(ssl,ecdh) \ + SSL_ctrl(ssl,SSL_CTRL_SET_TMP_ECDH,0,(char *)(ecdh)) +# endif +# define SSL_CTX_add_extra_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)(x509)) +# define SSL_CTX_get_extra_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509) +# define SSL_CTX_get_extra_chain_certs_only(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,1,px509) +# define SSL_CTX_clear_extra_chain_certs(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) +# define SSL_CTX_set0_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)(sk)) +# define SSL_CTX_set1_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)(sk)) +# define SSL_CTX_add0_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)(x509)) +# define SSL_CTX_add1_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)(x509)) +# define SSL_CTX_get0_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_CTX_clear_chain_certs(ctx) \ + SSL_CTX_set0_chain(ctx,NULL) +# define SSL_CTX_build_cert_chain(ctx, flags) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_CTX_select_current_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)(x509)) +# define SSL_CTX_set_current_cert(ctx, op) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_CTX_set0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char 
*)(st)) +# define SSL_CTX_set1_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)(st)) +# define SSL_CTX_get0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_CTX_set1_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)(st)) +# define SSL_CTX_get0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_set0_chain(s,sk) \ + SSL_ctrl(s,SSL_CTRL_CHAIN,0,(char *)(sk)) +# define SSL_set1_chain(s,sk) \ + SSL_ctrl(s,SSL_CTRL_CHAIN,1,(char *)(sk)) +# define SSL_add0_chain_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_CHAIN_CERT,0,(char *)(x509)) +# define SSL_add1_chain_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_CHAIN_CERT,1,(char *)(x509)) +# define SSL_get0_chain_certs(s,px509) \ + SSL_ctrl(s,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_clear_chain_certs(s) \ + SSL_set0_chain(s,NULL) +# define SSL_build_cert_chain(s, flags) \ + SSL_ctrl(s,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_select_current_cert(s,x509) \ + SSL_ctrl(s,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)(x509)) +# define SSL_set_current_cert(s,op) \ + SSL_ctrl(s,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_set0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_set1_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)(st)) +#define SSL_get0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_GET_VERIFY_CERT_STORE,0,(char *)(st)) +# define SSL_set0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)(st)) +# define SSL_set1_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)(st)) +#define SSL_get0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_GET_CHAIN_CERT_STORE,0,(char *)(st)) + +# define SSL_get1_groups(s, glist) \ + SSL_ctrl(s,SSL_CTRL_GET_GROUPS,0,(int*)(glist)) +# define SSL_get0_iana_groups(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_IANA_GROUPS,0,(uint16_t **)(plst)) +# define SSL_CTX_set1_groups(ctx, glist, glistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS,glistlen,(int *)(glist)) +# define SSL_CTX_set1_groups_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(s)) +# define SSL_CTX_get0_implemented_groups(ctx, all, out) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET0_IMPLEMENTED_GROUPS, all, \ + (STACK_OF(OPENSSL_CSTRING) *)(out)) +# define SSL_set1_groups(s, glist, glistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_GROUPS,glistlen,(char *)(glist)) +# define SSL_set1_groups_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_GROUPS_LIST,0,(char *)(str)) +# define SSL_get_shared_group(s, n) \ + SSL_ctrl(s,SSL_CTRL_GET_SHARED_GROUP,n,NULL) +# define SSL_get_negotiated_group(s) \ + SSL_ctrl(s,SSL_CTRL_GET_NEGOTIATED_GROUP,0,NULL) +# define SSL_CTX_set1_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS,slistlen,(int *)(slist)) +# define SSL_CTX_set1_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)(s)) +# define SSL_set1_sigalgs(s, slist, slistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_SIGALGS,slistlen,(int *)(slist)) +# define SSL_set1_sigalgs_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)(str)) +# define SSL_CTX_set1_client_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)(slist)) +# define SSL_CTX_set1_client_sigalgs_list(ctx, s) 
\ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)(s)) +# define SSL_set1_client_sigalgs(s, slist, slistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)(slist)) +# define SSL_set1_client_sigalgs_list(s, str) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)(str)) +# define SSL_get0_certificate_types(s, clist) \ + SSL_ctrl(s, SSL_CTRL_GET_CLIENT_CERT_TYPES, 0, (char *)(clist)) +# define SSL_CTX_set1_client_certificate_types(ctx, clist, clistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen, \ + (char *)(clist)) +# define SSL_set1_client_certificate_types(s, clist, clistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)(clist)) +# define SSL_get0_signature_name(s, str) \ + SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NAME,0,(1?(str):(const char **)NULL)) +# define SSL_get_signature_nid(s, pn) \ + SSL_ctrl(s,SSL_CTRL_GET_SIGNATURE_NID,0,pn) +# define SSL_get0_peer_signature_name(s, str) \ + SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NAME,0,(1?(str):(const char **)NULL)) +# define SSL_get_peer_signature_nid(s, pn) \ + SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn) +# define SSL_get_peer_tmp_key(s, pk) \ + SSL_ctrl(s,SSL_CTRL_GET_PEER_TMP_KEY,0,pk) +# define SSL_get_tmp_key(s, pk) \ + SSL_ctrl(s,SSL_CTRL_GET_TMP_KEY,0,pk) +# define SSL_get0_raw_cipherlist(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_RAW_CIPHERLIST,0,plst) +# define SSL_get0_ec_point_formats(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_EC_POINT_FORMATS,0,plst) +# define SSL_CTX_set_min_proto_version(ctx, version) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_SET_MIN_PROTO_VERSION, version, NULL) +# define SSL_CTX_set_max_proto_version(ctx, version) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_SET_MAX_PROTO_VERSION, version, NULL) +# define SSL_CTX_get_min_proto_version(ctx) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_GET_MIN_PROTO_VERSION, 0, NULL) +# define SSL_CTX_get_max_proto_version(ctx) \ + SSL_CTX_ctrl(ctx, SSL_CTRL_GET_MAX_PROTO_VERSION, 0, NULL) +# define SSL_set_min_proto_version(s, version) \ + SSL_ctrl(s, SSL_CTRL_SET_MIN_PROTO_VERSION, version, NULL) +# define SSL_set_max_proto_version(s, version) \ + SSL_ctrl(s, SSL_CTRL_SET_MAX_PROTO_VERSION, version, NULL) +# define SSL_get_min_proto_version(s) \ + SSL_ctrl(s, SSL_CTRL_GET_MIN_PROTO_VERSION, 0, NULL) +# define SSL_get_max_proto_version(s) \ + SSL_ctrl(s, SSL_CTRL_GET_MAX_PROTO_VERSION, 0, NULL) + +const char *SSL_get0_group_name(SSL *s); +const char *SSL_group_to_name(SSL *s, int id); + +/* Backwards compatibility, original 1.1.0 names */ +# define SSL_CTRL_GET_SERVER_TMP_KEY \ + SSL_CTRL_GET_PEER_TMP_KEY +# define SSL_get_server_tmp_key(s, pk) \ + SSL_get_peer_tmp_key(s, pk) + +int SSL_set0_tmp_dh_pkey(SSL *s, EVP_PKEY *dhpkey); +int SSL_CTX_set0_tmp_dh_pkey(SSL_CTX *ctx, EVP_PKEY *dhpkey); + +/* + * The following symbol names are old and obsolete. They are kept + * for compatibility reasons only and should not be used anymore. + */ +# define SSL_CTRL_GET_CURVES SSL_CTRL_GET_GROUPS +# define SSL_CTRL_SET_CURVES SSL_CTRL_SET_GROUPS +# define SSL_CTRL_SET_CURVES_LIST SSL_CTRL_SET_GROUPS_LIST +# define SSL_CTRL_GET_SHARED_CURVE SSL_CTRL_GET_SHARED_GROUP + +# define SSL_get1_curves SSL_get1_groups +# define SSL_CTX_set1_curves SSL_CTX_set1_groups +# define SSL_CTX_set1_curves_list SSL_CTX_set1_groups_list +# define SSL_set1_curves SSL_set1_groups +# define SSL_set1_curves_list SSL_set1_groups_list +# define SSL_get_shared_curve SSL_get_shared_group + + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +/* Provide some compatibility macros for removed functionality. 
*/ +# define SSL_CTX_need_tmp_RSA(ctx) 0 +# define SSL_CTX_set_tmp_rsa(ctx,rsa) 1 +# define SSL_need_tmp_RSA(ssl) 0 +# define SSL_set_tmp_rsa(ssl,rsa) 1 +# define SSL_CTX_set_ecdh_auto(dummy, onoff) ((onoff) != 0) +# define SSL_set_ecdh_auto(dummy, onoff) ((onoff) != 0) +/* + * We "pretend" to call the callback to avoid warnings about unused static + * functions. + */ +# define SSL_CTX_set_tmp_rsa_callback(ctx, cb) while(0) (cb)(NULL, 0, 0) +# define SSL_set_tmp_rsa_callback(ssl, cb) while(0) (cb)(NULL, 0, 0) +# endif +__owur const BIO_METHOD *BIO_f_ssl(void); +__owur BIO *BIO_new_ssl(SSL_CTX *ctx, int client); +__owur BIO *BIO_new_ssl_connect(SSL_CTX *ctx); +__owur BIO *BIO_new_buffer_ssl_connect(SSL_CTX *ctx); +__owur int BIO_ssl_copy_session_id(BIO *to, BIO *from); +void BIO_ssl_shutdown(BIO *ssl_bio); + +__owur int SSL_CTX_set_cipher_list(SSL_CTX *, const char *str); +__owur SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth); +__owur SSL_CTX *SSL_CTX_new_ex(OSSL_LIB_CTX *libctx, const char *propq, + const SSL_METHOD *meth); +int SSL_CTX_up_ref(SSL_CTX *ctx); +void SSL_CTX_free(SSL_CTX *); +__owur long SSL_CTX_set_timeout(SSL_CTX *ctx, long t); +__owur long SSL_CTX_get_timeout(const SSL_CTX *ctx); +__owur X509_STORE *SSL_CTX_get_cert_store(const SSL_CTX *); +void SSL_CTX_set_cert_store(SSL_CTX *, X509_STORE *); +void SSL_CTX_set1_cert_store(SSL_CTX *, X509_STORE *); +__owur int SSL_want(const SSL *s); +__owur int SSL_clear(SSL *s); + +#ifndef OPENSSL_NO_DEPRECATED_3_4 +OSSL_DEPRECATEDIN_3_4_FOR("not Y2038-safe, replace with SSL_CTX_flush_sessions_ex()") +void SSL_CTX_flush_sessions(SSL_CTX *ctx, long tm); +#endif +void SSL_CTX_flush_sessions_ex(SSL_CTX *ctx, time_t tm); + +__owur const SSL_CIPHER *SSL_get_current_cipher(const SSL *s); +__owur const SSL_CIPHER *SSL_get_pending_cipher(const SSL *s); +__owur int SSL_CIPHER_get_bits(const SSL_CIPHER *c, int *alg_bits); +__owur const char *SSL_CIPHER_get_version(const SSL_CIPHER *c); +__owur const char *SSL_CIPHER_get_name(const SSL_CIPHER *c); +__owur const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c); +__owur const char *OPENSSL_cipher_name(const char *rfc_name); +__owur uint32_t SSL_CIPHER_get_id(const SSL_CIPHER *c); +__owur uint16_t SSL_CIPHER_get_protocol_id(const SSL_CIPHER *c); +__owur int SSL_CIPHER_get_kx_nid(const SSL_CIPHER *c); +__owur int SSL_CIPHER_get_auth_nid(const SSL_CIPHER *c); +__owur const EVP_MD *SSL_CIPHER_get_handshake_digest(const SSL_CIPHER *c); +__owur int SSL_CIPHER_is_aead(const SSL_CIPHER *c); + +__owur int SSL_get_fd(const SSL *s); +__owur int SSL_get_rfd(const SSL *s); +__owur int SSL_get_wfd(const SSL *s); +__owur const char *SSL_get_cipher_list(const SSL *s, int n); +__owur char *SSL_get_shared_ciphers(const SSL *s, char *buf, int size); +__owur int SSL_get_read_ahead(const SSL *s); +__owur int SSL_pending(const SSL *s); +__owur int SSL_has_pending(const SSL *s); +# ifndef OPENSSL_NO_SOCK +__owur int SSL_set_fd(SSL *s, int fd); +__owur int SSL_set_rfd(SSL *s, int fd); +__owur int SSL_set_wfd(SSL *s, int fd); +# endif +void SSL_set0_rbio(SSL *s, BIO *rbio); +void SSL_set0_wbio(SSL *s, BIO *wbio); +void SSL_set_bio(SSL *s, BIO *rbio, BIO *wbio); +__owur BIO *SSL_get_rbio(const SSL *s); +__owur BIO *SSL_get_wbio(const SSL *s); +__owur int SSL_set_cipher_list(SSL *s, const char *str); +__owur int SSL_CTX_set_ciphersuites(SSL_CTX *ctx, const char *str); +__owur int SSL_set_ciphersuites(SSL *s, const char *str); +void SSL_set_read_ahead(SSL *s, int yes); +__owur int SSL_get_verify_mode(const SSL *s); +__owur int 
SSL_get_verify_depth(const SSL *s); +__owur SSL_verify_cb SSL_get_verify_callback(const SSL *s); +void SSL_set_verify(SSL *s, int mode, SSL_verify_cb callback); +void SSL_set_verify_depth(SSL *s, int depth); +void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 __owur int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa); +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_use_RSAPrivateKey_ASN1(SSL *ssl, + const unsigned char *d, long len); +# endif +__owur int SSL_use_PrivateKey(SSL *ssl, EVP_PKEY *pkey); +__owur int SSL_use_PrivateKey_ASN1(int pk, SSL *ssl, const unsigned char *d, + long len); +__owur int SSL_use_certificate(SSL *ssl, X509 *x); +__owur int SSL_use_certificate_ASN1(SSL *ssl, const unsigned char *d, int len); +__owur int SSL_use_cert_and_key(SSL *ssl, X509 *x509, EVP_PKEY *privatekey, + STACK_OF(X509) *chain, int override); + + +/* serverinfo file format versions */ +# define SSL_SERVERINFOV1 1 +# define SSL_SERVERINFOV2 2 + +/* Set serverinfo data for the current active cert. */ +__owur int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, + size_t serverinfo_length); +__owur int SSL_CTX_use_serverinfo_ex(SSL_CTX *ctx, unsigned int version, + const unsigned char *serverinfo, + size_t serverinfo_length); +__owur int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_use_RSAPrivateKey_file(SSL *ssl, const char *file, int type); +#endif + +__owur int SSL_use_PrivateKey_file(SSL *ssl, const char *file, int type); +__owur int SSL_use_certificate_file(SSL *ssl, const char *file, int type); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey_file(SSL_CTX *ctx, const char *file, + int type); +#endif +__owur int SSL_CTX_use_PrivateKey_file(SSL_CTX *ctx, const char *file, + int type); +__owur int SSL_CTX_use_certificate_file(SSL_CTX *ctx, const char *file, + int type); +/* PEM type */ +__owur int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file); +__owur int SSL_use_certificate_chain_file(SSL *ssl, const char *file); +__owur STACK_OF(X509_NAME) *SSL_load_client_CA_file(const char *file); +__owur STACK_OF(X509_NAME) +*SSL_load_client_CA_file_ex(const char *file, OSSL_LIB_CTX *libctx, + const char *propq); +__owur int SSL_add_file_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *file); +int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *dir); +int SSL_add_store_cert_subjects_to_stack(STACK_OF(X509_NAME) *stackCAs, + const char *uri); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_load_error_strings() \ + OPENSSL_init_ssl(OPENSSL_INIT_LOAD_SSL_STRINGS \ + | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL) +# endif + +__owur const char *SSL_state_string(const SSL *s); +__owur const char *SSL_rstate_string(const SSL *s); +__owur const char *SSL_state_string_long(const SSL *s); +__owur const char *SSL_rstate_string_long(const SSL *s); + +#ifndef OPENSSL_NO_DEPRECATED_3_4 +OSSL_DEPRECATEDIN_3_4_FOR("not Y2038-safe, replace with SSL_SESSION_get_time_ex()") +__owur long SSL_SESSION_get_time(const SSL_SESSION *s); +OSSL_DEPRECATEDIN_3_4_FOR("not Y2038-safe, replace with SSL_SESSION_set_time_ex()") +__owur long SSL_SESSION_set_time(SSL_SESSION *s, long t); +#endif +__owur long SSL_SESSION_get_timeout(const SSL_SESSION *s); +__owur long SSL_SESSION_set_timeout(SSL_SESSION *s, long t); +__owur int 
SSL_SESSION_get_protocol_version(const SSL_SESSION *s); +__owur int SSL_SESSION_set_protocol_version(SSL_SESSION *s, int version); + +__owur time_t SSL_SESSION_get_time_ex(const SSL_SESSION *s); +__owur time_t SSL_SESSION_set_time_ex(SSL_SESSION *s, time_t t); + +__owur const char *SSL_SESSION_get0_hostname(const SSL_SESSION *s); +__owur int SSL_SESSION_set1_hostname(SSL_SESSION *s, const char *hostname); +void SSL_SESSION_get0_alpn_selected(const SSL_SESSION *s, + const unsigned char **alpn, + size_t *len); +__owur int SSL_SESSION_set1_alpn_selected(SSL_SESSION *s, + const unsigned char *alpn, + size_t len); +__owur const SSL_CIPHER *SSL_SESSION_get0_cipher(const SSL_SESSION *s); +__owur int SSL_SESSION_set_cipher(SSL_SESSION *s, const SSL_CIPHER *cipher); +__owur int SSL_SESSION_has_ticket(const SSL_SESSION *s); +__owur unsigned long SSL_SESSION_get_ticket_lifetime_hint(const SSL_SESSION *s); +void SSL_SESSION_get0_ticket(const SSL_SESSION *s, const unsigned char **tick, + size_t *len); +__owur uint32_t SSL_SESSION_get_max_early_data(const SSL_SESSION *s); +__owur int SSL_SESSION_set_max_early_data(SSL_SESSION *s, + uint32_t max_early_data); +__owur int SSL_copy_session_id(SSL *to, const SSL *from); +__owur X509 *SSL_SESSION_get0_peer(SSL_SESSION *s); +__owur int SSL_SESSION_set1_id_context(SSL_SESSION *s, + const unsigned char *sid_ctx, + unsigned int sid_ctx_len); +__owur int SSL_SESSION_set1_id(SSL_SESSION *s, const unsigned char *sid, + unsigned int sid_len); +__owur int SSL_SESSION_is_resumable(const SSL_SESSION *s); + +__owur SSL_SESSION *SSL_SESSION_new(void); +__owur SSL_SESSION *SSL_SESSION_dup(const SSL_SESSION *src); +const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, + unsigned int *len); +const unsigned char *SSL_SESSION_get0_id_context(const SSL_SESSION *s, + unsigned int *len); +__owur unsigned int SSL_SESSION_get_compress_id(const SSL_SESSION *s); +# ifndef OPENSSL_NO_STDIO +int SSL_SESSION_print_fp(FILE *fp, const SSL_SESSION *ses); +# endif +int SSL_SESSION_print(BIO *fp, const SSL_SESSION *ses); +int SSL_SESSION_print_keylog(BIO *bp, const SSL_SESSION *x); +int SSL_SESSION_up_ref(SSL_SESSION *ses); +void SSL_SESSION_free(SSL_SESSION *ses); +__owur int i2d_SSL_SESSION(const SSL_SESSION *in, unsigned char **pp); +__owur int SSL_set_session(SSL *to, SSL_SESSION *session); +int SSL_CTX_add_session(SSL_CTX *ctx, SSL_SESSION *session); +int SSL_CTX_remove_session(SSL_CTX *ctx, SSL_SESSION *session); +__owur int SSL_CTX_set_generate_session_id(SSL_CTX *ctx, GEN_SESSION_CB cb); +__owur int SSL_set_generate_session_id(SSL *s, GEN_SESSION_CB cb); +__owur int SSL_has_matching_session_id(const SSL *s, + const unsigned char *id, + unsigned int id_len); +SSL_SESSION *d2i_SSL_SESSION(SSL_SESSION **a, const unsigned char **pp, + long length); +SSL_SESSION *d2i_SSL_SESSION_ex(SSL_SESSION **a, const unsigned char **pp, + long length, OSSL_LIB_CTX *libctx, + const char *propq); + +# ifdef OPENSSL_X509_H +__owur X509 *SSL_get0_peer_certificate(const SSL *s); +__owur X509 *SSL_get1_peer_certificate(const SSL *s); +/* Deprecated in 3.0.0 */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define SSL_get_peer_certificate SSL_get1_peer_certificate +# endif +# endif + +__owur STACK_OF(X509) *SSL_get_peer_cert_chain(const SSL *s); + +__owur int SSL_CTX_get_verify_mode(const SSL_CTX *ctx); +__owur int SSL_CTX_get_verify_depth(const SSL_CTX *ctx); +__owur SSL_verify_cb SSL_CTX_get_verify_callback(const SSL_CTX *ctx); +void SSL_CTX_set_verify(SSL_CTX *ctx, int mode, SSL_verify_cb callback); +void 
SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth); +void SSL_CTX_set_cert_verify_callback(SSL_CTX *ctx, + int (*cb) (X509_STORE_CTX *, void *), + void *arg); +void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg), + void *arg); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey(SSL_CTX *ctx, RSA *rsa); +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_use_RSAPrivateKey_ASN1(SSL_CTX *ctx, const unsigned char *d, + long len); +# endif +__owur int SSL_CTX_use_PrivateKey(SSL_CTX *ctx, EVP_PKEY *pkey); +__owur int SSL_CTX_use_PrivateKey_ASN1(int pk, SSL_CTX *ctx, + const unsigned char *d, long len); +__owur int SSL_CTX_use_certificate(SSL_CTX *ctx, X509 *x); +__owur int SSL_CTX_use_certificate_ASN1(SSL_CTX *ctx, int len, + const unsigned char *d); +__owur int SSL_CTX_use_cert_and_key(SSL_CTX *ctx, X509 *x509, EVP_PKEY *privatekey, + STACK_OF(X509) *chain, int override); + +void SSL_CTX_set_default_passwd_cb(SSL_CTX *ctx, pem_password_cb *cb); +void SSL_CTX_set_default_passwd_cb_userdata(SSL_CTX *ctx, void *u); +pem_password_cb *SSL_CTX_get_default_passwd_cb(SSL_CTX *ctx); +void *SSL_CTX_get_default_passwd_cb_userdata(SSL_CTX *ctx); +void SSL_set_default_passwd_cb(SSL *s, pem_password_cb *cb); +void SSL_set_default_passwd_cb_userdata(SSL *s, void *u); +pem_password_cb *SSL_get_default_passwd_cb(SSL *s); +void *SSL_get_default_passwd_cb_userdata(SSL *s); + +__owur int SSL_CTX_check_private_key(const SSL_CTX *ctx); +__owur int SSL_check_private_key(const SSL *ctx); + +__owur int SSL_CTX_set_session_id_context(SSL_CTX *ctx, + const unsigned char *sid_ctx, + unsigned int sid_ctx_len); + +SSL *SSL_new(SSL_CTX *ctx); +int SSL_up_ref(SSL *s); +int SSL_is_dtls(const SSL *s); +int SSL_is_tls(const SSL *s); +int SSL_is_quic(const SSL *s); +__owur int SSL_set_session_id_context(SSL *ssl, const unsigned char *sid_ctx, + unsigned int sid_ctx_len); + +__owur int SSL_CTX_set_purpose(SSL_CTX *ctx, int purpose); +__owur int SSL_set_purpose(SSL *ssl, int purpose); +__owur int SSL_CTX_set_trust(SSL_CTX *ctx, int trust); +__owur int SSL_set_trust(SSL *ssl, int trust); + +__owur int SSL_set1_host(SSL *s, const char *hostname); +__owur int SSL_add1_host(SSL *s, const char *hostname); +__owur const char *SSL_get0_peername(SSL *s); +void SSL_set_hostflags(SSL *s, unsigned int flags); + +__owur int SSL_CTX_dane_enable(SSL_CTX *ctx); +__owur int SSL_CTX_dane_mtype_set(SSL_CTX *ctx, const EVP_MD *md, + uint8_t mtype, uint8_t ord); +__owur int SSL_dane_enable(SSL *s, const char *basedomain); +__owur int SSL_dane_tlsa_add(SSL *s, uint8_t usage, uint8_t selector, + uint8_t mtype, const unsigned char *data, size_t dlen); +__owur int SSL_get0_dane_authority(SSL *s, X509 **mcert, EVP_PKEY **mspki); +__owur int SSL_get0_dane_tlsa(SSL *s, uint8_t *usage, uint8_t *selector, + uint8_t *mtype, const unsigned char **data, + size_t *dlen); +/* + * Bridge opacity barrier between libcrypt and libssl, also needed to support + * offline testing in test/danetest.c + */ +SSL_DANE *SSL_get0_dane(SSL *ssl); +/* + * DANE flags + */ +unsigned long SSL_CTX_dane_set_flags(SSL_CTX *ctx, unsigned long flags); +unsigned long SSL_CTX_dane_clear_flags(SSL_CTX *ctx, unsigned long flags); +unsigned long SSL_dane_set_flags(SSL *ssl, unsigned long flags); +unsigned long SSL_dane_clear_flags(SSL *ssl, unsigned long flags); + +__owur int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm); +__owur int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm); + +__owur X509_VERIFY_PARAM 
*SSL_CTX_get0_param(SSL_CTX *ctx); +__owur X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl); + +# ifndef OPENSSL_NO_SRP +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_username(SSL_CTX *ctx, char *name); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_password(SSL_CTX *ctx, char *password); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_strength(SSL_CTX *ctx, int strength); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_client_pwd_callback(SSL_CTX *ctx, + char *(*cb) (SSL *, void *)); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_verify_param_callback(SSL_CTX *ctx, + int (*cb) (SSL *, void *)); +OSSL_DEPRECATEDIN_3_0 +int SSL_CTX_set_srp_username_callback(SSL_CTX *ctx, + int (*cb) (SSL *, int *, void *)); +OSSL_DEPRECATEDIN_3_0 int SSL_CTX_set_srp_cb_arg(SSL_CTX *ctx, void *arg); + +OSSL_DEPRECATEDIN_3_0 +int SSL_set_srp_server_param(SSL *s, const BIGNUM *N, const BIGNUM *g, + BIGNUM *sa, BIGNUM *v, char *info); +OSSL_DEPRECATEDIN_3_0 +int SSL_set_srp_server_param_pw(SSL *s, const char *user, const char *pass, + const char *grp); + +OSSL_DEPRECATEDIN_3_0 __owur BIGNUM *SSL_get_srp_g(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur BIGNUM *SSL_get_srp_N(SSL *s); + +OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_username(SSL *s); +OSSL_DEPRECATEDIN_3_0 __owur char *SSL_get_srp_userinfo(SSL *s); +# endif +# endif + +/* + * ClientHello callback and helpers. + */ + +# define SSL_CLIENT_HELLO_SUCCESS 1 +# define SSL_CLIENT_HELLO_ERROR 0 +# define SSL_CLIENT_HELLO_RETRY (-1) + +typedef int (*SSL_client_hello_cb_fn) (SSL *s, int *al, void *arg); +void SSL_CTX_set_client_hello_cb(SSL_CTX *c, SSL_client_hello_cb_fn cb, + void *arg); +typedef int (*SSL_new_pending_conn_cb_fn) (SSL_CTX *ctx, SSL *new_ssl, + void *arg); +void SSL_CTX_set_new_pending_conn_cb(SSL_CTX *c, SSL_new_pending_conn_cb_fn cb, + void *arg); + +int SSL_client_hello_isv2(SSL *s); +unsigned int SSL_client_hello_get0_legacy_version(SSL *s); +size_t SSL_client_hello_get0_random(SSL *s, const unsigned char **out); +size_t SSL_client_hello_get0_session_id(SSL *s, const unsigned char **out); +size_t SSL_client_hello_get0_ciphers(SSL *s, const unsigned char **out); +size_t SSL_client_hello_get0_compression_methods(SSL *s, + const unsigned char **out); +int SSL_client_hello_get1_extensions_present(SSL *s, int **out, size_t *outlen); +int SSL_client_hello_get_extension_order(SSL *s, uint16_t *exts, + size_t *num_exts); +int SSL_client_hello_get0_ext(SSL *s, unsigned int type, + const unsigned char **out, size_t *outlen); + +void SSL_certs_clear(SSL *s); +void SSL_free(SSL *ssl); +# ifdef OSSL_ASYNC_FD +/* + * Windows application developer has to include windows.h to use these. 
+ */ +__owur int SSL_waiting_for_async(SSL *s); +__owur int SSL_get_all_async_fds(SSL *s, OSSL_ASYNC_FD *fds, size_t *numfds); +__owur int SSL_get_changed_async_fds(SSL *s, OSSL_ASYNC_FD *addfd, + size_t *numaddfds, OSSL_ASYNC_FD *delfd, + size_t *numdelfds); +__owur int SSL_CTX_set_async_callback(SSL_CTX *ctx, SSL_async_callback_fn callback); +__owur int SSL_CTX_set_async_callback_arg(SSL_CTX *ctx, void *arg); +__owur int SSL_set_async_callback(SSL *s, SSL_async_callback_fn callback); +__owur int SSL_set_async_callback_arg(SSL *s, void *arg); +__owur int SSL_get_async_status(SSL *s, int *status); + +# endif +__owur int SSL_accept(SSL *ssl); +__owur int SSL_stateless(SSL *s); +__owur int SSL_connect(SSL *ssl); +__owur int SSL_read(SSL *ssl, void *buf, int num); +__owur int SSL_read_ex(SSL *ssl, void *buf, size_t num, size_t *readbytes); + +# define SSL_READ_EARLY_DATA_ERROR 0 +# define SSL_READ_EARLY_DATA_SUCCESS 1 +# define SSL_READ_EARLY_DATA_FINISH 2 + +__owur int SSL_read_early_data(SSL *s, void *buf, size_t num, + size_t *readbytes); +__owur int SSL_peek(SSL *ssl, void *buf, int num); +__owur int SSL_peek_ex(SSL *ssl, void *buf, size_t num, size_t *readbytes); +__owur ossl_ssize_t SSL_sendfile(SSL *s, int fd, off_t offset, size_t size, + int flags); +__owur int SSL_write(SSL *ssl, const void *buf, int num); +__owur int SSL_write_ex(SSL *s, const void *buf, size_t num, size_t *written); +__owur int SSL_write_early_data(SSL *s, const void *buf, size_t num, + size_t *written); +long SSL_ctrl(SSL *ssl, int cmd, long larg, void *parg); +long SSL_callback_ctrl(SSL *, int, void (*)(void)); +long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg); +long SSL_CTX_callback_ctrl(SSL_CTX *, int, void (*)(void)); + +# define SSL_WRITE_FLAG_CONCLUDE (1U << 0) + +__owur int SSL_write_ex2(SSL *s, const void *buf, size_t num, + uint64_t flags, + size_t *written); + +# define SSL_EARLY_DATA_NOT_SENT 0 +# define SSL_EARLY_DATA_REJECTED 1 +# define SSL_EARLY_DATA_ACCEPTED 2 + +__owur int SSL_get_early_data_status(const SSL *s); + +__owur int SSL_get_error(const SSL *s, int ret_code); +__owur const char *SSL_get_version(const SSL *s); +__owur int SSL_get_handshake_rtt(const SSL *s, uint64_t *rtt); + +/* This sets the 'default' SSL version that SSL_new() will create */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +__owur int SSL_CTX_set_ssl_version(SSL_CTX *ctx, const SSL_METHOD *meth); +# endif + +# ifndef OPENSSL_NO_SSL3_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_method(void); /* SSLv3 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *SSLv3_client_method(void); +# endif +# endif + +#define SSLv23_method TLS_method +#define SSLv23_server_method TLS_server_method +#define SSLv23_client_method TLS_client_method + +/* Negotiate highest available SSL/TLS version */ +__owur const SSL_METHOD *TLS_method(void); +__owur const SSL_METHOD *TLS_server_method(void); +__owur const SSL_METHOD *TLS_client_method(void); + +# ifndef OPENSSL_NO_TLS1_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_method(void); /* TLSv1.0 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_TLS1_1_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD 
*TLSv1_1_method(void); /* TLSv1.1 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_1_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_TLS1_2_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_method(void); /* TLSv1.2 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *TLSv1_2_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_DTLS1_METHOD +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */ +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_client_method(void); +# endif +# endif + +# ifndef OPENSSL_NO_DTLS1_2_METHOD +/* DTLSv1.2 */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_server_method(void); +OSSL_DEPRECATEDIN_1_1_0 __owur const SSL_METHOD *DTLSv1_2_client_method(void); +# endif +# endif + +__owur const SSL_METHOD *DTLS_method(void); /* DTLS 1.0 and 1.2 */ +__owur const SSL_METHOD *DTLS_server_method(void); /* DTLS 1.0 and 1.2 */ +__owur const SSL_METHOD *DTLS_client_method(void); /* DTLS 1.0 and 1.2 */ + +__owur size_t DTLS_get_data_mtu(const SSL *s); + +__owur STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s); +__owur STACK_OF(SSL_CIPHER) *SSL_CTX_get_ciphers(const SSL_CTX *ctx); +__owur STACK_OF(SSL_CIPHER) *SSL_get_client_ciphers(const SSL *s); +__owur STACK_OF(SSL_CIPHER) *SSL_get1_supported_ciphers(SSL *s); + +__owur int SSL_do_handshake(SSL *s); +int SSL_key_update(SSL *s, int updatetype); +int SSL_get_key_update_type(const SSL *s); +int SSL_renegotiate(SSL *s); +int SSL_renegotiate_abbreviated(SSL *s); +__owur int SSL_renegotiate_pending(const SSL *s); +int SSL_new_session_ticket(SSL *s); +int SSL_shutdown(SSL *s); +__owur int SSL_verify_client_post_handshake(SSL *s); +void SSL_CTX_set_post_handshake_auth(SSL_CTX *ctx, int val); +void SSL_set_post_handshake_auth(SSL *s, int val); + +__owur const SSL_METHOD *SSL_CTX_get_ssl_method(const SSL_CTX *ctx); +__owur const SSL_METHOD *SSL_get_ssl_method(const SSL *s); +__owur int SSL_set_ssl_method(SSL *s, const SSL_METHOD *method); +__owur const char *SSL_alert_type_string_long(int value); +__owur const char *SSL_alert_type_string(int value); +__owur const char *SSL_alert_desc_string_long(int value); +__owur const char *SSL_alert_desc_string(int value); + +void SSL_set0_CA_list(SSL *s, STACK_OF(X509_NAME) *name_list); +void SSL_CTX_set0_CA_list(SSL_CTX *ctx, STACK_OF(X509_NAME) *name_list); +__owur const STACK_OF(X509_NAME) *SSL_get0_CA_list(const SSL *s); +__owur const STACK_OF(X509_NAME) *SSL_CTX_get0_CA_list(const SSL_CTX *ctx); +__owur int SSL_add1_to_CA_list(SSL *ssl, const X509 *x); +__owur int SSL_CTX_add1_to_CA_list(SSL_CTX *ctx, const X509 *x); +__owur const STACK_OF(X509_NAME) *SSL_get0_peer_CA_list(const SSL *s); + +void SSL_set_client_CA_list(SSL *s, STACK_OF(X509_NAME) *name_list); +void SSL_CTX_set_client_CA_list(SSL_CTX *ctx, STACK_OF(X509_NAME) *name_list); +__owur STACK_OF(X509_NAME) *SSL_get_client_CA_list(const SSL *s); +__owur STACK_OF(X509_NAME) *SSL_CTX_get_client_CA_list(const SSL_CTX *s); +__owur int SSL_add_client_CA(SSL *ssl, X509 *x); +__owur int SSL_CTX_add_client_CA(SSL_CTX *ctx, X509 *x); 
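The following is an illustrative sketch, not part of the generated header or of this patch: it shows how the client-side entry points declared above (TLS_client_method(), SSL_CTX_set_verify(), SSL_set1_host(), SSL_connect(), SSL_shutdown()) typically fit together. It assumes `sockfd` is an already-connected TCP socket and uses "example.com" as a placeholder peer name; real code needs fuller error handling and certificate configuration.

#include <openssl/ssl.h>
#include <openssl/err.h>
#include <stdio.h>

static int tls_client_handshake(int sockfd)
{
    SSL_CTX *ctx = SSL_CTX_new(TLS_client_method()); /* negotiate highest available TLS version */
    SSL *ssl = NULL;
    int ok = 0;

    if (ctx == NULL)
        return 0;
    SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);  /* require a valid server certificate */
    if (SSL_CTX_set_default_verify_paths(ctx) != 1)
        goto done;

    ssl = SSL_new(ctx);
    if (ssl == NULL)
        goto done;
    if (SSL_set1_host(ssl, "example.com") != 1)      /* placeholder: name the peer cert must match */
        goto done;
    if (SSL_set_fd(ssl, sockfd) != 1)                /* attach the already-connected socket */
        goto done;

    if (SSL_connect(ssl) == 1) {                     /* run the TLS handshake as a client */
        ok = 1;
        SSL_shutdown(ssl);                           /* send close_notify (best effort) */
    } else {
        ERR_print_errors_fp(stderr);
    }

done:
    SSL_free(ssl);                                   /* both free functions tolerate NULL */
    SSL_CTX_free(ctx);
    return ok;
}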
+ +void SSL_set_connect_state(SSL *s); +void SSL_set_accept_state(SSL *s); + +__owur long SSL_get_default_timeout(const SSL *s); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_library_init() OPENSSL_init_ssl(0, NULL) +# endif + +__owur char *SSL_CIPHER_description(const SSL_CIPHER *, char *buf, int size); +__owur STACK_OF(X509_NAME) *SSL_dup_CA_list(const STACK_OF(X509_NAME) *sk); + +__owur SSL *SSL_dup(SSL *ssl); + +__owur X509 *SSL_get_certificate(const SSL *ssl); +/* + * EVP_PKEY + */ +struct evp_pkey_st *SSL_get_privatekey(const SSL *ssl); + +__owur X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx); +__owur EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx); + +void SSL_CTX_set_quiet_shutdown(SSL_CTX *ctx, int mode); +__owur int SSL_CTX_get_quiet_shutdown(const SSL_CTX *ctx); +void SSL_set_quiet_shutdown(SSL *ssl, int mode); +__owur int SSL_get_quiet_shutdown(const SSL *ssl); +void SSL_set_shutdown(SSL *ssl, int mode); +__owur int SSL_get_shutdown(const SSL *ssl); +__owur int SSL_version(const SSL *ssl); +__owur int SSL_client_version(const SSL *s); +__owur int SSL_CTX_set_default_verify_paths(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_dir(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_file(SSL_CTX *ctx); +__owur int SSL_CTX_set_default_verify_store(SSL_CTX *ctx); +__owur int SSL_CTX_load_verify_file(SSL_CTX *ctx, const char *CAfile); +__owur int SSL_CTX_load_verify_dir(SSL_CTX *ctx, const char *CApath); +__owur int SSL_CTX_load_verify_store(SSL_CTX *ctx, const char *CAstore); +__owur int SSL_CTX_load_verify_locations(SSL_CTX *ctx, + const char *CAfile, + const char *CApath); +# define SSL_get0_session SSL_get_session/* just peek at pointer */ +__owur SSL_SESSION *SSL_get_session(const SSL *ssl); +__owur SSL_SESSION *SSL_get1_session(SSL *ssl); /* obtain a reference count */ +__owur SSL_CTX *SSL_get_SSL_CTX(const SSL *ssl); +SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX *ctx); +void SSL_set_info_callback(SSL *ssl, + void (*cb) (const SSL *ssl, int type, int val)); +void (*SSL_get_info_callback(const SSL *ssl)) (const SSL *ssl, int type, + int val); +__owur OSSL_HANDSHAKE_STATE SSL_get_state(const SSL *ssl); + +void SSL_set_verify_result(SSL *ssl, long v); +__owur long SSL_get_verify_result(const SSL *ssl); +__owur STACK_OF(X509) *SSL_get0_verified_chain(const SSL *s); + +__owur size_t SSL_get_client_random(const SSL *ssl, unsigned char *out, + size_t outlen); +__owur size_t SSL_get_server_random(const SSL *ssl, unsigned char *out, + size_t outlen); +__owur size_t SSL_SESSION_get_master_key(const SSL_SESSION *sess, + unsigned char *out, size_t outlen); +__owur int SSL_SESSION_set1_master_key(SSL_SESSION *sess, + const unsigned char *in, size_t len); +uint8_t SSL_SESSION_get_max_fragment_length(const SSL_SESSION *sess); + +#define SSL_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL, l, p, newf, dupf, freef) +__owur int SSL_set_ex_data(SSL *ssl, int idx, void *data); +void *SSL_get_ex_data(const SSL *ssl, int idx); +#define SSL_SESSION_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL_SESSION, l, p, newf, dupf, freef) +__owur int SSL_SESSION_set_ex_data(SSL_SESSION *ss, int idx, void *data); +void *SSL_SESSION_get_ex_data(const SSL_SESSION *ss, int idx); +#define SSL_CTX_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_SSL_CTX, l, p, newf, dupf, freef) +__owur int SSL_CTX_set_ex_data(SSL_CTX *ssl, int idx, void *data); +void 
*SSL_CTX_get_ex_data(const SSL_CTX *ssl, int idx); + +__owur int SSL_get_ex_data_X509_STORE_CTX_idx(void); + +# define SSL_CTX_sess_set_cache_size(ctx,t) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SESS_CACHE_SIZE,t,NULL) +# define SSL_CTX_sess_get_cache_size(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_SESS_CACHE_SIZE,0,NULL) +# define SSL_CTX_set_session_cache_mode(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SESS_CACHE_MODE,m,NULL) +# define SSL_CTX_get_session_cache_mode(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_SESS_CACHE_MODE,0,NULL) + +# define SSL_CTX_get_default_read_ahead(ctx) SSL_CTX_get_read_ahead(ctx) +# define SSL_CTX_set_default_read_ahead(ctx,m) SSL_CTX_set_read_ahead(ctx,m) +# define SSL_CTX_get_read_ahead(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_READ_AHEAD,0,NULL) +# define SSL_CTX_set_read_ahead(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_READ_AHEAD,m,NULL) +# define SSL_CTX_get_max_cert_list(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_MAX_CERT_LIST,0,NULL) +# define SSL_CTX_set_max_cert_list(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_CERT_LIST,m,NULL) +# define SSL_get_max_cert_list(ssl) \ + SSL_ctrl(ssl,SSL_CTRL_GET_MAX_CERT_LIST,0,NULL) +# define SSL_set_max_cert_list(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_CERT_LIST,m,NULL) + +# define SSL_CTX_set_max_send_fragment(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_SEND_FRAGMENT,m,NULL) +# define SSL_set_max_send_fragment(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_SEND_FRAGMENT,m,NULL) +# define SSL_CTX_set_split_send_fragment(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SPLIT_SEND_FRAGMENT,m,NULL) +# define SSL_set_split_send_fragment(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_SPLIT_SEND_FRAGMENT,m,NULL) +# define SSL_CTX_set_max_pipelines(ctx,m) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_MAX_PIPELINES,m,NULL) +# define SSL_set_max_pipelines(ssl,m) \ + SSL_ctrl(ssl,SSL_CTRL_SET_MAX_PIPELINES,m,NULL) +# define SSL_set_retry_verify(ssl) \ + (SSL_ctrl(ssl,SSL_CTRL_SET_RETRY_VERIFY,0,NULL) > 0) + +void SSL_CTX_set_default_read_buffer_len(SSL_CTX *ctx, size_t len); +void SSL_set_default_read_buffer_len(SSL *s, size_t len); + +# ifndef OPENSSL_NO_DH +# ifndef OPENSSL_NO_DEPRECATED_3_0 +/* NB: the |keylength| is only applicable when is_export is true */ +OSSL_DEPRECATEDIN_3_0 +void SSL_CTX_set_tmp_dh_callback(SSL_CTX *ctx, + DH *(*dh) (SSL *ssl, int is_export, + int keylength)); +OSSL_DEPRECATEDIN_3_0 +void SSL_set_tmp_dh_callback(SSL *ssl, + DH *(*dh) (SSL *ssl, int is_export, + int keylength)); +# endif +# endif + +__owur const COMP_METHOD *SSL_get_current_compression(const SSL *s); +__owur const COMP_METHOD *SSL_get_current_expansion(const SSL *s); +__owur const char *SSL_COMP_get_name(const COMP_METHOD *comp); +__owur const char *SSL_COMP_get0_name(const SSL_COMP *comp); +__owur int SSL_COMP_get_id(const SSL_COMP *comp); +STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void); +__owur STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) + *meths); +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_COMP_free_compression_methods() while(0) continue +# endif +__owur int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm); + +const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr); +int SSL_CIPHER_get_cipher_nid(const SSL_CIPHER *c); +int SSL_CIPHER_get_digest_nid(const SSL_CIPHER *c); +int SSL_bytes_to_cipher_list(SSL *s, const unsigned char *bytes, size_t len, + int isv2format, STACK_OF(SSL_CIPHER) **sk, + STACK_OF(SSL_CIPHER) **scsvs); + +/* TLS extensions functions */ +__owur int SSL_set_session_ticket_ext(SSL *s, void *ext_data, 
int ext_len); + +__owur int SSL_set_session_ticket_ext_cb(SSL *s, + tls_session_ticket_ext_cb_fn cb, + void *arg); + +/* Pre-shared secret session resumption functions */ +__owur int SSL_set_session_secret_cb(SSL *s, + tls_session_secret_cb_fn session_secret_cb, + void *arg); + +void SSL_CTX_set_not_resumable_session_callback(SSL_CTX *ctx, + int (*cb) (SSL *ssl, + int + is_forward_secure)); + +void SSL_set_not_resumable_session_callback(SSL *ssl, + int (*cb) (SSL *ssl, + int is_forward_secure)); + +void SSL_CTX_set_record_padding_callback(SSL_CTX *ctx, + size_t (*cb) (SSL *ssl, int type, + size_t len, void *arg)); +void SSL_CTX_set_record_padding_callback_arg(SSL_CTX *ctx, void *arg); +void *SSL_CTX_get_record_padding_callback_arg(const SSL_CTX *ctx); +int SSL_CTX_set_block_padding(SSL_CTX *ctx, size_t block_size); +int SSL_CTX_set_block_padding_ex(SSL_CTX *ctx, size_t app_block_size, + size_t hs_block_size); + +int SSL_set_record_padding_callback(SSL *ssl, + size_t (*cb) (SSL *ssl, int type, + size_t len, void *arg)); +void SSL_set_record_padding_callback_arg(SSL *ssl, void *arg); +void *SSL_get_record_padding_callback_arg(const SSL *ssl); +int SSL_set_block_padding(SSL *ssl, size_t block_size); +int SSL_set_block_padding_ex(SSL *ssl, size_t app_block_size, + size_t hs_block_size); +int SSL_set_num_tickets(SSL *s, size_t num_tickets); +size_t SSL_get_num_tickets(const SSL *s); +int SSL_CTX_set_num_tickets(SSL_CTX *ctx, size_t num_tickets); +size_t SSL_CTX_get_num_tickets(const SSL_CTX *ctx); + +/* QUIC support */ +int SSL_handle_events(SSL *s); +__owur int SSL_get_event_timeout(SSL *s, struct timeval *tv, int *is_infinite); +__owur int SSL_get_rpoll_descriptor(SSL *s, BIO_POLL_DESCRIPTOR *desc); +__owur int SSL_get_wpoll_descriptor(SSL *s, BIO_POLL_DESCRIPTOR *desc); +__owur int SSL_net_read_desired(SSL *s); +__owur int SSL_net_write_desired(SSL *s); +__owur int SSL_set_blocking_mode(SSL *s, int blocking); +__owur int SSL_get_blocking_mode(SSL *s); +__owur int SSL_set1_initial_peer_addr(SSL *s, const BIO_ADDR *peer_addr); +__owur SSL *SSL_get0_connection(SSL *s); +__owur int SSL_is_connection(SSL *s); + +__owur int SSL_is_listener(SSL *ssl); +__owur SSL *SSL_get0_listener(SSL *s); +#define SSL_LISTENER_FLAG_NO_VALIDATE (1UL << 1) +__owur SSL *SSL_new_listener(SSL_CTX *ctx, uint64_t flags); +__owur SSL *SSL_new_listener_from(SSL *ssl, uint64_t flags); +__owur SSL *SSL_new_from_listener(SSL *ssl, uint64_t flags); +#define SSL_ACCEPT_CONNECTION_NO_BLOCK (1UL << 0) +__owur SSL *SSL_accept_connection(SSL *ssl, uint64_t flags); +__owur size_t SSL_get_accept_connection_queue_len(SSL *ssl); +__owur int SSL_listen(SSL *ssl); + +__owur int SSL_is_domain(SSL *s); +__owur SSL *SSL_get0_domain(SSL *s); +__owur SSL *SSL_new_domain(SSL_CTX *ctx, uint64_t flags); + +#define SSL_DOMAIN_FLAG_SINGLE_THREAD (1U << 0) +#define SSL_DOMAIN_FLAG_MULTI_THREAD (1U << 1) +#define SSL_DOMAIN_FLAG_THREAD_ASSISTED (1U << 2) +#define SSL_DOMAIN_FLAG_BLOCKING (1U << 3) +#define SSL_DOMAIN_FLAG_LEGACY_BLOCKING (1U << 4) + +__owur int SSL_CTX_set_domain_flags(SSL_CTX *ctx, uint64_t domain_flags); +__owur int SSL_CTX_get_domain_flags(const SSL_CTX *ctx, uint64_t *domain_flags); +__owur int SSL_get_domain_flags(const SSL *ssl, uint64_t *domain_flags); + +#define SSL_STREAM_TYPE_NONE 0 +#define SSL_STREAM_TYPE_READ (1U << 0) +#define SSL_STREAM_TYPE_WRITE (1U << 1) +#define SSL_STREAM_TYPE_BIDI (SSL_STREAM_TYPE_READ | SSL_STREAM_TYPE_WRITE) +__owur int SSL_get_stream_type(SSL *s); + +__owur uint64_t SSL_get_stream_id(SSL 
*s); +__owur int SSL_is_stream_local(SSL *s); + +#define SSL_DEFAULT_STREAM_MODE_NONE 0 +#define SSL_DEFAULT_STREAM_MODE_AUTO_BIDI 1 +#define SSL_DEFAULT_STREAM_MODE_AUTO_UNI 2 +__owur int SSL_set_default_stream_mode(SSL *s, uint32_t mode); + +#define SSL_STREAM_FLAG_UNI (1U << 0) +#define SSL_STREAM_FLAG_NO_BLOCK (1U << 1) +#define SSL_STREAM_FLAG_ADVANCE (1U << 2) +__owur SSL *SSL_new_stream(SSL *s, uint64_t flags); + +#define SSL_INCOMING_STREAM_POLICY_AUTO 0 +#define SSL_INCOMING_STREAM_POLICY_ACCEPT 1 +#define SSL_INCOMING_STREAM_POLICY_REJECT 2 +__owur int SSL_set_incoming_stream_policy(SSL *s, int policy, uint64_t aec); + +#define SSL_ACCEPT_STREAM_NO_BLOCK (1U << 0) +__owur SSL *SSL_accept_stream(SSL *s, uint64_t flags); +__owur size_t SSL_get_accept_stream_queue_len(SSL *s); + +# ifndef OPENSSL_NO_QUIC +__owur int SSL_inject_net_dgram(SSL *s, const unsigned char *buf, + size_t buf_len, + const BIO_ADDR *peer, + const BIO_ADDR *local); +# endif + +typedef struct ssl_shutdown_ex_args_st { + uint64_t quic_error_code; + const char *quic_reason; +} SSL_SHUTDOWN_EX_ARGS; + +#define SSL_SHUTDOWN_FLAG_RAPID (1U << 0) +#define SSL_SHUTDOWN_FLAG_NO_STREAM_FLUSH (1U << 1) +#define SSL_SHUTDOWN_FLAG_NO_BLOCK (1U << 2) +#define SSL_SHUTDOWN_FLAG_WAIT_PEER (1U << 3) + +__owur int SSL_shutdown_ex(SSL *ssl, uint64_t flags, + const SSL_SHUTDOWN_EX_ARGS *args, + size_t args_len); + +__owur int SSL_stream_conclude(SSL *ssl, uint64_t flags); + +typedef struct ssl_stream_reset_args_st { + uint64_t quic_error_code; +} SSL_STREAM_RESET_ARGS; + +__owur int SSL_stream_reset(SSL *ssl, + const SSL_STREAM_RESET_ARGS *args, + size_t args_len); + +#define SSL_STREAM_STATE_NONE 0 +#define SSL_STREAM_STATE_OK 1 +#define SSL_STREAM_STATE_WRONG_DIR 2 +#define SSL_STREAM_STATE_FINISHED 3 +#define SSL_STREAM_STATE_RESET_LOCAL 4 +#define SSL_STREAM_STATE_RESET_REMOTE 5 +#define SSL_STREAM_STATE_CONN_CLOSED 6 +__owur int SSL_get_stream_read_state(SSL *ssl); +__owur int SSL_get_stream_write_state(SSL *ssl); + +__owur int SSL_get_stream_read_error_code(SSL *ssl, uint64_t *app_error_code); +__owur int SSL_get_stream_write_error_code(SSL *ssl, uint64_t *app_error_code); + +#define SSL_CONN_CLOSE_FLAG_LOCAL (1U << 0) +#define SSL_CONN_CLOSE_FLAG_TRANSPORT (1U << 1) + +typedef struct ssl_conn_close_info_st { + uint64_t error_code, frame_type; + const char *reason; + size_t reason_len; + uint32_t flags; +} SSL_CONN_CLOSE_INFO; + +__owur int SSL_get_conn_close_info(SSL *ssl, + SSL_CONN_CLOSE_INFO *info, + size_t info_len); + +# define SSL_VALUE_CLASS_GENERIC 0 +# define SSL_VALUE_CLASS_FEATURE_REQUEST 1 +# define SSL_VALUE_CLASS_FEATURE_PEER_REQUEST 2 +# define SSL_VALUE_CLASS_FEATURE_NEGOTIATED 3 + +# define SSL_VALUE_NONE 0 +# define SSL_VALUE_QUIC_STREAM_BIDI_LOCAL_AVAIL 1 +# define SSL_VALUE_QUIC_STREAM_BIDI_REMOTE_AVAIL 2 +# define SSL_VALUE_QUIC_STREAM_UNI_LOCAL_AVAIL 3 +# define SSL_VALUE_QUIC_STREAM_UNI_REMOTE_AVAIL 4 +# define SSL_VALUE_QUIC_IDLE_TIMEOUT 5 +# define SSL_VALUE_EVENT_HANDLING_MODE 6 +# define SSL_VALUE_STREAM_WRITE_BUF_SIZE 7 +# define SSL_VALUE_STREAM_WRITE_BUF_USED 8 +# define SSL_VALUE_STREAM_WRITE_BUF_AVAIL 9 + +# define SSL_VALUE_EVENT_HANDLING_MODE_INHERIT 0 +# define SSL_VALUE_EVENT_HANDLING_MODE_IMPLICIT 1 +# define SSL_VALUE_EVENT_HANDLING_MODE_EXPLICIT 2 + +int SSL_get_value_uint(SSL *s, uint32_t class_, uint32_t id, uint64_t *v); +int SSL_set_value_uint(SSL *s, uint32_t class_, uint32_t id, uint64_t v); + +# define SSL_get_generic_value_uint(ssl, id, v) \ + SSL_get_value_uint((ssl), 
SSL_VALUE_CLASS_GENERIC, (id), (v)) +# define SSL_set_generic_value_uint(ssl, id, v) \ + SSL_set_value_uint((ssl), SSL_VALUE_CLASS_GENERIC, (id), (v)) +# define SSL_get_feature_request_uint(ssl, id, v) \ + SSL_get_value_uint((ssl), SSL_VALUE_CLASS_FEATURE_REQUEST, (id), (v)) +# define SSL_set_feature_request_uint(ssl, id, v) \ + SSL_set_value_uint((ssl), SSL_VALUE_CLASS_FEATURE_REQUEST, (id), (v)) +# define SSL_get_feature_peer_request_uint(ssl, id, v) \ + SSL_get_value_uint((ssl), SSL_VALUE_CLASS_FEATURE_PEER_REQUEST, (id), (v)) +# define SSL_get_feature_negotiated_uint(ssl, id, v) \ + SSL_get_value_uint((ssl), SSL_VALUE_CLASS_FEATURE_NEGOTIATED, (id), (v)) + +# define SSL_get_quic_stream_bidi_local_avail(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_QUIC_STREAM_BIDI_LOCAL_AVAIL, \ + (value)) +# define SSL_get_quic_stream_bidi_remote_avail(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_QUIC_STREAM_BIDI_REMOTE_AVAIL, \ + (value)) +# define SSL_get_quic_stream_uni_local_avail(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_QUIC_STREAM_UNI_LOCAL_AVAIL, \ + (value)) +# define SSL_get_quic_stream_uni_remote_avail(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_QUIC_STREAM_UNI_REMOTE_AVAIL, \ + (value)) + +# define SSL_get_event_handling_mode(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_EVENT_HANDLING_MODE, \ + (value)) +# define SSL_set_event_handling_mode(ssl, value) \ + SSL_set_generic_value_uint((ssl), SSL_VALUE_EVENT_HANDLING_MODE, \ + (value)) + +# define SSL_get_stream_write_buf_size(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_STREAM_WRITE_BUF_SIZE, \ + (value)) +# define SSL_get_stream_write_buf_used(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_STREAM_WRITE_BUF_USED, \ + (value)) +# define SSL_get_stream_write_buf_avail(ssl, value) \ + SSL_get_generic_value_uint((ssl), SSL_VALUE_STREAM_WRITE_BUF_AVAIL, \ + (value)) + +# define SSL_POLL_EVENT_NONE 0 + +# define SSL_POLL_EVENT_F (1U << 0) /* F (Failure) */ +# define SSL_POLL_EVENT_EL (1U << 1) /* EL (Exception on Listener) */ +# define SSL_POLL_EVENT_EC (1U << 2) /* EC (Exception on Conn) */ +# define SSL_POLL_EVENT_ECD (1U << 3) /* ECD (Exception on Conn Drained) */ +# define SSL_POLL_EVENT_ER (1U << 4) /* ER (Exception on Read) */ +# define SSL_POLL_EVENT_EW (1U << 5) /* EW (Exception on Write) */ +# define SSL_POLL_EVENT_R (1U << 6) /* R (Readable) */ +# define SSL_POLL_EVENT_W (1U << 7) /* W (Writable) */ +# define SSL_POLL_EVENT_IC (1U << 8) /* IC (Incoming Connection) */ +# define SSL_POLL_EVENT_ISB (1U << 9) /* ISB (Incoming Stream: Bidi) */ +# define SSL_POLL_EVENT_ISU (1U << 10) /* ISU (Incoming Stream: Uni) */ +# define SSL_POLL_EVENT_OSB (1U << 11) /* OSB (Outgoing Stream: Bidi) */ +# define SSL_POLL_EVENT_OSU (1U << 12) /* OSU (Outgoing Stream: Uni) */ + +# define SSL_POLL_EVENT_RW (SSL_POLL_EVENT_R | SSL_POLL_EVENT_W) +# define SSL_POLL_EVENT_RE (SSL_POLL_EVENT_R | SSL_POLL_EVENT_ER) +# define SSL_POLL_EVENT_WE (SSL_POLL_EVENT_W | SSL_POLL_EVENT_EW) +# define SSL_POLL_EVENT_RWE (SSL_POLL_EVENT_RE | SSL_POLL_EVENT_WE) +# define SSL_POLL_EVENT_E (SSL_POLL_EVENT_EL | SSL_POLL_EVENT_EC \ + | SSL_POLL_EVENT_ER | SSL_POLL_EVENT_EW) +# define SSL_POLL_EVENT_IS (SSL_POLL_EVENT_ISB | SSL_POLL_EVENT_ISU) +# define SSL_POLL_EVENT_ISE (SSL_POLL_EVENT_IS | SSL_POLL_EVENT_EC) +# define SSL_POLL_EVENT_I (SSL_POLL_EVENT_IS | SSL_POLL_EVENT_IC) +# define SSL_POLL_EVENT_OS (SSL_POLL_EVENT_OSB | SSL_POLL_EVENT_OSU) +# define SSL_POLL_EVENT_OSE 
(SSL_POLL_EVENT_OS | SSL_POLL_EVENT_EC) + +typedef struct ssl_poll_item_st { + BIO_POLL_DESCRIPTOR desc; + uint64_t events, revents; +} SSL_POLL_ITEM; + +# define SSL_POLL_FLAG_NO_HANDLE_EVENTS (1U << 0) + +__owur int SSL_poll(SSL_POLL_ITEM *items, + size_t num_items, + size_t stride, + const struct timeval *timeout, + uint64_t flags, + size_t *result_count); + +static ossl_inline ossl_unused BIO_POLL_DESCRIPTOR +SSL_as_poll_descriptor(SSL *s) +{ + BIO_POLL_DESCRIPTOR d; + + d.type = BIO_POLL_DESCRIPTOR_TYPE_SSL; + d.value.ssl = s; + return d; +} + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define SSL_cache_hit(s) SSL_session_reused(s) +# endif + +__owur int SSL_session_reused(const SSL *s); +__owur int SSL_is_server(const SSL *s); + +__owur __owur SSL_CONF_CTX *SSL_CONF_CTX_new(void); +int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx); +void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx); +unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags); +__owur unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, + unsigned int flags); +__owur int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre); + +void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl); +void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx); + +__owur int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value); +__owur int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv); +__owur int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd); + +void SSL_add_ssl_module(void); +int SSL_config(SSL *s, const char *name); +int SSL_CTX_config(SSL_CTX *ctx, const char *name); + +# ifndef OPENSSL_NO_SSL_TRACE +void SSL_trace(int write_p, int version, int content_type, + const void *buf, size_t len, SSL *ssl, void *arg); +# endif + +# ifndef OPENSSL_NO_SOCK +int DTLSv1_listen(SSL *s, BIO_ADDR *client); +# endif + +# ifndef OPENSSL_NO_CT + +/* + * A callback for verifying that the received SCTs are sufficient. + * Expected to return 1 if they are sufficient, otherwise 0. + * May return a negative integer if an error occurs. + * A connection should be aborted if the SCTs are deemed insufficient. + */ +typedef int (*ssl_ct_validation_cb)(const CT_POLICY_EVAL_CTX *ctx, + const STACK_OF(SCT) *scts, void *arg); + +/* + * Sets a |callback| that is invoked upon receipt of ServerHelloDone to validate + * the received SCTs. + * If the callback returns a non-positive result, the connection is terminated. + * Call this function before beginning a handshake. + * If a NULL |callback| is provided, SCT validation is disabled. + * |arg| is arbitrary userdata that will be passed to the callback whenever it + * is invoked. Ownership of |arg| remains with the caller. + * + * NOTE: A side-effect of setting a CT callback is that an OCSP stapled response + * will be requested. + */ +int SSL_set_ct_validation_callback(SSL *s, ssl_ct_validation_cb callback, + void *arg); +int SSL_CTX_set_ct_validation_callback(SSL_CTX *ctx, + ssl_ct_validation_cb callback, + void *arg); +#define SSL_disable_ct(s) \ + ((void) SSL_set_validation_callback((s), NULL, NULL)) +#define SSL_CTX_disable_ct(ctx) \ + ((void) SSL_CTX_set_validation_callback((ctx), NULL, NULL)) + +/* + * The validation type enumerates the available behaviours of the built-in SSL + * CT validation callback selected via SSL_enable_ct() and SSL_CTX_enable_ct(). + * The underlying callback is a static function in libssl. 
+ */ +enum { + SSL_CT_VALIDATION_PERMISSIVE = 0, + SSL_CT_VALIDATION_STRICT +}; + +/* + * Enable CT by setting up a callback that implements one of the built-in + * validation variants. The SSL_CT_VALIDATION_PERMISSIVE variant always + * continues the handshake, the application can make appropriate decisions at + * handshake completion. The SSL_CT_VALIDATION_STRICT variant requires at + * least one valid SCT, or else handshake termination will be requested. The + * handshake may continue anyway if SSL_VERIFY_NONE is in effect. + */ +int SSL_enable_ct(SSL *s, int validation_mode); +int SSL_CTX_enable_ct(SSL_CTX *ctx, int validation_mode); + +/* + * Report whether a non-NULL callback is enabled. + */ +int SSL_ct_is_enabled(const SSL *s); +int SSL_CTX_ct_is_enabled(const SSL_CTX *ctx); + +/* Gets the SCTs received from a connection */ +const STACK_OF(SCT) *SSL_get0_peer_scts(SSL *s); + +/* + * Loads the CT log list from the default location. + * If a CTLOG_STORE has previously been set using SSL_CTX_set_ctlog_store, + * the log information loaded from this file will be appended to the + * CTLOG_STORE. + * Returns 1 on success, 0 otherwise. + */ +int SSL_CTX_set_default_ctlog_list_file(SSL_CTX *ctx); + +/* + * Loads the CT log list from the specified file path. + * If a CTLOG_STORE has previously been set using SSL_CTX_set_ctlog_store, + * the log information loaded from this file will be appended to the + * CTLOG_STORE. + * Returns 1 on success, 0 otherwise. + */ +int SSL_CTX_set_ctlog_list_file(SSL_CTX *ctx, const char *path); + +/* + * Sets the CT log list used by all SSL connections created from this SSL_CTX. + * Ownership of the CTLOG_STORE is transferred to the SSL_CTX. + */ +void SSL_CTX_set0_ctlog_store(SSL_CTX *ctx, CTLOG_STORE *logs); + +/* + * Gets the CT log list used by all SSL connections created from this SSL_CTX. 
+ * This will be NULL unless one of the following functions has been called: + * - SSL_CTX_set_default_ctlog_list_file + * - SSL_CTX_set_ctlog_list_file + * - SSL_CTX_set_ctlog_store + */ +const CTLOG_STORE *SSL_CTX_get0_ctlog_store(const SSL_CTX *ctx); + +# endif /* OPENSSL_NO_CT */ + +/* What the "other" parameter contains in security callback */ +/* Mask for type */ +# define SSL_SECOP_OTHER_TYPE 0xffff0000 +# define SSL_SECOP_OTHER_NONE 0 +# define SSL_SECOP_OTHER_CIPHER (1 << 16) +# define SSL_SECOP_OTHER_CURVE (2 << 16) +# define SSL_SECOP_OTHER_DH (3 << 16) +# define SSL_SECOP_OTHER_PKEY (4 << 16) +# define SSL_SECOP_OTHER_SIGALG (5 << 16) +# define SSL_SECOP_OTHER_CERT (6 << 16) + +/* Indicated operation refers to peer key or certificate */ +# define SSL_SECOP_PEER 0x1000 + +/* Values for "op" parameter in security callback */ + +/* Called to filter ciphers */ +/* Ciphers client supports */ +# define SSL_SECOP_CIPHER_SUPPORTED (1 | SSL_SECOP_OTHER_CIPHER) +/* Cipher shared by client/server */ +# define SSL_SECOP_CIPHER_SHARED (2 | SSL_SECOP_OTHER_CIPHER) +/* Sanity check of cipher server selects */ +# define SSL_SECOP_CIPHER_CHECK (3 | SSL_SECOP_OTHER_CIPHER) +/* Curves supported by client */ +# define SSL_SECOP_CURVE_SUPPORTED (4 | SSL_SECOP_OTHER_CURVE) +/* Curves shared by client/server */ +# define SSL_SECOP_CURVE_SHARED (5 | SSL_SECOP_OTHER_CURVE) +/* Sanity check of curve server selects */ +# define SSL_SECOP_CURVE_CHECK (6 | SSL_SECOP_OTHER_CURVE) +/* Temporary DH key */ +# define SSL_SECOP_TMP_DH (7 | SSL_SECOP_OTHER_PKEY) +/* SSL/TLS version */ +# define SSL_SECOP_VERSION (9 | SSL_SECOP_OTHER_NONE) +/* Session tickets */ +# define SSL_SECOP_TICKET (10 | SSL_SECOP_OTHER_NONE) +/* Supported signature algorithms sent to peer */ +# define SSL_SECOP_SIGALG_SUPPORTED (11 | SSL_SECOP_OTHER_SIGALG) +/* Shared signature algorithm */ +# define SSL_SECOP_SIGALG_SHARED (12 | SSL_SECOP_OTHER_SIGALG) +/* Sanity check signature algorithm allowed */ +# define SSL_SECOP_SIGALG_CHECK (13 | SSL_SECOP_OTHER_SIGALG) +/* Used to get mask of supported public key signature algorithms */ +# define SSL_SECOP_SIGALG_MASK (14 | SSL_SECOP_OTHER_SIGALG) +/* Use to see if compression is allowed */ +# define SSL_SECOP_COMPRESSION (15 | SSL_SECOP_OTHER_NONE) +/* EE key in certificate */ +# define SSL_SECOP_EE_KEY (16 | SSL_SECOP_OTHER_CERT) +/* CA key in certificate */ +# define SSL_SECOP_CA_KEY (17 | SSL_SECOP_OTHER_CERT) +/* CA digest algorithm in certificate */ +# define SSL_SECOP_CA_MD (18 | SSL_SECOP_OTHER_CERT) +/* Peer EE key in certificate */ +# define SSL_SECOP_PEER_EE_KEY (SSL_SECOP_EE_KEY | SSL_SECOP_PEER) +/* Peer CA key in certificate */ +# define SSL_SECOP_PEER_CA_KEY (SSL_SECOP_CA_KEY | SSL_SECOP_PEER) +/* Peer CA digest algorithm in certificate */ +# define SSL_SECOP_PEER_CA_MD (SSL_SECOP_CA_MD | SSL_SECOP_PEER) + +void SSL_set_security_level(SSL *s, int level); +__owur int SSL_get_security_level(const SSL *s); +void SSL_set_security_callback(SSL *s, + int (*cb) (const SSL *s, const SSL_CTX *ctx, + int op, int bits, int nid, + void *other, void *ex)); +int (*SSL_get_security_callback(const SSL *s)) (const SSL *s, + const SSL_CTX *ctx, int op, + int bits, int nid, void *other, + void *ex); +void SSL_set0_security_ex_data(SSL *s, void *ex); +__owur void *SSL_get0_security_ex_data(const SSL *s); + +void SSL_CTX_set_security_level(SSL_CTX *ctx, int level); +__owur int SSL_CTX_get_security_level(const SSL_CTX *ctx); +void SSL_CTX_set_security_callback(SSL_CTX *ctx, + int (*cb) (const SSL *s, 
const SSL_CTX *ctx, + int op, int bits, int nid, + void *other, void *ex)); +int (*SSL_CTX_get_security_callback(const SSL_CTX *ctx)) (const SSL *s, + const SSL_CTX *ctx, + int op, int bits, + int nid, + void *other, + void *ex); +void SSL_CTX_set0_security_ex_data(SSL_CTX *ctx, void *ex); +__owur void *SSL_CTX_get0_security_ex_data(const SSL_CTX *ctx); + +/* OPENSSL_INIT flag 0x010000 reserved for internal use */ +# define OPENSSL_INIT_NO_LOAD_SSL_STRINGS 0x00100000L +# define OPENSSL_INIT_LOAD_SSL_STRINGS 0x00200000L + +# define OPENSSL_INIT_SSL_DEFAULT \ + (OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS) + +int OPENSSL_init_ssl(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings); + +# ifndef OPENSSL_NO_UNIT_TEST +__owur const struct openssl_ssl_test_functions *SSL_test_functions(void); +# endif + +__owur int SSL_free_buffers(SSL *ssl); +__owur int SSL_alloc_buffers(SSL *ssl); + +/* Status codes passed to the decrypt session ticket callback. Some of these + * are for internal use only and are never passed to the callback. */ +typedef int SSL_TICKET_STATUS; + +/* Support for ticket appdata */ +/* fatal error, malloc failure */ +# define SSL_TICKET_FATAL_ERR_MALLOC 0 +/* fatal error, either from parsing or decrypting the ticket */ +# define SSL_TICKET_FATAL_ERR_OTHER 1 +/* No ticket present */ +# define SSL_TICKET_NONE 2 +/* Empty ticket present */ +# define SSL_TICKET_EMPTY 3 +/* the ticket couldn't be decrypted */ +# define SSL_TICKET_NO_DECRYPT 4 +/* a ticket was successfully decrypted */ +# define SSL_TICKET_SUCCESS 5 +/* same as above but the ticket needs to be renewed */ +# define SSL_TICKET_SUCCESS_RENEW 6 + +/* Return codes for the decrypt session ticket callback */ +typedef int SSL_TICKET_RETURN; + +/* An error occurred */ +#define SSL_TICKET_RETURN_ABORT 0 +/* Do not use the ticket, do not send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_IGNORE 1 +/* Do not use the ticket, send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_IGNORE_RENEW 2 +/* Use the ticket, do not send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_USE 3 +/* Use the ticket, send a renewed ticket to the client */ +#define SSL_TICKET_RETURN_USE_RENEW 4 + +typedef int (*SSL_CTX_generate_session_ticket_fn)(SSL *s, void *arg); +typedef SSL_TICKET_RETURN (*SSL_CTX_decrypt_session_ticket_fn)(SSL *s, SSL_SESSION *ss, + const unsigned char *keyname, + size_t keyname_length, + SSL_TICKET_STATUS status, + void *arg); +int SSL_CTX_set_session_ticket_cb(SSL_CTX *ctx, + SSL_CTX_generate_session_ticket_fn gen_cb, + SSL_CTX_decrypt_session_ticket_fn dec_cb, + void *arg); +int SSL_SESSION_set1_ticket_appdata(SSL_SESSION *ss, const void *data, size_t len); +int SSL_SESSION_get0_ticket_appdata(SSL_SESSION *ss, void **data, size_t *len); + +typedef unsigned int (*DTLS_timer_cb)(SSL *s, unsigned int timer_us); + +void DTLS_set_timer_cb(SSL *s, DTLS_timer_cb cb); + + +typedef int (*SSL_allow_early_data_cb_fn)(SSL *s, void *arg); +void SSL_CTX_set_allow_early_data_cb(SSL_CTX *ctx, + SSL_allow_early_data_cb_fn cb, + void *arg); +void SSL_set_allow_early_data_cb(SSL *s, + SSL_allow_early_data_cb_fn cb, + void *arg); + +/* store the default cipher strings inside the library */ +const char *OSSL_default_cipher_list(void); +const char *OSSL_default_ciphersuites(void); + +/* RFC8879 Certificate compression APIs */ + +int SSL_CTX_compress_certs(SSL_CTX *ctx, int alg); +int SSL_compress_certs(SSL *ssl, int alg); + +int SSL_CTX_set1_cert_comp_preference(SSL_CTX *ctx, int *algs, 
size_t len); +int SSL_set1_cert_comp_preference(SSL *ssl, int *algs, size_t len); + +int SSL_CTX_set1_compressed_cert(SSL_CTX *ctx, int algorithm, unsigned char *comp_data, + size_t comp_length, size_t orig_length); +int SSL_set1_compressed_cert(SSL *ssl, int algorithm, unsigned char *comp_data, + size_t comp_length, size_t orig_length); +size_t SSL_CTX_get1_compressed_cert(SSL_CTX *ctx, int alg, unsigned char **data, size_t *orig_len); +size_t SSL_get1_compressed_cert(SSL *ssl, int alg, unsigned char **data, size_t *orig_len); + +__owur int SSL_add_expected_rpk(SSL *s, EVP_PKEY *rpk); +__owur EVP_PKEY *SSL_get0_peer_rpk(const SSL *s); +__owur EVP_PKEY *SSL_SESSION_get0_peer_rpk(SSL_SESSION *s); +__owur int SSL_get_negotiated_client_cert_type(const SSL *s); +__owur int SSL_get_negotiated_server_cert_type(const SSL *s); + +__owur int SSL_set1_client_cert_type(SSL *s, const unsigned char *val, size_t len); +__owur int SSL_set1_server_cert_type(SSL *s, const unsigned char *val, size_t len); +__owur int SSL_CTX_set1_client_cert_type(SSL_CTX *ctx, const unsigned char *val, size_t len); +__owur int SSL_CTX_set1_server_cert_type(SSL_CTX *ctx, const unsigned char *val, size_t len); +__owur int SSL_get0_client_cert_type(const SSL *s, unsigned char **t, size_t *len); +__owur int SSL_get0_server_cert_type(const SSL *s, unsigned char **t, size_t *len); +__owur int SSL_CTX_get0_client_cert_type(const SSL_CTX *ctx, unsigned char **t, size_t *len); +__owur int SSL_CTX_get0_server_cert_type(const SSL_CTX *s, unsigned char **t, size_t *len); + +/* + * Protection level. For <= TLSv1.2 only "NONE" and "APPLICATION" are used. + */ +# define OSSL_RECORD_PROTECTION_LEVEL_NONE 0 +# define OSSL_RECORD_PROTECTION_LEVEL_EARLY 1 +# define OSSL_RECORD_PROTECTION_LEVEL_HANDSHAKE 2 +# define OSSL_RECORD_PROTECTION_LEVEL_APPLICATION 3 + +int SSL_set_quic_tls_cbs(SSL *s, const OSSL_DISPATCH *qtdis, void *arg); +int SSL_set_quic_tls_transport_params(SSL *s, + const unsigned char *params, + size_t params_len); + +int SSL_set_quic_tls_early_data_enabled(SSL *s, int enabled); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/ui.h b/contrib/openssl-cmake/common/include/openssl/ui.h new file mode 100644 index 000000000000..e64ec3b37fba --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/ui.h @@ -0,0 +1,407 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/ui.h.in + * + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_UI_H +# define OPENSSL_UI_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_UI_H +# endif + +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# endif +# include +# include +# include +# include + +/* For compatibility reasons, the macro OPENSSL_NO_UI is currently retained */ +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifdef OPENSSL_NO_UI_CONSOLE +# define OPENSSL_NO_UI +# endif +# endif + +# ifdef __cplusplus +extern "C" { +# endif + +/* + * All the following functions return -1 or NULL on error and in some cases + * (UI_process()) -2 if interrupted or in some other way cancelled. 
When + * everything is fine, they return 0, a positive value or a non-NULL pointer, + * all depending on their purpose. + */ + +/* Creators and destructor. */ +UI *UI_new(void); +UI *UI_new_method(const UI_METHOD *method); +void UI_free(UI *ui); + +/*- + The following functions are used to add strings to be printed and prompt + strings to prompt for data. The names are UI_{add,dup}__string + and UI_{add,dup}_input_boolean. + + UI_{add,dup}__string have the following meanings: + add add a text or prompt string. The pointers given to these + functions are used verbatim, no copying is done. + dup make a copy of the text or prompt string, then add the copy + to the collection of strings in the user interface. + + The function is a name for the functionality that the given + string shall be used for. It can be one of: + input use the string as data prompt. + verify use the string as verification prompt. This + is used to verify a previous input. + info use the string for informational output. + error use the string for error output. + Honestly, there's currently no difference between info and error for the + moment. + + UI_{add,dup}_input_boolean have the same semantics for "add" and "dup", + and are typically used when one wants to prompt for a yes/no response. + + All of the functions in this group take a UI and a prompt string. + The string input and verify addition functions also take a flag argument, + a buffer for the result to end up with, a minimum input size and a maximum + input size (the result buffer MUST be large enough to be able to contain + the maximum number of characters). Additionally, the verify addition + functions takes another buffer to compare the result against. + The boolean input functions take an action description string (which should + be safe to ignore if the expected user action is obvious, for example with + a dialog box with an OK button and a Cancel button), a string of acceptable + characters to mean OK and to mean Cancel. The two last strings are checked + to make sure they don't have common characters. Additionally, the same + flag argument as for the string input is taken, as well as a result buffer. + The result buffer is required to be at least one byte long. Depending on + the answer, the first character from the OK or the Cancel character strings + will be stored in the first byte of the result buffer. No NUL will be + added, so the result is *not* a string. + + On success, the all return an index of the added information. That index + is useful when retrieving results with UI_get0_result(). 
*/ +int UI_add_input_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize); +int UI_dup_input_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize); +int UI_add_verify_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize, + const char *test_buf); +int UI_dup_verify_string(UI *ui, const char *prompt, int flags, + char *result_buf, int minsize, int maxsize, + const char *test_buf); +int UI_add_input_boolean(UI *ui, const char *prompt, const char *action_desc, + const char *ok_chars, const char *cancel_chars, + int flags, char *result_buf); +int UI_dup_input_boolean(UI *ui, const char *prompt, const char *action_desc, + const char *ok_chars, const char *cancel_chars, + int flags, char *result_buf); +int UI_add_info_string(UI *ui, const char *text); +int UI_dup_info_string(UI *ui, const char *text); +int UI_add_error_string(UI *ui, const char *text); +int UI_dup_error_string(UI *ui, const char *text); + +/* These are the possible flags. They can be or'ed together. */ +/* Use to have echoing of input */ +# define UI_INPUT_FLAG_ECHO 0x01 +/* + * Use a default password. Where that password is found is completely up to + * the application, it might for example be in the user data set with + * UI_add_user_data(). It is not recommended to have more than one input in + * each UI being marked with this flag, or the application might get + * confused. + */ +# define UI_INPUT_FLAG_DEFAULT_PWD 0x02 + +/*- + * The user of these routines may want to define flags of their own. The core + * UI won't look at those, but will pass them on to the method routines. They + * must use higher bits so they don't get confused with the UI bits above. + * UI_INPUT_FLAG_USER_BASE tells which is the lowest bit to use. A good + * example of use is this: + * + * #define MY_UI_FLAG1 (0x01 << UI_INPUT_FLAG_USER_BASE) + * +*/ +# define UI_INPUT_FLAG_USER_BASE 16 + +/*- + * The following function helps construct a prompt. + * phrase_desc is a textual short description of the phrase to enter, + * for example "pass phrase", and + * object_name is the name of the object + * (which might be a card name or a file name) or NULL. + * The returned string shall always be allocated on the heap with + * OPENSSL_malloc(), and need to be free'd with OPENSSL_free(). + * + * If the ui_method doesn't contain a pointer to a user-defined prompt + * constructor, a default string is built, looking like this: + * + * "Enter {phrase_desc} for {object_name}:" + * + * So, if phrase_desc has the value "pass phrase" and object_name has + * the value "foo.key", the resulting string is: + * + * "Enter pass phrase for foo.key:" +*/ +char *UI_construct_prompt(UI *ui_method, + const char *phrase_desc, const char *object_name); + +/* + * The following function is used to store a pointer to user-specific data. + * Any previous such pointer will be returned and replaced. + * + * For callback purposes, this function makes a lot more sense than using + * ex_data, since the latter requires that different parts of OpenSSL or + * applications share the same ex_data index. + * + * Note that the UI_OpenSSL() method completely ignores the user data. Other + * methods may not, however. + */ +void *UI_add_user_data(UI *ui, void *user_data); +/* + * Alternatively, this function is used to duplicate the user data. + * This uses the duplicator method function. The destroy function will + * be used to free the user data in this case. 
+ */ +int UI_dup_user_data(UI *ui, void *user_data); +/* We need a user data retrieving function as well. */ +void *UI_get0_user_data(UI *ui); + +/* Return the result associated with a prompt given with the index i. */ +const char *UI_get0_result(UI *ui, int i); +int UI_get_result_length(UI *ui, int i); + +/* When all strings have been added, process the whole thing. */ +int UI_process(UI *ui); + +/* + * Give a user interface parameterised control commands. This can be used to + * send down an integer, a data pointer or a function pointer, as well as be + * used to get information from a UI. + */ +int UI_ctrl(UI *ui, int cmd, long i, void *p, void (*f) (void)); + +/* The commands */ +/* + * Use UI_CONTROL_PRINT_ERRORS with the value 1 to have UI_process print the + * OpenSSL error stack before printing any info or added error messages and + * before any prompting. + */ +# define UI_CTRL_PRINT_ERRORS 1 +/* + * Check if a UI_process() is possible to do again with the same instance of + * a user interface. This makes UI_ctrl() return 1 if it is redoable, and 0 + * if not. + */ +# define UI_CTRL_IS_REDOABLE 2 + +/* Some methods may use extra data */ +# define UI_set_app_data(s,arg) UI_set_ex_data(s,0,arg) +# define UI_get_app_data(s) UI_get_ex_data(s,0) + +# define UI_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_UI, l, p, newf, dupf, freef) +int UI_set_ex_data(UI *r, int idx, void *arg); +void *UI_get_ex_data(const UI *r, int idx); + +/* Use specific methods instead of the built-in one */ +void UI_set_default_method(const UI_METHOD *meth); +const UI_METHOD *UI_get_default_method(void); +const UI_METHOD *UI_get_method(UI *ui); +const UI_METHOD *UI_set_method(UI *ui, const UI_METHOD *meth); + +# ifndef OPENSSL_NO_UI_CONSOLE + +/* The method with all the built-in thingies */ +UI_METHOD *UI_OpenSSL(void); + +# endif + +/* + * NULL method. Literally does nothing, but may serve as a placeholder + * to avoid internal default. + */ +const UI_METHOD *UI_null(void); + +/* ---------- For method writers ---------- */ +/*- + A method contains a number of functions that implement the low level + of the User Interface. The functions are: + + an opener This function starts a session, maybe by opening + a channel to a tty, or by opening a window. + a writer This function is called to write a given string, + maybe to the tty, maybe as a field label in a + window. + a flusher This function is called to flush everything that + has been output so far. It can be used to actually + display a dialog box after it has been built. + a reader This function is called to read a given prompt, + maybe from the tty, maybe from a field in a + window. Note that it's called with all string + structures, not only the prompt ones, so it must + check such things itself. + a closer This function closes the session, maybe by closing + the channel to the tty, or closing the window. + + All these functions are expected to return: + + 0 on error. + 1 on success. + -1 on out-of-band events, for example if some prompting has + been canceled (by pressing Ctrl-C, for example). This is + only checked when returned by the flusher or the reader. + + The way this is used, the opener is first called, then the writer for all + strings, then the flusher, then the reader for all strings and finally the + closer. Note that if you want to prompt from a terminal or other command + line interface, the best is to have the reader also write the prompts + instead of having the writer do it. 
If you want to prompt from a dialog + box, the writer can be used to build up the contents of the box, and the + flusher to actually display the box and run the event loop until all data + has been given, after which the reader only grabs the given data and puts + them back into the UI strings. + + All method functions take a UI as argument. Additionally, the writer and + the reader take a UI_STRING. +*/ + +/* + * The UI_STRING type is the data structure that contains all the needed info + * about a string or a prompt, including test data for a verification prompt. + */ +typedef struct ui_string_st UI_STRING; + +SKM_DEFINE_STACK_OF_INTERNAL(UI_STRING, UI_STRING, UI_STRING) +#define sk_UI_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_value(sk, idx) ((UI_STRING *)OPENSSL_sk_value(ossl_check_const_UI_STRING_sk_type(sk), (idx))) +#define sk_UI_STRING_new(cmp) ((STACK_OF(UI_STRING) *)OPENSSL_sk_new(ossl_check_UI_STRING_compfunc_type(cmp))) +#define sk_UI_STRING_new_null() ((STACK_OF(UI_STRING) *)OPENSSL_sk_new_null()) +#define sk_UI_STRING_new_reserve(cmp, n) ((STACK_OF(UI_STRING) *)OPENSSL_sk_new_reserve(ossl_check_UI_STRING_compfunc_type(cmp), (n))) +#define sk_UI_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_UI_STRING_sk_type(sk), (n)) +#define sk_UI_STRING_free(sk) OPENSSL_sk_free(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_delete(sk, i) ((UI_STRING *)OPENSSL_sk_delete(ossl_check_UI_STRING_sk_type(sk), (i))) +#define sk_UI_STRING_delete_ptr(sk, ptr) ((UI_STRING *)OPENSSL_sk_delete_ptr(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr))) +#define sk_UI_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_pop(sk) ((UI_STRING *)OPENSSL_sk_pop(ossl_check_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_shift(sk) ((UI_STRING *)OPENSSL_sk_shift(ossl_check_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_UI_STRING_sk_type(sk),ossl_check_UI_STRING_freefunc_type(freefunc)) +#define sk_UI_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr), (idx)) +#define sk_UI_STRING_set(sk, idx, ptr) ((UI_STRING *)OPENSSL_sk_set(ossl_check_UI_STRING_sk_type(sk), (idx), ossl_check_UI_STRING_type(ptr))) +#define sk_UI_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr)) +#define sk_UI_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_type(ptr), pnum) +#define sk_UI_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_UI_STRING_sk_type(sk)) +#define sk_UI_STRING_dup(sk) ((STACK_OF(UI_STRING) *)OPENSSL_sk_dup(ossl_check_const_UI_STRING_sk_type(sk))) +#define sk_UI_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(UI_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_UI_STRING_sk_type(sk), ossl_check_UI_STRING_copyfunc_type(copyfunc), ossl_check_UI_STRING_freefunc_type(freefunc))) +#define sk_UI_STRING_set_cmp_func(sk, cmp) 
((sk_UI_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_UI_STRING_sk_type(sk), ossl_check_UI_STRING_compfunc_type(cmp))) + + +/* + * The different types of strings that are currently supported. This is only + * needed by method authors. + */ +enum UI_string_types { + UIT_NONE = 0, + UIT_PROMPT, /* Prompt for a string */ + UIT_VERIFY, /* Prompt for a string and verify */ + UIT_BOOLEAN, /* Prompt for a yes/no response */ + UIT_INFO, /* Send info to the user */ + UIT_ERROR /* Send an error message to the user */ +}; + +/* Create and manipulate methods */ +UI_METHOD *UI_create_method(const char *name); +void UI_destroy_method(UI_METHOD *ui_method); +int UI_method_set_opener(UI_METHOD *method, int (*opener) (UI *ui)); +int UI_method_set_writer(UI_METHOD *method, + int (*writer) (UI *ui, UI_STRING *uis)); +int UI_method_set_flusher(UI_METHOD *method, int (*flusher) (UI *ui)); +int UI_method_set_reader(UI_METHOD *method, + int (*reader) (UI *ui, UI_STRING *uis)); +int UI_method_set_closer(UI_METHOD *method, int (*closer) (UI *ui)); +int UI_method_set_data_duplicator(UI_METHOD *method, + void *(*duplicator) (UI *ui, void *ui_data), + void (*destructor)(UI *ui, void *ui_data)); +int UI_method_set_prompt_constructor(UI_METHOD *method, + char *(*prompt_constructor) (UI *ui, + const char + *phrase_desc, + const char + *object_name)); +int UI_method_set_ex_data(UI_METHOD *method, int idx, void *data); +int (*UI_method_get_opener(const UI_METHOD *method)) (UI *); +int (*UI_method_get_writer(const UI_METHOD *method)) (UI *, UI_STRING *); +int (*UI_method_get_flusher(const UI_METHOD *method)) (UI *); +int (*UI_method_get_reader(const UI_METHOD *method)) (UI *, UI_STRING *); +int (*UI_method_get_closer(const UI_METHOD *method)) (UI *); +char *(*UI_method_get_prompt_constructor(const UI_METHOD *method)) + (UI *, const char *, const char *); +void *(*UI_method_get_data_duplicator(const UI_METHOD *method)) (UI *, void *); +void (*UI_method_get_data_destructor(const UI_METHOD *method)) (UI *, void *); +const void *UI_method_get_ex_data(const UI_METHOD *method, int idx); + +/* + * The following functions are helpers for method writers to access relevant + * data from a UI_STRING. + */ + +/* Return type of the UI_STRING */ +enum UI_string_types UI_get_string_type(UI_STRING *uis); +/* Return input flags of the UI_STRING */ +int UI_get_input_flags(UI_STRING *uis); +/* Return the actual string to output (the prompt, info or error) */ +const char *UI_get0_output_string(UI_STRING *uis); +/* + * Return the optional action string to output (the boolean prompt + * instruction) + */ +const char *UI_get0_action_string(UI_STRING *uis); +/* Return the result of a prompt */ +const char *UI_get0_result_string(UI_STRING *uis); +int UI_get_result_string_length(UI_STRING *uis); +/* + * Return the string to test the result against. Only useful with verifies. + */ +const char *UI_get0_test_string(UI_STRING *uis); +/* Return the required minimum size of the result */ +int UI_get_result_minsize(UI_STRING *uis); +/* Return the required maximum size of the result */ +int UI_get_result_maxsize(UI_STRING *uis); +/* Set the result of a UI_STRING. 
*/ +int UI_set_result(UI *ui, UI_STRING *uis, const char *result); +int UI_set_result_ex(UI *ui, UI_STRING *uis, const char *result, int len); + +/* A couple of popular utility functions */ +int UI_UTIL_read_pw_string(char *buf, int length, const char *prompt, + int verify); +int UI_UTIL_read_pw(char *buf, char *buff, int size, const char *prompt, + int verify); +UI_METHOD *UI_UTIL_wrap_read_pem_callback(pem_password_cb *cb, int rwflag); + + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/x509.h b/contrib/openssl-cmake/common/include/openssl/x509.h new file mode 100644 index 000000000000..d013458c2264 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/x509.h @@ -0,0 +1,1303 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509.h.in + * + * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_H +# define OPENSSL_X509_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509_H +# endif + +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# include +# include +# include +# endif + +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Needed stacks for types defined in other headers */ +SKM_DEFINE_STACK_OF_INTERNAL(X509_NAME, X509_NAME, X509_NAME) +#define sk_X509_NAME_num(sk) OPENSSL_sk_num(ossl_check_const_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_value(sk, idx) ((X509_NAME *)OPENSSL_sk_value(ossl_check_const_X509_NAME_sk_type(sk), (idx))) +#define sk_X509_NAME_new(cmp) ((STACK_OF(X509_NAME) *)OPENSSL_sk_new(ossl_check_X509_NAME_compfunc_type(cmp))) +#define sk_X509_NAME_new_null() ((STACK_OF(X509_NAME) *)OPENSSL_sk_new_null()) +#define sk_X509_NAME_new_reserve(cmp, n) ((STACK_OF(X509_NAME) *)OPENSSL_sk_new_reserve(ossl_check_X509_NAME_compfunc_type(cmp), (n))) +#define sk_X509_NAME_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_NAME_sk_type(sk), (n)) +#define sk_X509_NAME_free(sk) OPENSSL_sk_free(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_zero(sk) OPENSSL_sk_zero(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_delete(sk, i) ((X509_NAME *)OPENSSL_sk_delete(ossl_check_X509_NAME_sk_type(sk), (i))) +#define sk_X509_NAME_delete_ptr(sk, ptr) ((X509_NAME *)OPENSSL_sk_delete_ptr(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr))) +#define sk_X509_NAME_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_pop(sk) ((X509_NAME *)OPENSSL_sk_pop(ossl_check_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_shift(sk) ((X509_NAME *)OPENSSL_sk_shift(ossl_check_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_NAME_sk_type(sk),ossl_check_X509_NAME_freefunc_type(freefunc)) +#define sk_X509_NAME_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_NAME_sk_type(sk), 
ossl_check_X509_NAME_type(ptr), (idx)) +#define sk_X509_NAME_set(sk, idx, ptr) ((X509_NAME *)OPENSSL_sk_set(ossl_check_X509_NAME_sk_type(sk), (idx), ossl_check_X509_NAME_type(ptr))) +#define sk_X509_NAME_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr)) +#define sk_X509_NAME_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_type(ptr), pnum) +#define sk_X509_NAME_sort(sk) OPENSSL_sk_sort(ossl_check_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_NAME_sk_type(sk)) +#define sk_X509_NAME_dup(sk) ((STACK_OF(X509_NAME) *)OPENSSL_sk_dup(ossl_check_const_X509_NAME_sk_type(sk))) +#define sk_X509_NAME_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_NAME) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_NAME_sk_type(sk), ossl_check_X509_NAME_copyfunc_type(copyfunc), ossl_check_X509_NAME_freefunc_type(freefunc))) +#define sk_X509_NAME_set_cmp_func(sk, cmp) ((sk_X509_NAME_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_NAME_sk_type(sk), ossl_check_X509_NAME_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509, X509, X509) +#define sk_X509_num(sk) OPENSSL_sk_num(ossl_check_const_X509_sk_type(sk)) +#define sk_X509_value(sk, idx) ((X509 *)OPENSSL_sk_value(ossl_check_const_X509_sk_type(sk), (idx))) +#define sk_X509_new(cmp) ((STACK_OF(X509) *)OPENSSL_sk_new(ossl_check_X509_compfunc_type(cmp))) +#define sk_X509_new_null() ((STACK_OF(X509) *)OPENSSL_sk_new_null()) +#define sk_X509_new_reserve(cmp, n) ((STACK_OF(X509) *)OPENSSL_sk_new_reserve(ossl_check_X509_compfunc_type(cmp), (n))) +#define sk_X509_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_sk_type(sk), (n)) +#define sk_X509_free(sk) OPENSSL_sk_free(ossl_check_X509_sk_type(sk)) +#define sk_X509_zero(sk) OPENSSL_sk_zero(ossl_check_X509_sk_type(sk)) +#define sk_X509_delete(sk, i) ((X509 *)OPENSSL_sk_delete(ossl_check_X509_sk_type(sk), (i))) +#define sk_X509_delete_ptr(sk, ptr) ((X509 *)OPENSSL_sk_delete_ptr(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr))) +#define sk_X509_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_pop(sk) ((X509 *)OPENSSL_sk_pop(ossl_check_X509_sk_type(sk))) +#define sk_X509_shift(sk) ((X509 *)OPENSSL_sk_shift(ossl_check_X509_sk_type(sk))) +#define sk_X509_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_sk_type(sk),ossl_check_X509_freefunc_type(freefunc)) +#define sk_X509_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr), (idx)) +#define sk_X509_set(sk, idx, ptr) ((X509 *)OPENSSL_sk_set(ossl_check_X509_sk_type(sk), (idx), ossl_check_X509_type(ptr))) +#define sk_X509_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr)) +#define sk_X509_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_sk_type(sk), ossl_check_X509_type(ptr), pnum) +#define sk_X509_sort(sk) OPENSSL_sk_sort(ossl_check_X509_sk_type(sk)) +#define sk_X509_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_sk_type(sk)) +#define sk_X509_dup(sk) ((STACK_OF(X509) *)OPENSSL_sk_dup(ossl_check_const_X509_sk_type(sk))) +#define 
sk_X509_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_sk_type(sk), ossl_check_X509_copyfunc_type(copyfunc), ossl_check_X509_freefunc_type(freefunc))) +#define sk_X509_set_cmp_func(sk, cmp) ((sk_X509_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_sk_type(sk), ossl_check_X509_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_REVOKED, X509_REVOKED, X509_REVOKED) +#define sk_X509_REVOKED_num(sk) OPENSSL_sk_num(ossl_check_const_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_value(sk, idx) ((X509_REVOKED *)OPENSSL_sk_value(ossl_check_const_X509_REVOKED_sk_type(sk), (idx))) +#define sk_X509_REVOKED_new(cmp) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new(ossl_check_X509_REVOKED_compfunc_type(cmp))) +#define sk_X509_REVOKED_new_null() ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new_null()) +#define sk_X509_REVOKED_new_reserve(cmp, n) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_new_reserve(ossl_check_X509_REVOKED_compfunc_type(cmp), (n))) +#define sk_X509_REVOKED_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_REVOKED_sk_type(sk), (n)) +#define sk_X509_REVOKED_free(sk) OPENSSL_sk_free(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_zero(sk) OPENSSL_sk_zero(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_delete(sk, i) ((X509_REVOKED *)OPENSSL_sk_delete(ossl_check_X509_REVOKED_sk_type(sk), (i))) +#define sk_X509_REVOKED_delete_ptr(sk, ptr) ((X509_REVOKED *)OPENSSL_sk_delete_ptr(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr))) +#define sk_X509_REVOKED_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_pop(sk) ((X509_REVOKED *)OPENSSL_sk_pop(ossl_check_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_shift(sk) ((X509_REVOKED *)OPENSSL_sk_shift(ossl_check_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_REVOKED_sk_type(sk),ossl_check_X509_REVOKED_freefunc_type(freefunc)) +#define sk_X509_REVOKED_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr), (idx)) +#define sk_X509_REVOKED_set(sk, idx, ptr) ((X509_REVOKED *)OPENSSL_sk_set(ossl_check_X509_REVOKED_sk_type(sk), (idx), ossl_check_X509_REVOKED_type(ptr))) +#define sk_X509_REVOKED_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr)) +#define sk_X509_REVOKED_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_type(ptr), pnum) +#define sk_X509_REVOKED_sort(sk) OPENSSL_sk_sort(ossl_check_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_REVOKED_sk_type(sk)) +#define sk_X509_REVOKED_dup(sk) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_dup(ossl_check_const_X509_REVOKED_sk_type(sk))) +#define sk_X509_REVOKED_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_REVOKED) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_copyfunc_type(copyfunc), ossl_check_X509_REVOKED_freefunc_type(freefunc))) +#define sk_X509_REVOKED_set_cmp_func(sk, cmp) 
((sk_X509_REVOKED_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_REVOKED_sk_type(sk), ossl_check_X509_REVOKED_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_CRL, X509_CRL, X509_CRL) +#define sk_X509_CRL_num(sk) OPENSSL_sk_num(ossl_check_const_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_value(sk, idx) ((X509_CRL *)OPENSSL_sk_value(ossl_check_const_X509_CRL_sk_type(sk), (idx))) +#define sk_X509_CRL_new(cmp) ((STACK_OF(X509_CRL) *)OPENSSL_sk_new(ossl_check_X509_CRL_compfunc_type(cmp))) +#define sk_X509_CRL_new_null() ((STACK_OF(X509_CRL) *)OPENSSL_sk_new_null()) +#define sk_X509_CRL_new_reserve(cmp, n) ((STACK_OF(X509_CRL) *)OPENSSL_sk_new_reserve(ossl_check_X509_CRL_compfunc_type(cmp), (n))) +#define sk_X509_CRL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_CRL_sk_type(sk), (n)) +#define sk_X509_CRL_free(sk) OPENSSL_sk_free(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_zero(sk) OPENSSL_sk_zero(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_delete(sk, i) ((X509_CRL *)OPENSSL_sk_delete(ossl_check_X509_CRL_sk_type(sk), (i))) +#define sk_X509_CRL_delete_ptr(sk, ptr) ((X509_CRL *)OPENSSL_sk_delete_ptr(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr))) +#define sk_X509_CRL_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_pop(sk) ((X509_CRL *)OPENSSL_sk_pop(ossl_check_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_shift(sk) ((X509_CRL *)OPENSSL_sk_shift(ossl_check_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_CRL_sk_type(sk),ossl_check_X509_CRL_freefunc_type(freefunc)) +#define sk_X509_CRL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr), (idx)) +#define sk_X509_CRL_set(sk, idx, ptr) ((X509_CRL *)OPENSSL_sk_set(ossl_check_X509_CRL_sk_type(sk), (idx), ossl_check_X509_CRL_type(ptr))) +#define sk_X509_CRL_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr)) +#define sk_X509_CRL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_type(ptr), pnum) +#define sk_X509_CRL_sort(sk) OPENSSL_sk_sort(ossl_check_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_CRL_sk_type(sk)) +#define sk_X509_CRL_dup(sk) ((STACK_OF(X509_CRL) *)OPENSSL_sk_dup(ossl_check_const_X509_CRL_sk_type(sk))) +#define sk_X509_CRL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_CRL) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_CRL_sk_type(sk), ossl_check_X509_CRL_copyfunc_type(copyfunc), ossl_check_X509_CRL_freefunc_type(freefunc))) +#define sk_X509_CRL_set_cmp_func(sk, cmp) ((sk_X509_CRL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_CRL_sk_type(sk), ossl_check_X509_CRL_compfunc_type(cmp))) + + +/* Flags for X509_get_signature_info() */ +/* Signature info is valid */ +# define X509_SIG_INFO_VALID 0x1 +/* Signature is suitable for TLS use */ +# define X509_SIG_INFO_TLS 0x2 + +# define X509_FILETYPE_PEM 1 +# define X509_FILETYPE_ASN1 2 +# define X509_FILETYPE_DEFAULT 3 + +/*- + * : + * The KeyUsage BITSTRING is treated as a little-endian integer, hence bit `0` + * is 0x80, while bit `7` is 0x01 (the LSB of the integer value), bit `8` is + * then the MSB 
of the second octet, or 0x8000. + */ +# define X509v3_KU_DIGITAL_SIGNATURE 0x0080 /* (0) */ +# define X509v3_KU_NON_REPUDIATION 0x0040 /* (1) */ +# define X509v3_KU_KEY_ENCIPHERMENT 0x0020 /* (2) */ +# define X509v3_KU_DATA_ENCIPHERMENT 0x0010 /* (3) */ +# define X509v3_KU_KEY_AGREEMENT 0x0008 /* (4) */ +# define X509v3_KU_KEY_CERT_SIGN 0x0004 /* (5) */ +# define X509v3_KU_CRL_SIGN 0x0002 /* (6) */ +# define X509v3_KU_ENCIPHER_ONLY 0x0001 /* (7) */ +# define X509v3_KU_DECIPHER_ONLY 0x8000 /* (8) */ +# ifndef OPENSSL_NO_DEPRECATED_3_4 +# define X509v3_KU_UNDEF 0xffff /* vestigial, not used */ +# endif + +struct X509_algor_st { + ASN1_OBJECT *algorithm; + ASN1_TYPE *parameter; +} /* X509_ALGOR */ ; + +typedef STACK_OF(X509_ALGOR) X509_ALGORS; + +typedef struct X509_val_st { + ASN1_TIME *notBefore; + ASN1_TIME *notAfter; +} X509_VAL; + +typedef struct X509_sig_st X509_SIG; + +typedef struct X509_name_entry_st X509_NAME_ENTRY; + +SKM_DEFINE_STACK_OF_INTERNAL(X509_NAME_ENTRY, X509_NAME_ENTRY, X509_NAME_ENTRY) +#define sk_X509_NAME_ENTRY_num(sk) OPENSSL_sk_num(ossl_check_const_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_value(sk, idx) ((X509_NAME_ENTRY *)OPENSSL_sk_value(ossl_check_const_X509_NAME_ENTRY_sk_type(sk), (idx))) +#define sk_X509_NAME_ENTRY_new(cmp) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new(ossl_check_X509_NAME_ENTRY_compfunc_type(cmp))) +#define sk_X509_NAME_ENTRY_new_null() ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new_null()) +#define sk_X509_NAME_ENTRY_new_reserve(cmp, n) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_new_reserve(ossl_check_X509_NAME_ENTRY_compfunc_type(cmp), (n))) +#define sk_X509_NAME_ENTRY_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_NAME_ENTRY_sk_type(sk), (n)) +#define sk_X509_NAME_ENTRY_free(sk) OPENSSL_sk_free(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_zero(sk) OPENSSL_sk_zero(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_delete(sk, i) ((X509_NAME_ENTRY *)OPENSSL_sk_delete(ossl_check_X509_NAME_ENTRY_sk_type(sk), (i))) +#define sk_X509_NAME_ENTRY_delete_ptr(sk, ptr) ((X509_NAME_ENTRY *)OPENSSL_sk_delete_ptr(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr))) +#define sk_X509_NAME_ENTRY_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_pop(sk) ((X509_NAME_ENTRY *)OPENSSL_sk_pop(ossl_check_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_shift(sk) ((X509_NAME_ENTRY *)OPENSSL_sk_shift(ossl_check_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_NAME_ENTRY_sk_type(sk),ossl_check_X509_NAME_ENTRY_freefunc_type(freefunc)) +#define sk_X509_NAME_ENTRY_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr), (idx)) +#define sk_X509_NAME_ENTRY_set(sk, idx, ptr) ((X509_NAME_ENTRY *)OPENSSL_sk_set(ossl_check_X509_NAME_ENTRY_sk_type(sk), (idx), ossl_check_X509_NAME_ENTRY_type(ptr))) +#define sk_X509_NAME_ENTRY_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr)) +#define sk_X509_NAME_ENTRY_find_all(sk, ptr, pnum) 
OPENSSL_sk_find_all(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_type(ptr), pnum) +#define sk_X509_NAME_ENTRY_sort(sk) OPENSSL_sk_sort(ossl_check_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_NAME_ENTRY_sk_type(sk)) +#define sk_X509_NAME_ENTRY_dup(sk) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_dup(ossl_check_const_X509_NAME_ENTRY_sk_type(sk))) +#define sk_X509_NAME_ENTRY_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_NAME_ENTRY) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_copyfunc_type(copyfunc), ossl_check_X509_NAME_ENTRY_freefunc_type(freefunc))) +#define sk_X509_NAME_ENTRY_set_cmp_func(sk, cmp) ((sk_X509_NAME_ENTRY_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_NAME_ENTRY_sk_type(sk), ossl_check_X509_NAME_ENTRY_compfunc_type(cmp))) + + +# define X509_EX_V_NETSCAPE_HACK 0x8000 +# define X509_EX_V_INIT 0x0001 +typedef struct X509_extension_st X509_EXTENSION; +SKM_DEFINE_STACK_OF_INTERNAL(X509_EXTENSION, X509_EXTENSION, X509_EXTENSION) +#define sk_X509_EXTENSION_num(sk) OPENSSL_sk_num(ossl_check_const_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_value(sk, idx) ((X509_EXTENSION *)OPENSSL_sk_value(ossl_check_const_X509_EXTENSION_sk_type(sk), (idx))) +#define sk_X509_EXTENSION_new(cmp) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new(ossl_check_X509_EXTENSION_compfunc_type(cmp))) +#define sk_X509_EXTENSION_new_null() ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new_null()) +#define sk_X509_EXTENSION_new_reserve(cmp, n) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_new_reserve(ossl_check_X509_EXTENSION_compfunc_type(cmp), (n))) +#define sk_X509_EXTENSION_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_EXTENSION_sk_type(sk), (n)) +#define sk_X509_EXTENSION_free(sk) OPENSSL_sk_free(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_zero(sk) OPENSSL_sk_zero(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_delete(sk, i) ((X509_EXTENSION *)OPENSSL_sk_delete(ossl_check_X509_EXTENSION_sk_type(sk), (i))) +#define sk_X509_EXTENSION_delete_ptr(sk, ptr) ((X509_EXTENSION *)OPENSSL_sk_delete_ptr(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr))) +#define sk_X509_EXTENSION_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_pop(sk) ((X509_EXTENSION *)OPENSSL_sk_pop(ossl_check_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_shift(sk) ((X509_EXTENSION *)OPENSSL_sk_shift(ossl_check_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_EXTENSION_sk_type(sk),ossl_check_X509_EXTENSION_freefunc_type(freefunc)) +#define sk_X509_EXTENSION_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr), (idx)) +#define sk_X509_EXTENSION_set(sk, idx, ptr) ((X509_EXTENSION *)OPENSSL_sk_set(ossl_check_X509_EXTENSION_sk_type(sk), (idx), ossl_check_X509_EXTENSION_type(ptr))) +#define sk_X509_EXTENSION_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr)) +#define sk_X509_EXTENSION_find_all(sk, ptr, pnum) 
OPENSSL_sk_find_all(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_type(ptr), pnum) +#define sk_X509_EXTENSION_sort(sk) OPENSSL_sk_sort(ossl_check_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_EXTENSION_sk_type(sk)) +#define sk_X509_EXTENSION_dup(sk) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_dup(ossl_check_const_X509_EXTENSION_sk_type(sk))) +#define sk_X509_EXTENSION_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_EXTENSION) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_copyfunc_type(copyfunc), ossl_check_X509_EXTENSION_freefunc_type(freefunc))) +#define sk_X509_EXTENSION_set_cmp_func(sk, cmp) ((sk_X509_EXTENSION_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_EXTENSION_sk_type(sk), ossl_check_X509_EXTENSION_compfunc_type(cmp))) + +typedef STACK_OF(X509_EXTENSION) X509_EXTENSIONS; +typedef struct x509_attributes_st X509_ATTRIBUTE; +SKM_DEFINE_STACK_OF_INTERNAL(X509_ATTRIBUTE, X509_ATTRIBUTE, X509_ATTRIBUTE) +#define sk_X509_ATTRIBUTE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_value(sk, idx) ((X509_ATTRIBUTE *)OPENSSL_sk_value(ossl_check_const_X509_ATTRIBUTE_sk_type(sk), (idx))) +#define sk_X509_ATTRIBUTE_new(cmp) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new(ossl_check_X509_ATTRIBUTE_compfunc_type(cmp))) +#define sk_X509_ATTRIBUTE_new_null() ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new_null()) +#define sk_X509_ATTRIBUTE_new_reserve(cmp, n) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_new_reserve(ossl_check_X509_ATTRIBUTE_compfunc_type(cmp), (n))) +#define sk_X509_ATTRIBUTE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_ATTRIBUTE_sk_type(sk), (n)) +#define sk_X509_ATTRIBUTE_free(sk) OPENSSL_sk_free(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_delete(sk, i) ((X509_ATTRIBUTE *)OPENSSL_sk_delete(ossl_check_X509_ATTRIBUTE_sk_type(sk), (i))) +#define sk_X509_ATTRIBUTE_delete_ptr(sk, ptr) ((X509_ATTRIBUTE *)OPENSSL_sk_delete_ptr(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr))) +#define sk_X509_ATTRIBUTE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_pop(sk) ((X509_ATTRIBUTE *)OPENSSL_sk_pop(ossl_check_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_shift(sk) ((X509_ATTRIBUTE *)OPENSSL_sk_shift(ossl_check_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_ATTRIBUTE_sk_type(sk),ossl_check_X509_ATTRIBUTE_freefunc_type(freefunc)) +#define sk_X509_ATTRIBUTE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr), (idx)) +#define sk_X509_ATTRIBUTE_set(sk, idx, ptr) ((X509_ATTRIBUTE *)OPENSSL_sk_set(ossl_check_X509_ATTRIBUTE_sk_type(sk), (idx), ossl_check_X509_ATTRIBUTE_type(ptr))) +#define sk_X509_ATTRIBUTE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr)) +#define sk_X509_ATTRIBUTE_find_all(sk, ptr, pnum) 
OPENSSL_sk_find_all(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_type(ptr), pnum) +#define sk_X509_ATTRIBUTE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_ATTRIBUTE_sk_type(sk)) +#define sk_X509_ATTRIBUTE_dup(sk) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_dup(ossl_check_const_X509_ATTRIBUTE_sk_type(sk))) +#define sk_X509_ATTRIBUTE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_ATTRIBUTE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_copyfunc_type(copyfunc), ossl_check_X509_ATTRIBUTE_freefunc_type(freefunc))) +#define sk_X509_ATTRIBUTE_set_cmp_func(sk, cmp) ((sk_X509_ATTRIBUTE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_ATTRIBUTE_sk_type(sk), ossl_check_X509_ATTRIBUTE_compfunc_type(cmp))) + +typedef struct X509_req_info_st X509_REQ_INFO; +typedef struct X509_req_st X509_REQ; +typedef struct x509_cert_aux_st X509_CERT_AUX; +typedef struct x509_cinf_st X509_CINF; + +/* Flags for X509_print_ex() */ + +# define X509_FLAG_COMPAT 0 +# define X509_FLAG_NO_HEADER 1L +# define X509_FLAG_NO_VERSION (1L << 1) +# define X509_FLAG_NO_SERIAL (1L << 2) +# define X509_FLAG_NO_SIGNAME (1L << 3) +# define X509_FLAG_NO_ISSUER (1L << 4) +# define X509_FLAG_NO_VALIDITY (1L << 5) +# define X509_FLAG_NO_SUBJECT (1L << 6) +# define X509_FLAG_NO_PUBKEY (1L << 7) +# define X509_FLAG_NO_EXTENSIONS (1L << 8) +# define X509_FLAG_NO_SIGDUMP (1L << 9) +# define X509_FLAG_NO_AUX (1L << 10) +# define X509_FLAG_NO_ATTRIBUTES (1L << 11) +# define X509_FLAG_NO_IDS (1L << 12) +# define X509_FLAG_EXTENSIONS_ONLY_KID (1L << 13) + +/* Flags specific to X509_NAME_print_ex() */ + +/* The field separator information */ + +# define XN_FLAG_SEP_MASK (0xf << 16) + +# define XN_FLAG_COMPAT 0/* Traditional; use old X509_NAME_print */ +# define XN_FLAG_SEP_COMMA_PLUS (1 << 16)/* RFC2253 ,+ */ +# define XN_FLAG_SEP_CPLUS_SPC (2 << 16)/* ,+ spaced: more readable */ +# define XN_FLAG_SEP_SPLUS_SPC (3 << 16)/* ;+ spaced */ +# define XN_FLAG_SEP_MULTILINE (4 << 16)/* One line per field */ + +# define XN_FLAG_DN_REV (1 << 20)/* Reverse DN order */ + +/* How the field name is shown */ + +# define XN_FLAG_FN_MASK (0x3 << 21) + +# define XN_FLAG_FN_SN 0/* Object short name */ +# define XN_FLAG_FN_LN (1 << 21)/* Object long name */ +# define XN_FLAG_FN_OID (2 << 21)/* Always use OIDs */ +# define XN_FLAG_FN_NONE (3 << 21)/* No field names */ + +# define XN_FLAG_SPC_EQ (1 << 23)/* Put spaces round '=' */ + +/* + * This determines if we dump fields we don't recognise: RFC2253 requires + * this. 
+ */ + +# define XN_FLAG_DUMP_UNKNOWN_FIELDS (1 << 24) + +# define XN_FLAG_FN_ALIGN (1 << 25)/* Align field names to 20 + * characters */ + +/* Complete set of RFC2253 flags */ + +# define XN_FLAG_RFC2253 (ASN1_STRFLGS_RFC2253 | \ + XN_FLAG_SEP_COMMA_PLUS | \ + XN_FLAG_DN_REV | \ + XN_FLAG_FN_SN | \ + XN_FLAG_DUMP_UNKNOWN_FIELDS) + +/* readable oneline form */ + +# define XN_FLAG_ONELINE (ASN1_STRFLGS_RFC2253 | \ + ASN1_STRFLGS_ESC_QUOTE | \ + XN_FLAG_SEP_CPLUS_SPC | \ + XN_FLAG_SPC_EQ | \ + XN_FLAG_FN_SN) + +/* readable multiline form */ + +# define XN_FLAG_MULTILINE (ASN1_STRFLGS_ESC_CTRL | \ + ASN1_STRFLGS_ESC_MSB | \ + XN_FLAG_SEP_MULTILINE | \ + XN_FLAG_SPC_EQ | \ + XN_FLAG_FN_LN | \ + XN_FLAG_FN_ALIGN) + +typedef struct X509_crl_info_st X509_CRL_INFO; + +typedef struct private_key_st { + int version; + /* The PKCS#8 data types */ + X509_ALGOR *enc_algor; + ASN1_OCTET_STRING *enc_pkey; /* encrypted pub key */ + /* When decrypted, the following will not be NULL */ + EVP_PKEY *dec_pkey; + /* used to encrypt and decrypt */ + int key_length; + char *key_data; + int key_free; /* true if we should auto free key_data */ + /* expanded version of 'enc_algor' */ + EVP_CIPHER_INFO cipher; +} X509_PKEY; + +typedef struct X509_info_st { + X509 *x509; + X509_CRL *crl; + X509_PKEY *x_pkey; + EVP_CIPHER_INFO enc_cipher; + int enc_len; + char *enc_data; +} X509_INFO; +SKM_DEFINE_STACK_OF_INTERNAL(X509_INFO, X509_INFO, X509_INFO) +#define sk_X509_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_value(sk, idx) ((X509_INFO *)OPENSSL_sk_value(ossl_check_const_X509_INFO_sk_type(sk), (idx))) +#define sk_X509_INFO_new(cmp) ((STACK_OF(X509_INFO) *)OPENSSL_sk_new(ossl_check_X509_INFO_compfunc_type(cmp))) +#define sk_X509_INFO_new_null() ((STACK_OF(X509_INFO) *)OPENSSL_sk_new_null()) +#define sk_X509_INFO_new_reserve(cmp, n) ((STACK_OF(X509_INFO) *)OPENSSL_sk_new_reserve(ossl_check_X509_INFO_compfunc_type(cmp), (n))) +#define sk_X509_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_INFO_sk_type(sk), (n)) +#define sk_X509_INFO_free(sk) OPENSSL_sk_free(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_delete(sk, i) ((X509_INFO *)OPENSSL_sk_delete(ossl_check_X509_INFO_sk_type(sk), (i))) +#define sk_X509_INFO_delete_ptr(sk, ptr) ((X509_INFO *)OPENSSL_sk_delete_ptr(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr))) +#define sk_X509_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_pop(sk) ((X509_INFO *)OPENSSL_sk_pop(ossl_check_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_shift(sk) ((X509_INFO *)OPENSSL_sk_shift(ossl_check_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_INFO_sk_type(sk),ossl_check_X509_INFO_freefunc_type(freefunc)) +#define sk_X509_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr), (idx)) +#define sk_X509_INFO_set(sk, idx, ptr) ((X509_INFO *)OPENSSL_sk_set(ossl_check_X509_INFO_sk_type(sk), (idx), ossl_check_X509_INFO_type(ptr))) +#define sk_X509_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_INFO_sk_type(sk), 
ossl_check_X509_INFO_type(ptr)) +#define sk_X509_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_type(ptr), pnum) +#define sk_X509_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_INFO_sk_type(sk)) +#define sk_X509_INFO_dup(sk) ((STACK_OF(X509_INFO) *)OPENSSL_sk_dup(ossl_check_const_X509_INFO_sk_type(sk))) +#define sk_X509_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_INFO_sk_type(sk), ossl_check_X509_INFO_copyfunc_type(copyfunc), ossl_check_X509_INFO_freefunc_type(freefunc))) +#define sk_X509_INFO_set_cmp_func(sk, cmp) ((sk_X509_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_INFO_sk_type(sk), ossl_check_X509_INFO_compfunc_type(cmp))) + + +/* + * The next 2 structures and their 8 routines are used to manipulate Netscape's + * spki structures - useful if you are writing a CA web page + */ +typedef struct Netscape_spkac_st { + X509_PUBKEY *pubkey; + ASN1_IA5STRING *challenge; /* challenge sent in atlas >= PR2 */ +} NETSCAPE_SPKAC; + +typedef struct Netscape_spki_st { + NETSCAPE_SPKAC *spkac; /* signed public key and challenge */ + X509_ALGOR sig_algor; + ASN1_BIT_STRING *signature; +} NETSCAPE_SPKI; + +/* Netscape certificate sequence structure */ +typedef struct Netscape_certificate_sequence { + ASN1_OBJECT *type; + STACK_OF(X509) *certs; +} NETSCAPE_CERT_SEQUENCE; + +/*- Unused (and iv length is wrong) +typedef struct CBCParameter_st + { + unsigned char iv[8]; + } CBC_PARAM; +*/ + +/* Password based encryption structure */ + +typedef struct PBEPARAM_st { + ASN1_OCTET_STRING *salt; + ASN1_INTEGER *iter; +} PBEPARAM; + +/* Password based encryption V2 structures */ + +typedef struct PBE2PARAM_st { + X509_ALGOR *keyfunc; + X509_ALGOR *encryption; +} PBE2PARAM; + +typedef struct PBKDF2PARAM_st { +/* Usually OCTET STRING but could be anything */ + ASN1_TYPE *salt; + ASN1_INTEGER *iter; + ASN1_INTEGER *keylength; + X509_ALGOR *prf; +} PBKDF2PARAM; + +typedef struct { + X509_ALGOR *keyDerivationFunc; + X509_ALGOR *messageAuthScheme; +} PBMAC1PARAM; + +# ifndef OPENSSL_NO_SCRYPT +typedef struct SCRYPT_PARAMS_st { + ASN1_OCTET_STRING *salt; + ASN1_INTEGER *costParameter; + ASN1_INTEGER *blockSize; + ASN1_INTEGER *parallelizationParameter; + ASN1_INTEGER *keyLength; +} SCRYPT_PARAMS; +# endif + +#ifdef __cplusplus +} +#endif + +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +# define X509_EXT_PACK_UNKNOWN 1 +# define X509_EXT_PACK_STRING 2 + +# define X509_extract_key(x) X509_get_pubkey(x)/*****/ +# define X509_REQ_extract_key(a) X509_REQ_get_pubkey(a) +# define X509_name_cmp(a,b) X509_NAME_cmp((a),(b)) + +void X509_CRL_set_default_method(const X509_CRL_METHOD *meth); +X509_CRL_METHOD *X509_CRL_METHOD_new(int (*crl_init) (X509_CRL *crl), + int (*crl_free) (X509_CRL *crl), + int (*crl_lookup) (X509_CRL *crl, + X509_REVOKED **ret, + const + ASN1_INTEGER *serial, + const + X509_NAME *issuer), + int (*crl_verify) (X509_CRL *crl, + EVP_PKEY *pk)); +void X509_CRL_METHOD_free(X509_CRL_METHOD *m); + +void X509_CRL_set_meth_data(X509_CRL *crl, void *dat); +void *X509_CRL_get_meth_data(X509_CRL *crl); + +const char *X509_verify_cert_error_string(long n); + +int X509_verify(X509 *a, EVP_PKEY *r); +int X509_self_signed(X509 *cert, int verify_signature); + +int X509_REQ_verify_ex(X509_REQ *a, EVP_PKEY *r, OSSL_LIB_CTX *libctx, + const char *propq); +int X509_REQ_verify(X509_REQ 
*a, EVP_PKEY *r); +int X509_CRL_verify(X509_CRL *a, EVP_PKEY *r); +int NETSCAPE_SPKI_verify(NETSCAPE_SPKI *a, EVP_PKEY *r); + +NETSCAPE_SPKI *NETSCAPE_SPKI_b64_decode(const char *str, int len); +char *NETSCAPE_SPKI_b64_encode(NETSCAPE_SPKI *x); +EVP_PKEY *NETSCAPE_SPKI_get_pubkey(NETSCAPE_SPKI *x); +int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *x, EVP_PKEY *pkey); + +int NETSCAPE_SPKI_print(BIO *out, NETSCAPE_SPKI *spki); + +int X509_signature_dump(BIO *bp, const ASN1_STRING *sig, int indent); +int X509_signature_print(BIO *bp, const X509_ALGOR *alg, + const ASN1_STRING *sig); + +int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx); +int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx); +int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx); +int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md); + +int X509_pubkey_digest(const X509 *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_digest(const X509 *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +ASN1_OCTET_STRING *X509_digest_sig(const X509 *cert, + EVP_MD **md_used, int *md_is_fallback); +int X509_CRL_digest(const X509_CRL *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_REQ_digest(const X509_REQ *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); +int X509_NAME_digest(const X509_NAME *data, const EVP_MD *type, + unsigned char *md, unsigned int *len); + +X509 *X509_load_http(const char *url, BIO *bio, BIO *rbio, int timeout); +X509_CRL *X509_CRL_load_http(const char *url, BIO *bio, BIO *rbio, int timeout); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# include /* OSSL_HTTP_REQ_CTX_nbio_d2i */ +# define X509_http_nbio(rctx, pcert) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(rctx, pcert, ASN1_ITEM_rptr(X509)) +# define X509_CRL_http_nbio(rctx, pcrl) \ + OSSL_HTTP_REQ_CTX_nbio_d2i(rctx, pcrl, ASN1_ITEM_rptr(X509_CRL)) +# endif + +# ifndef OPENSSL_NO_STDIO +X509 *d2i_X509_fp(FILE *fp, X509 **x509); +int i2d_X509_fp(FILE *fp, const X509 *x509); +X509_CRL *d2i_X509_CRL_fp(FILE *fp, X509_CRL **crl); +int i2d_X509_CRL_fp(FILE *fp, const X509_CRL *crl); +X509_REQ *d2i_X509_REQ_fp(FILE *fp, X509_REQ **req); +int i2d_X509_REQ_fp(FILE *fp, const X509_REQ *req); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPrivateKey_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPrivateKey_fp(FILE *fp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPublicKey_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPublicKey_fp(FILE *fp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSA_PUBKEY_fp(FILE *fp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSA_PUBKEY_fp(FILE *fp, const RSA *rsa); +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSA_PUBKEY_fp(FILE *fp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSA_PUBKEY_fp(FILE *fp, const DSA *dsa); +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSAPrivateKey_fp(FILE *fp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSAPrivateKey_fp(FILE *fp, const DSA *dsa); +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_EC_PUBKEY_fp(FILE *fp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_EC_PUBKEY_fp(FILE *fp, const EC_KEY *eckey); +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_ECPrivateKey_fp(FILE *fp, EC_KEY **eckey); 
+OSSL_DEPRECATEDIN_3_0 int i2d_ECPrivateKey_fp(FILE *fp, const EC_KEY *eckey); +# endif /* OPENSSL_NO_EC */ +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ +X509_SIG *d2i_PKCS8_fp(FILE *fp, X509_SIG **p8); +int i2d_PKCS8_fp(FILE *fp, const X509_SIG *p8); +X509_PUBKEY *d2i_X509_PUBKEY_fp(FILE *fp, X509_PUBKEY **xpk); +int i2d_X509_PUBKEY_fp(FILE *fp, const X509_PUBKEY *xpk); +PKCS8_PRIV_KEY_INFO *d2i_PKCS8_PRIV_KEY_INFO_fp(FILE *fp, + PKCS8_PRIV_KEY_INFO **p8inf); +int i2d_PKCS8_PRIV_KEY_INFO_fp(FILE *fp, const PKCS8_PRIV_KEY_INFO *p8inf); +int i2d_PKCS8PrivateKeyInfo_fp(FILE *fp, const EVP_PKEY *key); +int i2d_PrivateKey_fp(FILE *fp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PrivateKey_ex_fp(FILE *fp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PrivateKey_fp(FILE *fp, EVP_PKEY **a); +int i2d_PUBKEY_fp(FILE *fp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PUBKEY_ex_fp(FILE *fp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PUBKEY_fp(FILE *fp, EVP_PKEY **a); +# endif + +X509 *d2i_X509_bio(BIO *bp, X509 **x509); +int i2d_X509_bio(BIO *bp, const X509 *x509); +X509_CRL *d2i_X509_CRL_bio(BIO *bp, X509_CRL **crl); +int i2d_X509_CRL_bio(BIO *bp, const X509_CRL *crl); +X509_REQ *d2i_X509_REQ_bio(BIO *bp, X509_REQ **req); +int i2d_X509_REQ_bio(BIO *bp, const X509_REQ *req); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPrivateKey_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPrivateKey_bio(BIO *bp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSAPublicKey_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSAPublicKey_bio(BIO *bp, const RSA *rsa); +OSSL_DEPRECATEDIN_3_0 RSA *d2i_RSA_PUBKEY_bio(BIO *bp, RSA **rsa); +OSSL_DEPRECATEDIN_3_0 int i2d_RSA_PUBKEY_bio(BIO *bp, const RSA *rsa); +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSA_PUBKEY_bio(BIO *bp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSA_PUBKEY_bio(BIO *bp, const DSA *dsa); +OSSL_DEPRECATEDIN_3_0 DSA *d2i_DSAPrivateKey_bio(BIO *bp, DSA **dsa); +OSSL_DEPRECATEDIN_3_0 int i2d_DSAPrivateKey_bio(BIO *bp, const DSA *dsa); +# endif +# endif + +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_EC_PUBKEY_bio(BIO *bp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_EC_PUBKEY_bio(BIO *bp, const EC_KEY *eckey); +OSSL_DEPRECATEDIN_3_0 EC_KEY *d2i_ECPrivateKey_bio(BIO *bp, EC_KEY **eckey); +OSSL_DEPRECATEDIN_3_0 int i2d_ECPrivateKey_bio(BIO *bp, const EC_KEY *eckey); +# endif /* OPENSSL_NO_EC */ +# endif /* OPENSSL_NO_DEPRECATED_3_0 */ + +X509_SIG *d2i_PKCS8_bio(BIO *bp, X509_SIG **p8); +int i2d_PKCS8_bio(BIO *bp, const X509_SIG *p8); +X509_PUBKEY *d2i_X509_PUBKEY_bio(BIO *bp, X509_PUBKEY **xpk); +int i2d_X509_PUBKEY_bio(BIO *bp, const X509_PUBKEY *xpk); +PKCS8_PRIV_KEY_INFO *d2i_PKCS8_PRIV_KEY_INFO_bio(BIO *bp, + PKCS8_PRIV_KEY_INFO **p8inf); +int i2d_PKCS8_PRIV_KEY_INFO_bio(BIO *bp, const PKCS8_PRIV_KEY_INFO *p8inf); +int i2d_PKCS8PrivateKeyInfo_bio(BIO *bp, const EVP_PKEY *key); +int i2d_PrivateKey_bio(BIO *bp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PrivateKey_ex_bio(BIO *bp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PrivateKey_bio(BIO *bp, EVP_PKEY **a); +int i2d_PUBKEY_bio(BIO *bp, const EVP_PKEY *pkey); +EVP_PKEY *d2i_PUBKEY_ex_bio(BIO *bp, EVP_PKEY **a, OSSL_LIB_CTX *libctx, + const char *propq); +EVP_PKEY *d2i_PUBKEY_bio(BIO *bp, EVP_PKEY **a); + +DECLARE_ASN1_DUP_FUNCTION(X509) +DECLARE_ASN1_DUP_FUNCTION(X509_ALGOR) 
+DECLARE_ASN1_DUP_FUNCTION(X509_ATTRIBUTE) +DECLARE_ASN1_DUP_FUNCTION(X509_CRL) +DECLARE_ASN1_DUP_FUNCTION(X509_EXTENSION) +DECLARE_ASN1_DUP_FUNCTION(X509_PUBKEY) +DECLARE_ASN1_DUP_FUNCTION(X509_REQ) +DECLARE_ASN1_DUP_FUNCTION(X509_REVOKED) +int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, + void *pval); +void X509_ALGOR_get0(const ASN1_OBJECT **paobj, int *pptype, + const void **ppval, const X509_ALGOR *algor); +void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); +int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b); +int X509_ALGOR_copy(X509_ALGOR *dest, const X509_ALGOR *src); + +DECLARE_ASN1_DUP_FUNCTION(X509_NAME) +DECLARE_ASN1_DUP_FUNCTION(X509_NAME_ENTRY) + +int X509_cmp_time(const ASN1_TIME *s, time_t *t); +int X509_cmp_current_time(const ASN1_TIME *s); +int X509_cmp_timeframe(const X509_VERIFY_PARAM *vpm, + const ASN1_TIME *start, const ASN1_TIME *end); +ASN1_TIME *X509_time_adj(ASN1_TIME *s, long adj, time_t *t); +ASN1_TIME *X509_time_adj_ex(ASN1_TIME *s, + int offset_day, long offset_sec, time_t *t); +ASN1_TIME *X509_gmtime_adj(ASN1_TIME *s, long adj); + +const char *X509_get_default_cert_area(void); +const char *X509_get_default_cert_dir(void); +const char *X509_get_default_cert_file(void); +const char *X509_get_default_cert_dir_env(void); +const char *X509_get_default_cert_file_env(void); +const char *X509_get_default_private_dir(void); + +X509_REQ *X509_to_X509_REQ(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); +X509 *X509_REQ_to_X509(X509_REQ *r, int days, EVP_PKEY *pkey); + +DECLARE_ASN1_FUNCTIONS(X509_ALGOR) +DECLARE_ASN1_ENCODE_FUNCTIONS(X509_ALGORS, X509_ALGORS, X509_ALGORS) +DECLARE_ASN1_FUNCTIONS(X509_VAL) + +DECLARE_ASN1_FUNCTIONS(X509_PUBKEY) + +X509_PUBKEY *X509_PUBKEY_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +int X509_PUBKEY_set(X509_PUBKEY **x, EVP_PKEY *pkey); +EVP_PKEY *X509_PUBKEY_get0(const X509_PUBKEY *key); +EVP_PKEY *X509_PUBKEY_get(const X509_PUBKEY *key); +int X509_get_pubkey_parameters(EVP_PKEY *pkey, STACK_OF(X509) *chain); +long X509_get_pathlen(X509 *x); +DECLARE_ASN1_ENCODE_FUNCTIONS_only(EVP_PKEY, PUBKEY) +EVP_PKEY *d2i_PUBKEY_ex(EVP_PKEY **a, const unsigned char **pp, long length, + OSSL_LIB_CTX *libctx, const char *propq); +# ifndef OPENSSL_NO_DEPRECATED_3_0 +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0,RSA, RSA_PUBKEY) +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_DSA +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0,DSA, DSA_PUBKEY) +# endif +# endif +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# ifndef OPENSSL_NO_EC +DECLARE_ASN1_ENCODE_FUNCTIONS_only_attr(OSSL_DEPRECATEDIN_3_0, EC_KEY, EC_PUBKEY) +# endif +# endif + +DECLARE_ASN1_FUNCTIONS(X509_SIG) +void X509_SIG_get0(const X509_SIG *sig, const X509_ALGOR **palg, + const ASN1_OCTET_STRING **pdigest); +void X509_SIG_getm(X509_SIG *sig, X509_ALGOR **palg, + ASN1_OCTET_STRING **pdigest); + +DECLARE_ASN1_FUNCTIONS(X509_REQ_INFO) +DECLARE_ASN1_FUNCTIONS(X509_REQ) +X509_REQ *X509_REQ_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +DECLARE_ASN1_FUNCTIONS(X509_ATTRIBUTE) +X509_ATTRIBUTE *X509_ATTRIBUTE_create(int nid, int atrtype, void *value); + +DECLARE_ASN1_FUNCTIONS(X509_EXTENSION) +DECLARE_ASN1_ENCODE_FUNCTIONS(X509_EXTENSIONS, X509_EXTENSIONS, X509_EXTENSIONS) + +DECLARE_ASN1_FUNCTIONS(X509_NAME_ENTRY) + +DECLARE_ASN1_FUNCTIONS(X509_NAME) + +int X509_NAME_set(X509_NAME **xn, const X509_NAME *name); + +DECLARE_ASN1_FUNCTIONS(X509_CINF) +DECLARE_ASN1_FUNCTIONS(X509) +X509 *X509_new_ex(OSSL_LIB_CTX *libctx, const 
char *propq); +DECLARE_ASN1_FUNCTIONS(X509_CERT_AUX) + +#define X509_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509, l, p, newf, dupf, freef) +int X509_set_ex_data(X509 *r, int idx, void *arg); +void *X509_get_ex_data(const X509 *r, int idx); +DECLARE_ASN1_ENCODE_FUNCTIONS_only(X509,X509_AUX) + +int i2d_re_X509_tbs(X509 *x, unsigned char **pp); + +int X509_SIG_INFO_get(const X509_SIG_INFO *siginf, int *mdnid, int *pknid, + int *secbits, uint32_t *flags); +void X509_SIG_INFO_set(X509_SIG_INFO *siginf, int mdnid, int pknid, + int secbits, uint32_t flags); + +int X509_get_signature_info(X509 *x, int *mdnid, int *pknid, int *secbits, + uint32_t *flags); + +void X509_get0_signature(const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg, const X509 *x); +int X509_get_signature_nid(const X509 *x); + +void X509_set0_distinguishing_id(X509 *x, ASN1_OCTET_STRING *d_id); +ASN1_OCTET_STRING *X509_get0_distinguishing_id(X509 *x); +void X509_REQ_set0_distinguishing_id(X509_REQ *x, ASN1_OCTET_STRING *d_id); +ASN1_OCTET_STRING *X509_REQ_get0_distinguishing_id(X509_REQ *x); + +int X509_alias_set1(X509 *x, const unsigned char *name, int len); +int X509_keyid_set1(X509 *x, const unsigned char *id, int len); +unsigned char *X509_alias_get0(X509 *x, int *len); +unsigned char *X509_keyid_get0(X509 *x, int *len); + +DECLARE_ASN1_FUNCTIONS(X509_REVOKED) +DECLARE_ASN1_FUNCTIONS(X509_CRL_INFO) +DECLARE_ASN1_FUNCTIONS(X509_CRL) +X509_CRL *X509_CRL_new_ex(OSSL_LIB_CTX *libctx, const char *propq); + +int X509_CRL_add0_revoked(X509_CRL *crl, X509_REVOKED *rev); +int X509_CRL_get0_by_serial(X509_CRL *crl, + X509_REVOKED **ret, const ASN1_INTEGER *serial); +int X509_CRL_get0_by_cert(X509_CRL *crl, X509_REVOKED **ret, X509 *x); + +X509_PKEY *X509_PKEY_new(void); +void X509_PKEY_free(X509_PKEY *a); + +DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKI) +DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKAC) +DECLARE_ASN1_FUNCTIONS(NETSCAPE_CERT_SEQUENCE) + +X509_INFO *X509_INFO_new(void); +void X509_INFO_free(X509_INFO *a); +char *X509_NAME_oneline(const X509_NAME *a, char *buf, int size); + +#ifndef OPENSSL_NO_DEPRECATED_3_0 +OSSL_DEPRECATEDIN_3_0 +int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *algor1, + ASN1_BIT_STRING *signature, char *data, EVP_PKEY *pkey); +OSSL_DEPRECATEDIN_3_0 +int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data, + unsigned char *md, unsigned int *len); +OSSL_DEPRECATEDIN_3_0 +int ASN1_sign(i2d_of_void *i2d, X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, char *data, EVP_PKEY *pkey, + const EVP_MD *type); +#endif +int ASN1_item_digest(const ASN1_ITEM *it, const EVP_MD *type, void *data, + unsigned char *md, unsigned int *len); +int ASN1_item_verify(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + EVP_PKEY *pkey); +int ASN1_item_verify_ctx(const ASN1_ITEM *it, const X509_ALGOR *alg, + const ASN1_BIT_STRING *signature, const void *data, + EVP_MD_CTX *ctx); +int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2, + ASN1_BIT_STRING *signature, const void *data, + EVP_PKEY *pkey, const EVP_MD *md); +int ASN1_item_sign_ctx(const ASN1_ITEM *it, X509_ALGOR *algor1, + X509_ALGOR *algor2, ASN1_BIT_STRING *signature, + const void *data, EVP_MD_CTX *ctx); + +#define X509_VERSION_1 0 +#define X509_VERSION_2 1 +#define X509_VERSION_3 2 + +long X509_get_version(const X509 *x); +int X509_set_version(X509 *x, long version); +int X509_set_serialNumber(X509 *x, ASN1_INTEGER *serial); 
+ASN1_INTEGER *X509_get_serialNumber(X509 *x); +const ASN1_INTEGER *X509_get0_serialNumber(const X509 *x); +int X509_set_issuer_name(X509 *x, const X509_NAME *name); +X509_NAME *X509_get_issuer_name(const X509 *a); +int X509_set_subject_name(X509 *x, const X509_NAME *name); +X509_NAME *X509_get_subject_name(const X509 *a); +const ASN1_TIME * X509_get0_notBefore(const X509 *x); +ASN1_TIME *X509_getm_notBefore(const X509 *x); +int X509_set1_notBefore(X509 *x, const ASN1_TIME *tm); +const ASN1_TIME *X509_get0_notAfter(const X509 *x); +ASN1_TIME *X509_getm_notAfter(const X509 *x); +int X509_set1_notAfter(X509 *x, const ASN1_TIME *tm); +int X509_set_pubkey(X509 *x, EVP_PKEY *pkey); +int X509_up_ref(X509 *x); +int X509_get_signature_type(const X509 *x); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_get_notBefore X509_getm_notBefore +# define X509_get_notAfter X509_getm_notAfter +# define X509_set_notBefore X509_set1_notBefore +# define X509_set_notAfter X509_set1_notAfter +#endif + + +/* + * This one is only used so that a binary form can output, as in + * i2d_X509_PUBKEY(X509_get_X509_PUBKEY(x), &buf) + */ +X509_PUBKEY *X509_get_X509_PUBKEY(const X509 *x); +const STACK_OF(X509_EXTENSION) *X509_get0_extensions(const X509 *x); +void X509_get0_uids(const X509 *x, const ASN1_BIT_STRING **piuid, + const ASN1_BIT_STRING **psuid); +const X509_ALGOR *X509_get0_tbs_sigalg(const X509 *x); + +EVP_PKEY *X509_get0_pubkey(const X509 *x); +EVP_PKEY *X509_get_pubkey(X509 *x); +ASN1_BIT_STRING *X509_get0_pubkey_bitstr(const X509 *x); + +#define X509_REQ_VERSION_1 0 + +long X509_REQ_get_version(const X509_REQ *req); +int X509_REQ_set_version(X509_REQ *x, long version); +X509_NAME *X509_REQ_get_subject_name(const X509_REQ *req); +int X509_REQ_set_subject_name(X509_REQ *req, const X509_NAME *name); +void X509_REQ_get0_signature(const X509_REQ *req, const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +void X509_REQ_set0_signature(X509_REQ *req, ASN1_BIT_STRING *psig); +int X509_REQ_set1_signature_algo(X509_REQ *req, X509_ALGOR *palg); +int X509_REQ_get_signature_nid(const X509_REQ *req); +int i2d_re_X509_REQ_tbs(X509_REQ *req, unsigned char **pp); +int X509_REQ_set_pubkey(X509_REQ *x, EVP_PKEY *pkey); +EVP_PKEY *X509_REQ_get_pubkey(X509_REQ *req); +EVP_PKEY *X509_REQ_get0_pubkey(const X509_REQ *req); +X509_PUBKEY *X509_REQ_get_X509_PUBKEY(X509_REQ *req); +int X509_REQ_extension_nid(int nid); +int *X509_REQ_get_extension_nids(void); +void X509_REQ_set_extension_nids(int *nids); +STACK_OF(X509_EXTENSION) *X509_REQ_get_extensions(OSSL_FUTURE_CONST X509_REQ *req); +int X509_REQ_add_extensions_nid(X509_REQ *req, + const STACK_OF(X509_EXTENSION) *exts, int nid); +int X509_REQ_add_extensions(X509_REQ *req, const STACK_OF(X509_EXTENSION) *ext); +int X509_REQ_get_attr_count(const X509_REQ *req); +int X509_REQ_get_attr_by_NID(const X509_REQ *req, int nid, int lastpos); +int X509_REQ_get_attr_by_OBJ(const X509_REQ *req, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *X509_REQ_get_attr(const X509_REQ *req, int loc); +X509_ATTRIBUTE *X509_REQ_delete_attr(X509_REQ *req, int loc); +int X509_REQ_add1_attr(X509_REQ *req, X509_ATTRIBUTE *attr); +int X509_REQ_add1_attr_by_OBJ(X509_REQ *req, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len); +int X509_REQ_add1_attr_by_NID(X509_REQ *req, + int nid, int type, + const unsigned char *bytes, int len); +int X509_REQ_add1_attr_by_txt(X509_REQ *req, + const char *attrname, int type, + const unsigned char *bytes, int len); + +#define 
X509_CRL_VERSION_1 0 +#define X509_CRL_VERSION_2 1 + +int X509_CRL_set_version(X509_CRL *x, long version); +int X509_CRL_set_issuer_name(X509_CRL *x, const X509_NAME *name); +int X509_CRL_set1_lastUpdate(X509_CRL *x, const ASN1_TIME *tm); +int X509_CRL_set1_nextUpdate(X509_CRL *x, const ASN1_TIME *tm); +int X509_CRL_sort(X509_CRL *crl); +int X509_CRL_up_ref(X509_CRL *crl); + +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_CRL_set_lastUpdate X509_CRL_set1_lastUpdate +# define X509_CRL_set_nextUpdate X509_CRL_set1_nextUpdate +#endif + +long X509_CRL_get_version(const X509_CRL *crl); +const ASN1_TIME *X509_CRL_get0_lastUpdate(const X509_CRL *crl); +const ASN1_TIME *X509_CRL_get0_nextUpdate(const X509_CRL *crl); +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +OSSL_DEPRECATEDIN_1_1_0 ASN1_TIME *X509_CRL_get_lastUpdate(X509_CRL *crl); +OSSL_DEPRECATEDIN_1_1_0 ASN1_TIME *X509_CRL_get_nextUpdate(X509_CRL *crl); +#endif +X509_NAME *X509_CRL_get_issuer(const X509_CRL *crl); +const STACK_OF(X509_EXTENSION) *X509_CRL_get0_extensions(const X509_CRL *crl); +STACK_OF(X509_REVOKED) *X509_CRL_get_REVOKED(X509_CRL *crl); +void X509_CRL_get0_signature(const X509_CRL *crl, const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +int X509_CRL_get_signature_nid(const X509_CRL *crl); +int i2d_re_X509_CRL_tbs(X509_CRL *req, unsigned char **pp); + +const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(const X509_REVOKED *x); +int X509_REVOKED_set_serialNumber(X509_REVOKED *x, ASN1_INTEGER *serial); +const ASN1_TIME *X509_REVOKED_get0_revocationDate(const X509_REVOKED *x); +int X509_REVOKED_set_revocationDate(X509_REVOKED *r, ASN1_TIME *tm); +const STACK_OF(X509_EXTENSION) * +X509_REVOKED_get0_extensions(const X509_REVOKED *r); + +X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer, + EVP_PKEY *skey, const EVP_MD *md, unsigned int flags); + +int X509_REQ_check_private_key(const X509_REQ *req, EVP_PKEY *pkey); + +int X509_check_private_key(const X509 *cert, const EVP_PKEY *pkey); +int X509_chain_check_suiteb(int *perror_depth, + X509 *x, STACK_OF(X509) *chain, + unsigned long flags); +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags); +void OSSL_STACK_OF_X509_free(STACK_OF(X509) *certs); +STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain); + +int X509_issuer_and_serial_cmp(const X509 *a, const X509 *b); +unsigned long X509_issuer_and_serial_hash(X509 *a); + +int X509_issuer_name_cmp(const X509 *a, const X509 *b); +unsigned long X509_issuer_name_hash(X509 *a); + +int X509_subject_name_cmp(const X509 *a, const X509 *b); +unsigned long X509_subject_name_hash(X509 *x); + +# ifndef OPENSSL_NO_MD5 +unsigned long X509_issuer_name_hash_old(X509 *a); +unsigned long X509_subject_name_hash_old(X509 *x); +# endif + +# define X509_ADD_FLAG_DEFAULT 0 +# define X509_ADD_FLAG_UP_REF 0x1 +# define X509_ADD_FLAG_PREPEND 0x2 +# define X509_ADD_FLAG_NO_DUP 0x4 +# define X509_ADD_FLAG_NO_SS 0x8 +int X509_add_cert(STACK_OF(X509) *sk, X509 *cert, int flags); +int X509_add_certs(STACK_OF(X509) *sk, STACK_OF(X509) *certs, int flags); + +int X509_cmp(const X509 *a, const X509 *b); +int X509_NAME_cmp(const X509_NAME *a, const X509_NAME *b); +#ifndef OPENSSL_NO_DEPRECATED_3_0 +# define X509_NAME_hash(x) X509_NAME_hash_ex(x, NULL, NULL, NULL) +OSSL_DEPRECATEDIN_3_0 int X509_certificate_type(const X509 *x, + const EVP_PKEY *pubkey); +#endif +unsigned long X509_NAME_hash_ex(const X509_NAME *x, OSSL_LIB_CTX *libctx, + const char *propq, int *ok); +unsigned long X509_NAME_hash_old(const X509_NAME *x); + +int 
X509_CRL_cmp(const X509_CRL *a, const X509_CRL *b); +int X509_CRL_match(const X509_CRL *a, const X509_CRL *b); +int X509_aux_print(BIO *out, X509 *x, int indent); +# ifndef OPENSSL_NO_STDIO +int X509_print_ex_fp(FILE *bp, X509 *x, unsigned long nmflag, + unsigned long cflag); +int X509_print_fp(FILE *bp, X509 *x); +int X509_CRL_print_fp(FILE *bp, X509_CRL *x); +int X509_REQ_print_fp(FILE *bp, X509_REQ *req); +int X509_NAME_print_ex_fp(FILE *fp, const X509_NAME *nm, int indent, + unsigned long flags); +# endif + +int X509_NAME_print(BIO *bp, const X509_NAME *name, int obase); +int X509_NAME_print_ex(BIO *out, const X509_NAME *nm, int indent, + unsigned long flags); +int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflag, + unsigned long cflag); +int X509_print(BIO *bp, X509 *x); +int X509_ocspid_print(BIO *bp, X509 *x); +int X509_CRL_print_ex(BIO *out, X509_CRL *x, unsigned long nmflag); +int X509_CRL_print(BIO *bp, X509_CRL *x); +int X509_REQ_print_ex(BIO *bp, X509_REQ *x, unsigned long nmflag, + unsigned long cflag); +int X509_REQ_print(BIO *bp, X509_REQ *req); + +int X509_NAME_entry_count(const X509_NAME *name); +int X509_NAME_get_text_by_NID(const X509_NAME *name, int nid, + char *buf, int len); +int X509_NAME_get_text_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj, + char *buf, int len); + +/* + * NOTE: you should be passing -1, not 0 as lastpos. The functions that use + * lastpos, search after that position on. + */ +int X509_NAME_get_index_by_NID(const X509_NAME *name, int nid, int lastpos); +int X509_NAME_get_index_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj, + int lastpos); +X509_NAME_ENTRY *X509_NAME_get_entry(const X509_NAME *name, int loc); +X509_NAME_ENTRY *X509_NAME_delete_entry(X509_NAME *name, int loc); +int X509_NAME_add_entry(X509_NAME *name, const X509_NAME_ENTRY *ne, + int loc, int set); +int X509_NAME_add_entry_by_OBJ(X509_NAME *name, const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len, int loc, + int set); +int X509_NAME_add_entry_by_NID(X509_NAME *name, int nid, int type, + const unsigned char *bytes, int len, int loc, + int set); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_txt(X509_NAME_ENTRY **ne, + const char *field, int type, + const unsigned char *bytes, + int len); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_NID(X509_NAME_ENTRY **ne, int nid, + int type, + const unsigned char *bytes, + int len); +int X509_NAME_add_entry_by_txt(X509_NAME *name, const char *field, int type, + const unsigned char *bytes, int len, int loc, + int set); +X509_NAME_ENTRY *X509_NAME_ENTRY_create_by_OBJ(X509_NAME_ENTRY **ne, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, + int len); +int X509_NAME_ENTRY_set_object(X509_NAME_ENTRY *ne, const ASN1_OBJECT *obj); +int X509_NAME_ENTRY_set_data(X509_NAME_ENTRY *ne, int type, + const unsigned char *bytes, int len); +ASN1_OBJECT *X509_NAME_ENTRY_get_object(const X509_NAME_ENTRY *ne); +ASN1_STRING * X509_NAME_ENTRY_get_data(const X509_NAME_ENTRY *ne); +int X509_NAME_ENTRY_set(const X509_NAME_ENTRY *ne); + +int X509_NAME_get0_der(const X509_NAME *nm, const unsigned char **pder, + size_t *pderlen); + +int X509v3_get_ext_count(const STACK_OF(X509_EXTENSION) *x); +int X509v3_get_ext_by_NID(const STACK_OF(X509_EXTENSION) *x, + int nid, int lastpos); +int X509v3_get_ext_by_OBJ(const STACK_OF(X509_EXTENSION) *x, + const ASN1_OBJECT *obj, int lastpos); +int X509v3_get_ext_by_critical(const STACK_OF(X509_EXTENSION) *x, + int crit, int lastpos); +X509_EXTENSION *X509v3_get_ext(const 
STACK_OF(X509_EXTENSION) *x, int loc); +X509_EXTENSION *X509v3_delete_ext(STACK_OF(X509_EXTENSION) *x, int loc); +STACK_OF(X509_EXTENSION) *X509v3_add_ext(STACK_OF(X509_EXTENSION) **x, + X509_EXTENSION *ex, int loc); +STACK_OF(X509_EXTENSION) *X509v3_add_extensions(STACK_OF(X509_EXTENSION) **target, + const STACK_OF(X509_EXTENSION) *exts); + +int X509_get_ext_count(const X509 *x); +int X509_get_ext_by_NID(const X509 *x, int nid, int lastpos); +int X509_get_ext_by_OBJ(const X509 *x, const ASN1_OBJECT *obj, int lastpos); +int X509_get_ext_by_critical(const X509 *x, int crit, int lastpos); +X509_EXTENSION *X509_get_ext(const X509 *x, int loc); +X509_EXTENSION *X509_delete_ext(X509 *x, int loc); +int X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc); +void *X509_get_ext_d2i(const X509 *x, int nid, int *crit, int *idx); +int X509_add1_ext_i2d(X509 *x, int nid, void *value, int crit, + unsigned long flags); + +int X509_CRL_get_ext_count(const X509_CRL *x); +int X509_CRL_get_ext_by_NID(const X509_CRL *x, int nid, int lastpos); +int X509_CRL_get_ext_by_OBJ(const X509_CRL *x, const ASN1_OBJECT *obj, + int lastpos); +int X509_CRL_get_ext_by_critical(const X509_CRL *x, int crit, int lastpos); +X509_EXTENSION *X509_CRL_get_ext(const X509_CRL *x, int loc); +X509_EXTENSION *X509_CRL_delete_ext(X509_CRL *x, int loc); +int X509_CRL_add_ext(X509_CRL *x, X509_EXTENSION *ex, int loc); +void *X509_CRL_get_ext_d2i(const X509_CRL *x, int nid, int *crit, int *idx); +int X509_CRL_add1_ext_i2d(X509_CRL *x, int nid, void *value, int crit, + unsigned long flags); + +int X509_REVOKED_get_ext_count(const X509_REVOKED *x); +int X509_REVOKED_get_ext_by_NID(const X509_REVOKED *x, int nid, int lastpos); +int X509_REVOKED_get_ext_by_OBJ(const X509_REVOKED *x, const ASN1_OBJECT *obj, + int lastpos); +int X509_REVOKED_get_ext_by_critical(const X509_REVOKED *x, int crit, + int lastpos); +X509_EXTENSION *X509_REVOKED_get_ext(const X509_REVOKED *x, int loc); +X509_EXTENSION *X509_REVOKED_delete_ext(X509_REVOKED *x, int loc); +int X509_REVOKED_add_ext(X509_REVOKED *x, X509_EXTENSION *ex, int loc); +void *X509_REVOKED_get_ext_d2i(const X509_REVOKED *x, int nid, int *crit, + int *idx); +int X509_REVOKED_add1_ext_i2d(X509_REVOKED *x, int nid, void *value, int crit, + unsigned long flags); + +X509_EXTENSION *X509_EXTENSION_create_by_NID(X509_EXTENSION **ex, + int nid, int crit, + ASN1_OCTET_STRING *data); +X509_EXTENSION *X509_EXTENSION_create_by_OBJ(X509_EXTENSION **ex, + const ASN1_OBJECT *obj, int crit, + ASN1_OCTET_STRING *data); +int X509_EXTENSION_set_object(X509_EXTENSION *ex, const ASN1_OBJECT *obj); +int X509_EXTENSION_set_critical(X509_EXTENSION *ex, int crit); +int X509_EXTENSION_set_data(X509_EXTENSION *ex, ASN1_OCTET_STRING *data); +ASN1_OBJECT *X509_EXTENSION_get_object(X509_EXTENSION *ex); +ASN1_OCTET_STRING *X509_EXTENSION_get_data(X509_EXTENSION *ne); +int X509_EXTENSION_get_critical(const X509_EXTENSION *ex); + +int X509at_get_attr_count(const STACK_OF(X509_ATTRIBUTE) *x); +int X509at_get_attr_by_NID(const STACK_OF(X509_ATTRIBUTE) *x, int nid, + int lastpos); +int X509at_get_attr_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *sk, + const ASN1_OBJECT *obj, int lastpos); +X509_ATTRIBUTE *X509at_get_attr(const STACK_OF(X509_ATTRIBUTE) *x, int loc); +X509_ATTRIBUTE *X509at_delete_attr(STACK_OF(X509_ATTRIBUTE) *x, int loc); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr(STACK_OF(X509_ATTRIBUTE) **x, + X509_ATTRIBUTE *attr); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_OBJ(STACK_OF(X509_ATTRIBUTE) + **x, const ASN1_OBJECT *obj, 
+ int type, + const unsigned char *bytes, + int len); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_NID(STACK_OF(X509_ATTRIBUTE) + **x, int nid, int type, + const unsigned char *bytes, + int len); +STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_txt(STACK_OF(X509_ATTRIBUTE) + **x, const char *attrname, + int type, + const unsigned char *bytes, + int len); +void *X509at_get0_data_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *x, + const ASN1_OBJECT *obj, int lastpos, int type); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_NID(X509_ATTRIBUTE **attr, int nid, + int atrtype, const void *data, + int len); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_OBJ(X509_ATTRIBUTE **attr, + const ASN1_OBJECT *obj, + int atrtype, const void *data, + int len); +X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_txt(X509_ATTRIBUTE **attr, + const char *atrname, int type, + const unsigned char *bytes, + int len); +int X509_ATTRIBUTE_set1_object(X509_ATTRIBUTE *attr, const ASN1_OBJECT *obj); +int X509_ATTRIBUTE_set1_data(X509_ATTRIBUTE *attr, int attrtype, + const void *data, int len); +void *X509_ATTRIBUTE_get0_data(X509_ATTRIBUTE *attr, int idx, int atrtype, + void *data); +int X509_ATTRIBUTE_count(const X509_ATTRIBUTE *attr); +ASN1_OBJECT *X509_ATTRIBUTE_get0_object(X509_ATTRIBUTE *attr); +ASN1_TYPE *X509_ATTRIBUTE_get0_type(X509_ATTRIBUTE *attr, int idx); + +int EVP_PKEY_get_attr_count(const EVP_PKEY *key); +int EVP_PKEY_get_attr_by_NID(const EVP_PKEY *key, int nid, int lastpos); +int EVP_PKEY_get_attr_by_OBJ(const EVP_PKEY *key, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *EVP_PKEY_get_attr(const EVP_PKEY *key, int loc); +X509_ATTRIBUTE *EVP_PKEY_delete_attr(EVP_PKEY *key, int loc); +int EVP_PKEY_add1_attr(EVP_PKEY *key, X509_ATTRIBUTE *attr); +int EVP_PKEY_add1_attr_by_OBJ(EVP_PKEY *key, + const ASN1_OBJECT *obj, int type, + const unsigned char *bytes, int len); +int EVP_PKEY_add1_attr_by_NID(EVP_PKEY *key, + int nid, int type, + const unsigned char *bytes, int len); +int EVP_PKEY_add1_attr_by_txt(EVP_PKEY *key, + const char *attrname, int type, + const unsigned char *bytes, int len); + +/* lookup a cert from a X509 STACK */ +X509 *X509_find_by_issuer_and_serial(STACK_OF(X509) *sk, const X509_NAME *name, + const ASN1_INTEGER *serial); +X509 *X509_find_by_subject(STACK_OF(X509) *sk, const X509_NAME *name); + +DECLARE_ASN1_FUNCTIONS(PBEPARAM) +DECLARE_ASN1_FUNCTIONS(PBE2PARAM) +DECLARE_ASN1_FUNCTIONS(PBKDF2PARAM) +DECLARE_ASN1_FUNCTIONS(PBMAC1PARAM) +# ifndef OPENSSL_NO_SCRYPT +DECLARE_ASN1_FUNCTIONS(SCRYPT_PARAMS) +# endif + +int PKCS5_pbe_set0_algor(X509_ALGOR *algor, int alg, int iter, + const unsigned char *salt, int saltlen); +int PKCS5_pbe_set0_algor_ex(X509_ALGOR *algor, int alg, int iter, + const unsigned char *salt, int saltlen, + OSSL_LIB_CTX *libctx); + +X509_ALGOR *PKCS5_pbe_set(int alg, int iter, + const unsigned char *salt, int saltlen); +X509_ALGOR *PKCS5_pbe_set_ex(int alg, int iter, + const unsigned char *salt, int saltlen, + OSSL_LIB_CTX *libctx); + +X509_ALGOR *PKCS5_pbe2_set(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen); +X509_ALGOR *PKCS5_pbe2_set_iv(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen, + unsigned char *aiv, int prf_nid); +X509_ALGOR *PKCS5_pbe2_set_iv_ex(const EVP_CIPHER *cipher, int iter, + unsigned char *salt, int saltlen, + unsigned char *aiv, int prf_nid, + OSSL_LIB_CTX *libctx); + +#ifndef OPENSSL_NO_SCRYPT +X509_ALGOR *PKCS5_pbe2_set_scrypt(const EVP_CIPHER *cipher, + const unsigned char *salt, int saltlen, + unsigned char *aiv, 
uint64_t N, uint64_t r, + uint64_t p); +#endif + +X509_ALGOR *PKCS5_pbkdf2_set(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen); +X509_ALGOR *PKCS5_pbkdf2_set_ex(int iter, unsigned char *salt, int saltlen, + int prf_nid, int keylen, + OSSL_LIB_CTX *libctx); + +PBKDF2PARAM *PBMAC1_get1_pbkdf2_param(const X509_ALGOR *macalg); +/* PKCS#8 utilities */ + +DECLARE_ASN1_FUNCTIONS(PKCS8_PRIV_KEY_INFO) + +EVP_PKEY *EVP_PKCS82PKEY(const PKCS8_PRIV_KEY_INFO *p8); +EVP_PKEY *EVP_PKCS82PKEY_ex(const PKCS8_PRIV_KEY_INFO *p8, OSSL_LIB_CTX *libctx, + const char *propq); +PKCS8_PRIV_KEY_INFO *EVP_PKEY2PKCS8(const EVP_PKEY *pkey); + +int PKCS8_pkey_set0(PKCS8_PRIV_KEY_INFO *priv, ASN1_OBJECT *aobj, + int version, int ptype, void *pval, + unsigned char *penc, int penclen); +int PKCS8_pkey_get0(const ASN1_OBJECT **ppkalg, + const unsigned char **pk, int *ppklen, + const X509_ALGOR **pa, const PKCS8_PRIV_KEY_INFO *p8); + +const STACK_OF(X509_ATTRIBUTE) * +PKCS8_pkey_get0_attrs(const PKCS8_PRIV_KEY_INFO *p8); +int PKCS8_pkey_add1_attr(PKCS8_PRIV_KEY_INFO *p8, X509_ATTRIBUTE *attr); +int PKCS8_pkey_add1_attr_by_NID(PKCS8_PRIV_KEY_INFO *p8, int nid, int type, + const unsigned char *bytes, int len); +int PKCS8_pkey_add1_attr_by_OBJ(PKCS8_PRIV_KEY_INFO *p8, const ASN1_OBJECT *obj, + int type, const unsigned char *bytes, int len); + + +void X509_PUBKEY_set0_public_key(X509_PUBKEY *pub, + unsigned char *penc, int penclen); +int X509_PUBKEY_set0_param(X509_PUBKEY *pub, ASN1_OBJECT *aobj, + int ptype, void *pval, + unsigned char *penc, int penclen); +int X509_PUBKEY_get0_param(ASN1_OBJECT **ppkalg, + const unsigned char **pk, int *ppklen, + X509_ALGOR **pa, const X509_PUBKEY *pub); +int X509_PUBKEY_eq(const X509_PUBKEY *a, const X509_PUBKEY *b); + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/x509_acert.h b/contrib/openssl-cmake/common/include/openssl/x509_acert.h new file mode 100644 index 000000000000..9dde625677f9 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/x509_acert.h @@ -0,0 +1,294 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509_acert.h.in + * + * Copyright 2022-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_ACERT_H +# define OPENSSL_X509_ACERT_H +# pragma once + +# include +# include +# include + +typedef struct X509_acert_st X509_ACERT; +typedef struct X509_acert_info_st X509_ACERT_INFO; +typedef struct ossl_object_digest_info_st OSSL_OBJECT_DIGEST_INFO; +typedef struct ossl_issuer_serial_st OSSL_ISSUER_SERIAL; +typedef struct X509_acert_issuer_v2form_st X509_ACERT_ISSUER_V2FORM; + +DECLARE_ASN1_FUNCTIONS(X509_ACERT) +DECLARE_ASN1_DUP_FUNCTION(X509_ACERT) +DECLARE_ASN1_ITEM(X509_ACERT_INFO) +DECLARE_ASN1_ALLOC_FUNCTIONS(X509_ACERT_INFO) +DECLARE_ASN1_ALLOC_FUNCTIONS(OSSL_OBJECT_DIGEST_INFO) +DECLARE_ASN1_ALLOC_FUNCTIONS(OSSL_ISSUER_SERIAL) +DECLARE_ASN1_ALLOC_FUNCTIONS(X509_ACERT_ISSUER_V2FORM) + +# ifndef OPENSSL_NO_STDIO +X509_ACERT *d2i_X509_ACERT_fp(FILE *fp, X509_ACERT **acert); +int i2d_X509_ACERT_fp(FILE *fp, const X509_ACERT *acert); +# endif + +DECLARE_PEM_rw(X509_ACERT, X509_ACERT) + +X509_ACERT *d2i_X509_ACERT_bio(BIO *bp, X509_ACERT **acert); +int i2d_X509_ACERT_bio(BIO *bp, const X509_ACERT *acert); + +int X509_ACERT_sign(X509_ACERT *x, EVP_PKEY *pkey, const EVP_MD *md); +int X509_ACERT_sign_ctx(X509_ACERT *x, EVP_MD_CTX *ctx); +int X509_ACERT_verify(X509_ACERT *a, EVP_PKEY *r); + +# define X509_ACERT_VERSION_2 1 + +const GENERAL_NAMES *X509_ACERT_get0_holder_entityName(const X509_ACERT *x); +const OSSL_ISSUER_SERIAL *X509_ACERT_get0_holder_baseCertId(const X509_ACERT *x); +const OSSL_OBJECT_DIGEST_INFO * X509_ACERT_get0_holder_digest(const X509_ACERT *x); +const X509_NAME *X509_ACERT_get0_issuerName(const X509_ACERT *x); +long X509_ACERT_get_version(const X509_ACERT *x); +void X509_ACERT_get0_signature(const X509_ACERT *x, + const ASN1_BIT_STRING **psig, + const X509_ALGOR **palg); +int X509_ACERT_get_signature_nid(const X509_ACERT *x); +const X509_ALGOR *X509_ACERT_get0_info_sigalg(const X509_ACERT *x); +const ASN1_INTEGER *X509_ACERT_get0_serialNumber(const X509_ACERT *x); +const ASN1_TIME *X509_ACERT_get0_notBefore(const X509_ACERT *x); +const ASN1_TIME *X509_ACERT_get0_notAfter(const X509_ACERT *x); +const ASN1_BIT_STRING *X509_ACERT_get0_issuerUID(const X509_ACERT *x); + +int X509_ACERT_print(BIO *bp, X509_ACERT *x); +int X509_ACERT_print_ex(BIO *bp, X509_ACERT *x, unsigned long nmflags, + unsigned long cflag); + +int X509_ACERT_get_attr_count(const X509_ACERT *x); +int X509_ACERT_get_attr_by_NID(const X509_ACERT *x, int nid, int lastpos); +int X509_ACERT_get_attr_by_OBJ(const X509_ACERT *x, const ASN1_OBJECT *obj, + int lastpos); +X509_ATTRIBUTE *X509_ACERT_get_attr(const X509_ACERT *x, int loc); +X509_ATTRIBUTE *X509_ACERT_delete_attr(X509_ACERT *x, int loc); + +void *X509_ACERT_get_ext_d2i(const X509_ACERT *x, int nid, int *crit, int *idx); +int X509_ACERT_add1_ext_i2d(X509_ACERT *x, int nid, void *value, int crit, + unsigned long flags); +const STACK_OF(X509_EXTENSION) *X509_ACERT_get0_extensions(const X509_ACERT *x); + +# define OSSL_OBJECT_DIGEST_INFO_PUBLIC_KEY 0 +# define OSSL_OBJECT_DIGEST_INFO_PUBLIC_KEY_CERT 1 +# define OSSL_OBJECT_DIGEST_INFO_OTHER 2 /* must not be used in RFC 5755 profile */ +int X509_ACERT_set_version(X509_ACERT *x, long version); +void X509_ACERT_set0_holder_entityName(X509_ACERT *x, GENERAL_NAMES *name); +void X509_ACERT_set0_holder_baseCertId(X509_ACERT *x, OSSL_ISSUER_SERIAL *isss); +void X509_ACERT_set0_holder_digest(X509_ACERT *x, + OSSL_OBJECT_DIGEST_INFO *dinfo); + +int 
X509_ACERT_add1_attr(X509_ACERT *x, X509_ATTRIBUTE *attr); +int X509_ACERT_add1_attr_by_OBJ(X509_ACERT *x, const ASN1_OBJECT *obj, + int type, const void *bytes, int len); +int X509_ACERT_add1_attr_by_NID(X509_ACERT *x, int nid, int type, + const void *bytes, int len); +int X509_ACERT_add1_attr_by_txt(X509_ACERT *x, const char *attrname, int type, + const unsigned char *bytes, int len); +int X509_ACERT_add_attr_nconf(CONF *conf, const char *section, + X509_ACERT *acert); + +int X509_ACERT_set1_issuerName(X509_ACERT *x, const X509_NAME *name); +int X509_ACERT_set1_serialNumber(X509_ACERT *x, const ASN1_INTEGER *serial); +int X509_ACERT_set1_notBefore(X509_ACERT *x, const ASN1_GENERALIZEDTIME *time); +int X509_ACERT_set1_notAfter(X509_ACERT *x, const ASN1_GENERALIZEDTIME *time); + +void OSSL_OBJECT_DIGEST_INFO_get0_digest(const OSSL_OBJECT_DIGEST_INFO *o, + int *digestedObjectType, + const X509_ALGOR **digestAlgorithm, + const ASN1_BIT_STRING **digest); + +int OSSL_OBJECT_DIGEST_INFO_set1_digest(OSSL_OBJECT_DIGEST_INFO *o, + int digestedObjectType, + X509_ALGOR *digestAlgorithm, + ASN1_BIT_STRING *digest); + +const X509_NAME *OSSL_ISSUER_SERIAL_get0_issuer(const OSSL_ISSUER_SERIAL *isss); +const ASN1_INTEGER *OSSL_ISSUER_SERIAL_get0_serial(const OSSL_ISSUER_SERIAL *isss); +const ASN1_BIT_STRING *OSSL_ISSUER_SERIAL_get0_issuerUID(const OSSL_ISSUER_SERIAL *isss); + +int OSSL_ISSUER_SERIAL_set1_issuer(OSSL_ISSUER_SERIAL *isss, + const X509_NAME *issuer); +int OSSL_ISSUER_SERIAL_set1_serial(OSSL_ISSUER_SERIAL *isss, + const ASN1_INTEGER *serial); +int OSSL_ISSUER_SERIAL_set1_issuerUID(OSSL_ISSUER_SERIAL *isss, + const ASN1_BIT_STRING *uid); + +# define OSSL_IETFAS_OCTETS 0 +# define OSSL_IETFAS_OID 1 +# define OSSL_IETFAS_STRING 2 + +typedef struct OSSL_IETF_ATTR_SYNTAX_VALUE_st OSSL_IETF_ATTR_SYNTAX_VALUE; +typedef struct OSSL_IETF_ATTR_SYNTAX_st OSSL_IETF_ATTR_SYNTAX; +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_IETF_ATTR_SYNTAX_VALUE, OSSL_IETF_ATTR_SYNTAX_VALUE, OSSL_IETF_ATTR_SYNTAX_VALUE) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_value(sk, idx) ((OSSL_IETF_ATTR_SYNTAX_VALUE *)OPENSSL_sk_value(ossl_check_const_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), (idx))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_new(cmp) ((STACK_OF(OSSL_IETF_ATTR_SYNTAX_VALUE) *)OPENSSL_sk_new(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_compfunc_type(cmp))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_new_null() ((STACK_OF(OSSL_IETF_ATTR_SYNTAX_VALUE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_new_reserve(cmp, n) ((STACK_OF(OSSL_IETF_ATTR_SYNTAX_VALUE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_compfunc_type(cmp), (n))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), (n)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_delete(sk, i) ((OSSL_IETF_ATTR_SYNTAX_VALUE *)OPENSSL_sk_delete(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), (i))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_delete_ptr(sk, ptr) ((OSSL_IETF_ATTR_SYNTAX_VALUE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr))) +#define 
sk_OSSL_IETF_ATTR_SYNTAX_VALUE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_pop(sk) ((OSSL_IETF_ATTR_SYNTAX_VALUE *)OPENSSL_sk_pop(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_shift(sk) ((OSSL_IETF_ATTR_SYNTAX_VALUE *)OPENSSL_sk_shift(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk),ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_freefunc_type(freefunc)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr), (idx)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_set(sk, idx, ptr) ((OSSL_IETF_ATTR_SYNTAX_VALUE *)OPENSSL_sk_set(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), (idx), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_type(ptr), pnum) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk)) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_dup(sk) ((STACK_OF(OSSL_IETF_ATTR_SYNTAX_VALUE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_IETF_ATTR_SYNTAX_VALUE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_copyfunc_type(copyfunc), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_freefunc_type(freefunc))) +#define sk_OSSL_IETF_ATTR_SYNTAX_VALUE_set_cmp_func(sk, cmp) ((sk_OSSL_IETF_ATTR_SYNTAX_VALUE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_sk_type(sk), ossl_check_OSSL_IETF_ATTR_SYNTAX_VALUE_compfunc_type(cmp))) + + +DECLARE_ASN1_ITEM(OSSL_IETF_ATTR_SYNTAX_VALUE) +DECLARE_ASN1_ALLOC_FUNCTIONS(OSSL_IETF_ATTR_SYNTAX_VALUE) +DECLARE_ASN1_FUNCTIONS(OSSL_IETF_ATTR_SYNTAX) + +const GENERAL_NAMES * +OSSL_IETF_ATTR_SYNTAX_get0_policyAuthority(const OSSL_IETF_ATTR_SYNTAX *a); +void OSSL_IETF_ATTR_SYNTAX_set0_policyAuthority(OSSL_IETF_ATTR_SYNTAX *a, + GENERAL_NAMES *names); + +int OSSL_IETF_ATTR_SYNTAX_get_value_num(const OSSL_IETF_ATTR_SYNTAX *a); +void *OSSL_IETF_ATTR_SYNTAX_get0_value(const OSSL_IETF_ATTR_SYNTAX *a, + int ind, int *type); +int OSSL_IETF_ATTR_SYNTAX_add1_value(OSSL_IETF_ATTR_SYNTAX *a, int type, + void *data); +int OSSL_IETF_ATTR_SYNTAX_print(BIO *bp, OSSL_IETF_ATTR_SYNTAX *a, int indent); + +struct TARGET_CERT_st { + OSSL_ISSUER_SERIAL *targetCertificate; + GENERAL_NAME 
*targetName; + OSSL_OBJECT_DIGEST_INFO *certDigestInfo; +}; + +typedef struct TARGET_CERT_st OSSL_TARGET_CERT; + +# define OSSL_TGT_TARGET_NAME 0 +# define OSSL_TGT_TARGET_GROUP 1 +# define OSSL_TGT_TARGET_CERT 2 + +typedef struct TARGET_st { + int type; + union { + GENERAL_NAME *targetName; + GENERAL_NAME *targetGroup; + OSSL_TARGET_CERT *targetCert; + } choice; +} OSSL_TARGET; + +typedef STACK_OF(OSSL_TARGET) OSSL_TARGETS; +typedef STACK_OF(OSSL_TARGETS) OSSL_TARGETING_INFORMATION; + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_TARGET, OSSL_TARGET, OSSL_TARGET) +#define sk_OSSL_TARGET_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_TARGET_sk_type(sk)) +#define sk_OSSL_TARGET_value(sk, idx) ((OSSL_TARGET *)OPENSSL_sk_value(ossl_check_const_OSSL_TARGET_sk_type(sk), (idx))) +#define sk_OSSL_TARGET_new(cmp) ((STACK_OF(OSSL_TARGET) *)OPENSSL_sk_new(ossl_check_OSSL_TARGET_compfunc_type(cmp))) +#define sk_OSSL_TARGET_new_null() ((STACK_OF(OSSL_TARGET) *)OPENSSL_sk_new_null()) +#define sk_OSSL_TARGET_new_reserve(cmp, n) ((STACK_OF(OSSL_TARGET) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_TARGET_compfunc_type(cmp), (n))) +#define sk_OSSL_TARGET_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_TARGET_sk_type(sk), (n)) +#define sk_OSSL_TARGET_free(sk) OPENSSL_sk_free(ossl_check_OSSL_TARGET_sk_type(sk)) +#define sk_OSSL_TARGET_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_TARGET_sk_type(sk)) +#define sk_OSSL_TARGET_delete(sk, i) ((OSSL_TARGET *)OPENSSL_sk_delete(ossl_check_OSSL_TARGET_sk_type(sk), (i))) +#define sk_OSSL_TARGET_delete_ptr(sk, ptr) ((OSSL_TARGET *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr))) +#define sk_OSSL_TARGET_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr)) +#define sk_OSSL_TARGET_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr)) +#define sk_OSSL_TARGET_pop(sk) ((OSSL_TARGET *)OPENSSL_sk_pop(ossl_check_OSSL_TARGET_sk_type(sk))) +#define sk_OSSL_TARGET_shift(sk) ((OSSL_TARGET *)OPENSSL_sk_shift(ossl_check_OSSL_TARGET_sk_type(sk))) +#define sk_OSSL_TARGET_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_TARGET_sk_type(sk),ossl_check_OSSL_TARGET_freefunc_type(freefunc)) +#define sk_OSSL_TARGET_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr), (idx)) +#define sk_OSSL_TARGET_set(sk, idx, ptr) ((OSSL_TARGET *)OPENSSL_sk_set(ossl_check_OSSL_TARGET_sk_type(sk), (idx), ossl_check_OSSL_TARGET_type(ptr))) +#define sk_OSSL_TARGET_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr)) +#define sk_OSSL_TARGET_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr)) +#define sk_OSSL_TARGET_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_type(ptr), pnum) +#define sk_OSSL_TARGET_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_TARGET_sk_type(sk)) +#define sk_OSSL_TARGET_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_TARGET_sk_type(sk)) +#define sk_OSSL_TARGET_dup(sk) ((STACK_OF(OSSL_TARGET) *)OPENSSL_sk_dup(ossl_check_const_OSSL_TARGET_sk_type(sk))) +#define sk_OSSL_TARGET_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_TARGET) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_copyfunc_type(copyfunc), ossl_check_OSSL_TARGET_freefunc_type(freefunc))) +#define sk_OSSL_TARGET_set_cmp_func(sk, cmp) 
((sk_OSSL_TARGET_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_TARGET_sk_type(sk), ossl_check_OSSL_TARGET_compfunc_type(cmp))) + + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_TARGETS, OSSL_TARGETS, OSSL_TARGETS) +#define sk_OSSL_TARGETS_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_TARGETS_sk_type(sk)) +#define sk_OSSL_TARGETS_value(sk, idx) ((OSSL_TARGETS *)OPENSSL_sk_value(ossl_check_const_OSSL_TARGETS_sk_type(sk), (idx))) +#define sk_OSSL_TARGETS_new(cmp) ((STACK_OF(OSSL_TARGETS) *)OPENSSL_sk_new(ossl_check_OSSL_TARGETS_compfunc_type(cmp))) +#define sk_OSSL_TARGETS_new_null() ((STACK_OF(OSSL_TARGETS) *)OPENSSL_sk_new_null()) +#define sk_OSSL_TARGETS_new_reserve(cmp, n) ((STACK_OF(OSSL_TARGETS) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_TARGETS_compfunc_type(cmp), (n))) +#define sk_OSSL_TARGETS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_TARGETS_sk_type(sk), (n)) +#define sk_OSSL_TARGETS_free(sk) OPENSSL_sk_free(ossl_check_OSSL_TARGETS_sk_type(sk)) +#define sk_OSSL_TARGETS_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_TARGETS_sk_type(sk)) +#define sk_OSSL_TARGETS_delete(sk, i) ((OSSL_TARGETS *)OPENSSL_sk_delete(ossl_check_OSSL_TARGETS_sk_type(sk), (i))) +#define sk_OSSL_TARGETS_delete_ptr(sk, ptr) ((OSSL_TARGETS *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr))) +#define sk_OSSL_TARGETS_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr)) +#define sk_OSSL_TARGETS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr)) +#define sk_OSSL_TARGETS_pop(sk) ((OSSL_TARGETS *)OPENSSL_sk_pop(ossl_check_OSSL_TARGETS_sk_type(sk))) +#define sk_OSSL_TARGETS_shift(sk) ((OSSL_TARGETS *)OPENSSL_sk_shift(ossl_check_OSSL_TARGETS_sk_type(sk))) +#define sk_OSSL_TARGETS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_TARGETS_sk_type(sk),ossl_check_OSSL_TARGETS_freefunc_type(freefunc)) +#define sk_OSSL_TARGETS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr), (idx)) +#define sk_OSSL_TARGETS_set(sk, idx, ptr) ((OSSL_TARGETS *)OPENSSL_sk_set(ossl_check_OSSL_TARGETS_sk_type(sk), (idx), ossl_check_OSSL_TARGETS_type(ptr))) +#define sk_OSSL_TARGETS_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr)) +#define sk_OSSL_TARGETS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr)) +#define sk_OSSL_TARGETS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_type(ptr), pnum) +#define sk_OSSL_TARGETS_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_TARGETS_sk_type(sk)) +#define sk_OSSL_TARGETS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_TARGETS_sk_type(sk)) +#define sk_OSSL_TARGETS_dup(sk) ((STACK_OF(OSSL_TARGETS) *)OPENSSL_sk_dup(ossl_check_const_OSSL_TARGETS_sk_type(sk))) +#define sk_OSSL_TARGETS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_TARGETS) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_copyfunc_type(copyfunc), ossl_check_OSSL_TARGETS_freefunc_type(freefunc))) +#define sk_OSSL_TARGETS_set_cmp_func(sk, cmp) ((sk_OSSL_TARGETS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_TARGETS_sk_type(sk), ossl_check_OSSL_TARGETS_compfunc_type(cmp))) + + +DECLARE_ASN1_FUNCTIONS(OSSL_TARGET) +DECLARE_ASN1_FUNCTIONS(OSSL_TARGETS) +DECLARE_ASN1_FUNCTIONS(OSSL_TARGETING_INFORMATION) + +typedef 
STACK_OF(OSSL_ISSUER_SERIAL) OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX; +DECLARE_ASN1_FUNCTIONS(OSSL_AUTHORITY_ATTRIBUTE_ID_SYNTAX) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL, OSSL_ISSUER_SERIAL) +#define sk_OSSL_ISSUER_SERIAL_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_value(sk, idx) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_value(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), (idx))) +#define sk_OSSL_ISSUER_SERIAL_new(cmp) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp))) +#define sk_OSSL_ISSUER_SERIAL_new_null() ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ISSUER_SERIAL_new_reserve(cmp, n) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp), (n))) +#define sk_OSSL_ISSUER_SERIAL_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (n)) +#define sk_OSSL_ISSUER_SERIAL_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_delete(sk, i) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (i))) +#define sk_OSSL_ISSUER_SERIAL_delete_ptr(sk, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))) +#define sk_OSSL_ISSUER_SERIAL_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_pop(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_pop(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))) +#define sk_OSSL_ISSUER_SERIAL_shift(sk) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_shift(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk))) +#define sk_OSSL_ISSUER_SERIAL_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk),ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc)) +#define sk_OSSL_ISSUER_SERIAL_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), (idx)) +#define sk_OSSL_ISSUER_SERIAL_set(sk, idx, ptr) ((OSSL_ISSUER_SERIAL *)OPENSSL_sk_set(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), (idx), ossl_check_OSSL_ISSUER_SERIAL_type(ptr))) +#define sk_OSSL_ISSUER_SERIAL_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr)) +#define sk_OSSL_ISSUER_SERIAL_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_type(ptr), pnum) +#define sk_OSSL_ISSUER_SERIAL_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk)) +#define sk_OSSL_ISSUER_SERIAL_dup(sk) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk))) +#define sk_OSSL_ISSUER_SERIAL_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ISSUER_SERIAL) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ISSUER_SERIAL_sk_type(sk), 
ossl_check_OSSL_ISSUER_SERIAL_copyfunc_type(copyfunc), ossl_check_OSSL_ISSUER_SERIAL_freefunc_type(freefunc))) +#define sk_OSSL_ISSUER_SERIAL_set_cmp_func(sk, cmp) ((sk_OSSL_ISSUER_SERIAL_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ISSUER_SERIAL_sk_type(sk), ossl_check_OSSL_ISSUER_SERIAL_compfunc_type(cmp))) + + +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/x509_vfy.h b/contrib/openssl-cmake/common/include/openssl/x509_vfy.h new file mode 100644 index 000000000000..c9bdc3b39d68 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/x509_vfy.h @@ -0,0 +1,903 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509_vfy.h.in + * + * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509_VFY_H +# define OPENSSL_X509_VFY_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509_VFY_H +# endif + +/* + * Protect against recursion, x509.h and x509_vfy.h each include the other. + */ +# ifndef OPENSSL_X509_H +# include +# endif + +# include +# include +# include +# include +# include + +#ifdef __cplusplus +extern "C" { +#endif + +/*- +SSL_CTX -> X509_STORE + -> X509_LOOKUP + ->X509_LOOKUP_METHOD + -> X509_LOOKUP + ->X509_LOOKUP_METHOD + +SSL -> X509_STORE_CTX + ->X509_STORE + +The X509_STORE holds the tables etc for verification stuff. +A X509_STORE_CTX is used while validating a single certificate. +The X509_STORE has X509_LOOKUPs for looking up certs. +The X509_STORE then calls a function to actually verify the +certificate chain. 
+*/ + +typedef enum { + X509_LU_NONE = 0, + X509_LU_X509, X509_LU_CRL +} X509_LOOKUP_TYPE; + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +#define X509_LU_RETRY -1 +#define X509_LU_FAIL 0 +#endif + +SKM_DEFINE_STACK_OF_INTERNAL(X509_LOOKUP, X509_LOOKUP, X509_LOOKUP) +#define sk_X509_LOOKUP_num(sk) OPENSSL_sk_num(ossl_check_const_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_value(sk, idx) ((X509_LOOKUP *)OPENSSL_sk_value(ossl_check_const_X509_LOOKUP_sk_type(sk), (idx))) +#define sk_X509_LOOKUP_new(cmp) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new(ossl_check_X509_LOOKUP_compfunc_type(cmp))) +#define sk_X509_LOOKUP_new_null() ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new_null()) +#define sk_X509_LOOKUP_new_reserve(cmp, n) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_new_reserve(ossl_check_X509_LOOKUP_compfunc_type(cmp), (n))) +#define sk_X509_LOOKUP_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_LOOKUP_sk_type(sk), (n)) +#define sk_X509_LOOKUP_free(sk) OPENSSL_sk_free(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_zero(sk) OPENSSL_sk_zero(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_delete(sk, i) ((X509_LOOKUP *)OPENSSL_sk_delete(ossl_check_X509_LOOKUP_sk_type(sk), (i))) +#define sk_X509_LOOKUP_delete_ptr(sk, ptr) ((X509_LOOKUP *)OPENSSL_sk_delete_ptr(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr))) +#define sk_X509_LOOKUP_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_pop(sk) ((X509_LOOKUP *)OPENSSL_sk_pop(ossl_check_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_shift(sk) ((X509_LOOKUP *)OPENSSL_sk_shift(ossl_check_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_LOOKUP_sk_type(sk),ossl_check_X509_LOOKUP_freefunc_type(freefunc)) +#define sk_X509_LOOKUP_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr), (idx)) +#define sk_X509_LOOKUP_set(sk, idx, ptr) ((X509_LOOKUP *)OPENSSL_sk_set(ossl_check_X509_LOOKUP_sk_type(sk), (idx), ossl_check_X509_LOOKUP_type(ptr))) +#define sk_X509_LOOKUP_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr)) +#define sk_X509_LOOKUP_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_type(ptr), pnum) +#define sk_X509_LOOKUP_sort(sk) OPENSSL_sk_sort(ossl_check_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_LOOKUP_sk_type(sk)) +#define sk_X509_LOOKUP_dup(sk) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_dup(ossl_check_const_X509_LOOKUP_sk_type(sk))) +#define sk_X509_LOOKUP_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_LOOKUP) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_copyfunc_type(copyfunc), ossl_check_X509_LOOKUP_freefunc_type(freefunc))) +#define sk_X509_LOOKUP_set_cmp_func(sk, cmp) ((sk_X509_LOOKUP_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_LOOKUP_sk_type(sk), ossl_check_X509_LOOKUP_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_OBJECT, X509_OBJECT, X509_OBJECT) +#define sk_X509_OBJECT_num(sk) OPENSSL_sk_num(ossl_check_const_X509_OBJECT_sk_type(sk)) +#define 
sk_X509_OBJECT_value(sk, idx) ((X509_OBJECT *)OPENSSL_sk_value(ossl_check_const_X509_OBJECT_sk_type(sk), (idx))) +#define sk_X509_OBJECT_new(cmp) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new(ossl_check_X509_OBJECT_compfunc_type(cmp))) +#define sk_X509_OBJECT_new_null() ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new_null()) +#define sk_X509_OBJECT_new_reserve(cmp, n) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_new_reserve(ossl_check_X509_OBJECT_compfunc_type(cmp), (n))) +#define sk_X509_OBJECT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_OBJECT_sk_type(sk), (n)) +#define sk_X509_OBJECT_free(sk) OPENSSL_sk_free(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_zero(sk) OPENSSL_sk_zero(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_delete(sk, i) ((X509_OBJECT *)OPENSSL_sk_delete(ossl_check_X509_OBJECT_sk_type(sk), (i))) +#define sk_X509_OBJECT_delete_ptr(sk, ptr) ((X509_OBJECT *)OPENSSL_sk_delete_ptr(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr))) +#define sk_X509_OBJECT_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_pop(sk) ((X509_OBJECT *)OPENSSL_sk_pop(ossl_check_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_shift(sk) ((X509_OBJECT *)OPENSSL_sk_shift(ossl_check_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_OBJECT_sk_type(sk),ossl_check_X509_OBJECT_freefunc_type(freefunc)) +#define sk_X509_OBJECT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr), (idx)) +#define sk_X509_OBJECT_set(sk, idx, ptr) ((X509_OBJECT *)OPENSSL_sk_set(ossl_check_X509_OBJECT_sk_type(sk), (idx), ossl_check_X509_OBJECT_type(ptr))) +#define sk_X509_OBJECT_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr)) +#define sk_X509_OBJECT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_type(ptr), pnum) +#define sk_X509_OBJECT_sort(sk) OPENSSL_sk_sort(ossl_check_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_OBJECT_sk_type(sk)) +#define sk_X509_OBJECT_dup(sk) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_dup(ossl_check_const_X509_OBJECT_sk_type(sk))) +#define sk_X509_OBJECT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_OBJECT) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_copyfunc_type(copyfunc), ossl_check_X509_OBJECT_freefunc_type(freefunc))) +#define sk_X509_OBJECT_set_cmp_func(sk, cmp) ((sk_X509_OBJECT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_OBJECT_sk_type(sk), ossl_check_X509_OBJECT_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(X509_VERIFY_PARAM, X509_VERIFY_PARAM, X509_VERIFY_PARAM) +#define sk_X509_VERIFY_PARAM_num(sk) OPENSSL_sk_num(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_value(sk, idx) ((X509_VERIFY_PARAM *)OPENSSL_sk_value(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk), (idx))) +#define sk_X509_VERIFY_PARAM_new(cmp) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_new(ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp))) +#define sk_X509_VERIFY_PARAM_new_null() ((STACK_OF(X509_VERIFY_PARAM) 
*)OPENSSL_sk_new_null()) +#define sk_X509_VERIFY_PARAM_new_reserve(cmp, n) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_new_reserve(ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp), (n))) +#define sk_X509_VERIFY_PARAM_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (n)) +#define sk_X509_VERIFY_PARAM_free(sk) OPENSSL_sk_free(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_zero(sk) OPENSSL_sk_zero(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_delete(sk, i) ((X509_VERIFY_PARAM *)OPENSSL_sk_delete(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (i))) +#define sk_X509_VERIFY_PARAM_delete_ptr(sk, ptr) ((X509_VERIFY_PARAM *)OPENSSL_sk_delete_ptr(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr))) +#define sk_X509_VERIFY_PARAM_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_pop(sk) ((X509_VERIFY_PARAM *)OPENSSL_sk_pop(ossl_check_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_shift(sk) ((X509_VERIFY_PARAM *)OPENSSL_sk_shift(ossl_check_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_VERIFY_PARAM_sk_type(sk),ossl_check_X509_VERIFY_PARAM_freefunc_type(freefunc)) +#define sk_X509_VERIFY_PARAM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr), (idx)) +#define sk_X509_VERIFY_PARAM_set(sk, idx, ptr) ((X509_VERIFY_PARAM *)OPENSSL_sk_set(ossl_check_X509_VERIFY_PARAM_sk_type(sk), (idx), ossl_check_X509_VERIFY_PARAM_type(ptr))) +#define sk_X509_VERIFY_PARAM_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr)) +#define sk_X509_VERIFY_PARAM_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_type(ptr), pnum) +#define sk_X509_VERIFY_PARAM_sort(sk) OPENSSL_sk_sort(ossl_check_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk)) +#define sk_X509_VERIFY_PARAM_dup(sk) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_dup(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk))) +#define sk_X509_VERIFY_PARAM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_VERIFY_PARAM) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_copyfunc_type(copyfunc), ossl_check_X509_VERIFY_PARAM_freefunc_type(freefunc))) +#define sk_X509_VERIFY_PARAM_set_cmp_func(sk, cmp) ((sk_X509_VERIFY_PARAM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_VERIFY_PARAM_sk_type(sk), ossl_check_X509_VERIFY_PARAM_compfunc_type(cmp))) + + +/* This is used for a table of trust checking functions */ +typedef struct x509_trust_st { + int trust; + int flags; + int (*check_trust) (struct x509_trust_st *, X509 *, int); + char *name; + int arg1; + void *arg2; +} X509_TRUST; +SKM_DEFINE_STACK_OF_INTERNAL(X509_TRUST, X509_TRUST, X509_TRUST) +#define sk_X509_TRUST_num(sk) OPENSSL_sk_num(ossl_check_const_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_value(sk, idx) ((X509_TRUST 
*)OPENSSL_sk_value(ossl_check_const_X509_TRUST_sk_type(sk), (idx))) +#define sk_X509_TRUST_new(cmp) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new(ossl_check_X509_TRUST_compfunc_type(cmp))) +#define sk_X509_TRUST_new_null() ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new_null()) +#define sk_X509_TRUST_new_reserve(cmp, n) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_new_reserve(ossl_check_X509_TRUST_compfunc_type(cmp), (n))) +#define sk_X509_TRUST_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_TRUST_sk_type(sk), (n)) +#define sk_X509_TRUST_free(sk) OPENSSL_sk_free(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_zero(sk) OPENSSL_sk_zero(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_delete(sk, i) ((X509_TRUST *)OPENSSL_sk_delete(ossl_check_X509_TRUST_sk_type(sk), (i))) +#define sk_X509_TRUST_delete_ptr(sk, ptr) ((X509_TRUST *)OPENSSL_sk_delete_ptr(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr))) +#define sk_X509_TRUST_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_pop(sk) ((X509_TRUST *)OPENSSL_sk_pop(ossl_check_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_shift(sk) ((X509_TRUST *)OPENSSL_sk_shift(ossl_check_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_TRUST_sk_type(sk),ossl_check_X509_TRUST_freefunc_type(freefunc)) +#define sk_X509_TRUST_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr), (idx)) +#define sk_X509_TRUST_set(sk, idx, ptr) ((X509_TRUST *)OPENSSL_sk_set(ossl_check_X509_TRUST_sk_type(sk), (idx), ossl_check_X509_TRUST_type(ptr))) +#define sk_X509_TRUST_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr)) +#define sk_X509_TRUST_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_type(ptr), pnum) +#define sk_X509_TRUST_sort(sk) OPENSSL_sk_sort(ossl_check_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_TRUST_sk_type(sk)) +#define sk_X509_TRUST_dup(sk) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_dup(ossl_check_const_X509_TRUST_sk_type(sk))) +#define sk_X509_TRUST_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_TRUST) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_copyfunc_type(copyfunc), ossl_check_X509_TRUST_freefunc_type(freefunc))) +#define sk_X509_TRUST_set_cmp_func(sk, cmp) ((sk_X509_TRUST_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_TRUST_sk_type(sk), ossl_check_X509_TRUST_compfunc_type(cmp))) + + +/* standard trust ids */ +# define X509_TRUST_DEFAULT 0 /* Only valid in purpose settings */ +# define X509_TRUST_COMPAT 1 +# define X509_TRUST_SSL_CLIENT 2 +# define X509_TRUST_SSL_SERVER 3 +# define X509_TRUST_EMAIL 4 +# define X509_TRUST_OBJECT_SIGN 5 +# define X509_TRUST_OCSP_SIGN 6 +# define X509_TRUST_OCSP_REQUEST 7 +# define X509_TRUST_TSA 8 +/* Keep these up to date! 
*/ +# define X509_TRUST_MIN 1 +# define X509_TRUST_MAX 8 + +/* trust_flags values */ +# define X509_TRUST_DYNAMIC (1U << 0) +# define X509_TRUST_DYNAMIC_NAME (1U << 1) +/* No compat trust if self-signed, preempts "DO_SS" */ +# define X509_TRUST_NO_SS_COMPAT (1U << 2) +/* Compat trust if no explicit accepted trust EKUs */ +# define X509_TRUST_DO_SS_COMPAT (1U << 3) +/* Accept "anyEKU" as a wildcard rejection OID and as a wildcard trust OID */ +# define X509_TRUST_OK_ANY_EKU (1U << 4) + +/* check_trust return codes */ +# define X509_TRUST_TRUSTED 1 +# define X509_TRUST_REJECTED 2 +# define X509_TRUST_UNTRUSTED 3 + +int X509_TRUST_set(int *t, int trust); +int X509_TRUST_get_count(void); +X509_TRUST *X509_TRUST_get0(int idx); +int X509_TRUST_get_by_id(int id); +int X509_TRUST_add(int id, int flags, int (*ck) (X509_TRUST *, X509 *, int), + const char *name, int arg1, void *arg2); +void X509_TRUST_cleanup(void); +int X509_TRUST_get_flags(const X509_TRUST *xp); +char *X509_TRUST_get0_name(const X509_TRUST *xp); +int X509_TRUST_get_trust(const X509_TRUST *xp); + +int X509_trusted(const X509 *x); +int X509_add1_trust_object(X509 *x, const ASN1_OBJECT *obj); +int X509_add1_reject_object(X509 *x, const ASN1_OBJECT *obj); +void X509_trust_clear(X509 *x); +void X509_reject_clear(X509 *x); +STACK_OF(ASN1_OBJECT) *X509_get0_trust_objects(X509 *x); +STACK_OF(ASN1_OBJECT) *X509_get0_reject_objects(X509 *x); + +int (*X509_TRUST_set_default(int (*trust) (int, X509 *, int))) (int, X509 *, + int); +int X509_check_trust(X509 *x, int id, int flags); + +int X509_verify_cert(X509_STORE_CTX *ctx); +int X509_STORE_CTX_verify(X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_build_chain(X509 *target, STACK_OF(X509) *certs, + X509_STORE *store, int with_self_signed, + OSSL_LIB_CTX *libctx, const char *propq); + +int X509_STORE_set_depth(X509_STORE *store, int depth); + +typedef int (*X509_STORE_CTX_verify_cb)(int, X509_STORE_CTX *); +int X509_STORE_CTX_print_verify_cb(int ok, X509_STORE_CTX *ctx); +typedef int (*X509_STORE_CTX_verify_fn)(X509_STORE_CTX *); +typedef int (*X509_STORE_CTX_get_issuer_fn)(X509 **issuer, + X509_STORE_CTX *ctx, X509 *x); +typedef int (*X509_STORE_CTX_check_issued_fn)(X509_STORE_CTX *ctx, + X509 *x, X509 *issuer); +typedef int (*X509_STORE_CTX_check_revocation_fn)(X509_STORE_CTX *ctx); +typedef int (*X509_STORE_CTX_get_crl_fn)(X509_STORE_CTX *ctx, + X509_CRL **crl, X509 *x); +typedef int (*X509_STORE_CTX_check_crl_fn)(X509_STORE_CTX *ctx, X509_CRL *crl); +typedef int (*X509_STORE_CTX_cert_crl_fn)(X509_STORE_CTX *ctx, + X509_CRL *crl, X509 *x); +typedef int (*X509_STORE_CTX_check_policy_fn)(X509_STORE_CTX *ctx); +typedef STACK_OF(X509) + *(*X509_STORE_CTX_lookup_certs_fn)(X509_STORE_CTX *ctx, + const X509_NAME *nm); +typedef STACK_OF(X509_CRL) + *(*X509_STORE_CTX_lookup_crls_fn)(const X509_STORE_CTX *ctx, + const X509_NAME *nm); +typedef int (*X509_STORE_CTX_cleanup_fn)(X509_STORE_CTX *ctx); + +void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); + +# define X509_STORE_CTX_set_app_data(ctx,data) \ + X509_STORE_CTX_set_ex_data(ctx,0,data) +# define X509_STORE_CTX_get_app_data(ctx) \ + X509_STORE_CTX_get_ex_data(ctx,0) + +# define X509_L_FILE_LOAD 1 +# define X509_L_ADD_DIR 2 +# define X509_L_ADD_STORE 3 +# define X509_L_LOAD_STORE 4 + +# define X509_LOOKUP_load_file(x,name,type) \ + X509_LOOKUP_ctrl((x),X509_L_FILE_LOAD,(name),(long)(type),NULL) + +# define X509_LOOKUP_add_dir(x,name,type) \ + X509_LOOKUP_ctrl((x),X509_L_ADD_DIR,(name),(long)(type),NULL) + +# define 
X509_LOOKUP_add_store(x,name) \ + X509_LOOKUP_ctrl((x),X509_L_ADD_STORE,(name),0,NULL) + +# define X509_LOOKUP_load_store(x,name) \ + X509_LOOKUP_ctrl((x),X509_L_LOAD_STORE,(name),0,NULL) + +# define X509_LOOKUP_load_file_ex(x, name, type, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_FILE_LOAD, (name), (long)(type), NULL,\ + (libctx), (propq)) + +# define X509_LOOKUP_load_store_ex(x, name, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_LOAD_STORE, (name), 0, NULL, \ + (libctx), (propq)) + +# define X509_LOOKUP_add_store_ex(x, name, libctx, propq) \ +X509_LOOKUP_ctrl_ex((x), X509_L_ADD_STORE, (name), 0, NULL, \ + (libctx), (propq)) + +# define X509_V_OK 0 +# define X509_V_ERR_UNSPECIFIED 1 +# define X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT 2 +# define X509_V_ERR_UNABLE_TO_GET_CRL 3 +# define X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE 4 +# define X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE 5 +# define X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY 6 +# define X509_V_ERR_CERT_SIGNATURE_FAILURE 7 +# define X509_V_ERR_CRL_SIGNATURE_FAILURE 8 +# define X509_V_ERR_CERT_NOT_YET_VALID 9 +# define X509_V_ERR_CERT_HAS_EXPIRED 10 +# define X509_V_ERR_CRL_NOT_YET_VALID 11 +# define X509_V_ERR_CRL_HAS_EXPIRED 12 +# define X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD 13 +# define X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD 14 +# define X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD 15 +# define X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD 16 +# define X509_V_ERR_OUT_OF_MEM 17 +# define X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT 18 +# define X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN 19 +# define X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY 20 +# define X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE 21 +# define X509_V_ERR_CERT_CHAIN_TOO_LONG 22 +# define X509_V_ERR_CERT_REVOKED 23 +# define X509_V_ERR_NO_ISSUER_PUBLIC_KEY 24 +# define X509_V_ERR_PATH_LENGTH_EXCEEDED 25 +# define X509_V_ERR_INVALID_PURPOSE 26 +# define X509_V_ERR_CERT_UNTRUSTED 27 +# define X509_V_ERR_CERT_REJECTED 28 + +/* These are 'informational' when looking for issuer cert */ +# define X509_V_ERR_SUBJECT_ISSUER_MISMATCH 29 +# define X509_V_ERR_AKID_SKID_MISMATCH 30 +# define X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH 31 +# define X509_V_ERR_KEYUSAGE_NO_CERTSIGN 32 +# define X509_V_ERR_UNABLE_TO_GET_CRL_ISSUER 33 +# define X509_V_ERR_UNHANDLED_CRITICAL_EXTENSION 34 +# define X509_V_ERR_KEYUSAGE_NO_CRL_SIGN 35 +# define X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION 36 +# define X509_V_ERR_INVALID_NON_CA 37 +# define X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED 38 +# define X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE 39 +# define X509_V_ERR_PROXY_CERTIFICATES_NOT_ALLOWED 40 +# define X509_V_ERR_INVALID_EXTENSION 41 +# define X509_V_ERR_INVALID_POLICY_EXTENSION 42 +# define X509_V_ERR_NO_EXPLICIT_POLICY 43 +# define X509_V_ERR_DIFFERENT_CRL_SCOPE 44 +# define X509_V_ERR_UNSUPPORTED_EXTENSION_FEATURE 45 +# define X509_V_ERR_UNNESTED_RESOURCE 46 +# define X509_V_ERR_PERMITTED_VIOLATION 47 +# define X509_V_ERR_EXCLUDED_VIOLATION 48 +# define X509_V_ERR_SUBTREE_MINMAX 49 +/* The application is not happy */ +# define X509_V_ERR_APPLICATION_VERIFICATION 50 +# define X509_V_ERR_UNSUPPORTED_CONSTRAINT_TYPE 51 +# define X509_V_ERR_UNSUPPORTED_CONSTRAINT_SYNTAX 52 +# define X509_V_ERR_UNSUPPORTED_NAME_SYNTAX 53 +# define X509_V_ERR_CRL_PATH_VALIDATION_ERROR 54 +/* Another issuer check debug option */ +# define X509_V_ERR_PATH_LOOP 55 +/* Suite B mode algorithm violation */ +# define X509_V_ERR_SUITE_B_INVALID_VERSION 56 +# define X509_V_ERR_SUITE_B_INVALID_ALGORITHM 57 +# define 
X509_V_ERR_SUITE_B_INVALID_CURVE 58 +# define X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM 59 +# define X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED 60 +# define X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 61 +/* Host, email and IP check errors */ +# define X509_V_ERR_HOSTNAME_MISMATCH 62 +# define X509_V_ERR_EMAIL_MISMATCH 63 +# define X509_V_ERR_IP_ADDRESS_MISMATCH 64 +/* DANE TLSA errors */ +# define X509_V_ERR_DANE_NO_MATCH 65 +/* security level errors */ +# define X509_V_ERR_EE_KEY_TOO_SMALL 66 +# define X509_V_ERR_CA_KEY_TOO_SMALL 67 +# define X509_V_ERR_CA_MD_TOO_WEAK 68 +/* Caller error */ +# define X509_V_ERR_INVALID_CALL 69 +/* Issuer lookup error */ +# define X509_V_ERR_STORE_LOOKUP 70 +/* Certificate transparency */ +# define X509_V_ERR_NO_VALID_SCTS 71 + +# define X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION 72 +/* OCSP status errors */ +# define X509_V_ERR_OCSP_VERIFY_NEEDED 73 /* Need OCSP verification */ +# define X509_V_ERR_OCSP_VERIFY_FAILED 74 /* Couldn't verify cert through OCSP */ +# define X509_V_ERR_OCSP_CERT_UNKNOWN 75 /* Certificate wasn't recognized by the OCSP responder */ + +# define X509_V_ERR_UNSUPPORTED_SIGNATURE_ALGORITHM 76 +# define X509_V_ERR_SIGNATURE_ALGORITHM_MISMATCH 77 + +/* Errors in case a check in X509_V_FLAG_X509_STRICT mode fails */ +# define X509_V_ERR_SIGNATURE_ALGORITHM_INCONSISTENCY 78 +# define X509_V_ERR_INVALID_CA 79 +# define X509_V_ERR_PATHLEN_INVALID_FOR_NON_CA 80 +# define X509_V_ERR_PATHLEN_WITHOUT_KU_KEY_CERT_SIGN 81 +# define X509_V_ERR_KU_KEY_CERT_SIGN_INVALID_FOR_NON_CA 82 +# define X509_V_ERR_ISSUER_NAME_EMPTY 83 +# define X509_V_ERR_SUBJECT_NAME_EMPTY 84 +# define X509_V_ERR_MISSING_AUTHORITY_KEY_IDENTIFIER 85 +# define X509_V_ERR_MISSING_SUBJECT_KEY_IDENTIFIER 86 +# define X509_V_ERR_EMPTY_SUBJECT_ALT_NAME 87 +# define X509_V_ERR_EMPTY_SUBJECT_SAN_NOT_CRITICAL 88 +# define X509_V_ERR_CA_BCONS_NOT_CRITICAL 89 +# define X509_V_ERR_AUTHORITY_KEY_IDENTIFIER_CRITICAL 90 +# define X509_V_ERR_SUBJECT_KEY_IDENTIFIER_CRITICAL 91 +# define X509_V_ERR_CA_CERT_MISSING_KEY_USAGE 92 +# define X509_V_ERR_EXTENSIONS_REQUIRE_VERSION_3 93 +# define X509_V_ERR_EC_KEY_EXPLICIT_PARAMS 94 +# define X509_V_ERR_RPK_UNTRUSTED 95 + +/* Certificate verify flags */ +# ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_V_FLAG_CB_ISSUER_CHECK 0x0 /* Deprecated */ +# endif +/* Use check time instead of current time */ +# define X509_V_FLAG_USE_CHECK_TIME 0x2 +/* Lookup CRLs */ +# define X509_V_FLAG_CRL_CHECK 0x4 +/* Lookup CRLs for whole chain */ +# define X509_V_FLAG_CRL_CHECK_ALL 0x8 +/* Ignore unhandled critical extensions */ +# define X509_V_FLAG_IGNORE_CRITICAL 0x10 +/* Disable workarounds for broken certificates */ +# define X509_V_FLAG_X509_STRICT 0x20 +/* Enable proxy certificate validation */ +# define X509_V_FLAG_ALLOW_PROXY_CERTS 0x40 +/* Enable policy checking */ +# define X509_V_FLAG_POLICY_CHECK 0x80 +/* Policy variable require-explicit-policy */ +# define X509_V_FLAG_EXPLICIT_POLICY 0x100 +/* Policy variable inhibit-any-policy */ +# define X509_V_FLAG_INHIBIT_ANY 0x200 +/* Policy variable inhibit-policy-mapping */ +# define X509_V_FLAG_INHIBIT_MAP 0x400 +/* Notify callback that policy is OK */ +# define X509_V_FLAG_NOTIFY_POLICY 0x800 +/* Extended CRL features such as indirect CRLs, alternate CRL signing keys */ +# define X509_V_FLAG_EXTENDED_CRL_SUPPORT 0x1000 +/* Delta CRL support */ +# define X509_V_FLAG_USE_DELTAS 0x2000 +/* Check self-signed CA signature */ +# define X509_V_FLAG_CHECK_SS_SIGNATURE 0x4000 +/* Use trusted store first */ +# define 
X509_V_FLAG_TRUSTED_FIRST 0x8000 +/* Suite B 128 bit only mode: not normally used */ +# define X509_V_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define X509_V_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define X509_V_FLAG_SUITEB_128_LOS 0x30000 +/* Allow partial chains if at least one certificate is in trusted store */ +# define X509_V_FLAG_PARTIAL_CHAIN 0x80000 +/* + * If the initial chain is not trusted, do not attempt to build an alternative + * chain. Alternate chain checking was introduced in 1.1.0. Setting this flag + * will force the behaviour to match that of previous versions. + */ +# define X509_V_FLAG_NO_ALT_CHAINS 0x100000 +/* Do not check certificate/CRL validity against current time */ +# define X509_V_FLAG_NO_CHECK_TIME 0x200000 + +# define X509_VP_FLAG_DEFAULT 0x1 +# define X509_VP_FLAG_OVERWRITE 0x2 +# define X509_VP_FLAG_RESET_FLAGS 0x4 +# define X509_VP_FLAG_LOCKED 0x8 +# define X509_VP_FLAG_ONCE 0x10 + +/* Internal use: mask of policy related options */ +# define X509_V_FLAG_POLICY_MASK (X509_V_FLAG_POLICY_CHECK \ + | X509_V_FLAG_EXPLICIT_POLICY \ + | X509_V_FLAG_INHIBIT_ANY \ + | X509_V_FLAG_INHIBIT_MAP) + +int X509_OBJECT_idx_by_subject(STACK_OF(X509_OBJECT) *h, X509_LOOKUP_TYPE type, + const X509_NAME *name); +X509_OBJECT *X509_OBJECT_retrieve_by_subject(STACK_OF(X509_OBJECT) *h, + X509_LOOKUP_TYPE type, + const X509_NAME *name); +X509_OBJECT *X509_OBJECT_retrieve_match(STACK_OF(X509_OBJECT) *h, + X509_OBJECT *x); +int X509_OBJECT_up_ref_count(X509_OBJECT *a); +X509_OBJECT *X509_OBJECT_new(void); +void X509_OBJECT_free(X509_OBJECT *a); +X509_LOOKUP_TYPE X509_OBJECT_get_type(const X509_OBJECT *a); +X509 *X509_OBJECT_get0_X509(const X509_OBJECT *a); +int X509_OBJECT_set1_X509(X509_OBJECT *a, X509 *obj); +X509_CRL *X509_OBJECT_get0_X509_CRL(const X509_OBJECT *a); +int X509_OBJECT_set1_X509_CRL(X509_OBJECT *a, X509_CRL *obj); +X509_STORE *X509_STORE_new(void); +void X509_STORE_free(X509_STORE *xs); +int X509_STORE_lock(X509_STORE *xs); +int X509_STORE_unlock(X509_STORE *xs); +int X509_STORE_up_ref(X509_STORE *xs); +STACK_OF(X509_OBJECT) *X509_STORE_get0_objects(const X509_STORE *xs); +STACK_OF(X509_OBJECT) *X509_STORE_get1_objects(X509_STORE *xs); +STACK_OF(X509) *X509_STORE_get1_all_certs(X509_STORE *xs); +STACK_OF(X509) *X509_STORE_CTX_get1_certs(X509_STORE_CTX *xs, + const X509_NAME *nm); +STACK_OF(X509_CRL) *X509_STORE_CTX_get1_crls(const X509_STORE_CTX *st, + const X509_NAME *nm); +int X509_STORE_set_flags(X509_STORE *xs, unsigned long flags); +int X509_STORE_set_purpose(X509_STORE *xs, int purpose); +int X509_STORE_set_trust(X509_STORE *xs, int trust); +int X509_STORE_set1_param(X509_STORE *xs, const X509_VERIFY_PARAM *pm); +X509_VERIFY_PARAM *X509_STORE_get0_param(const X509_STORE *xs); + +void X509_STORE_set_verify(X509_STORE *xs, X509_STORE_CTX_verify_fn verify); +#define X509_STORE_set_verify_func(ctx, func) \ + X509_STORE_set_verify((ctx),(func)) +void X509_STORE_CTX_set_verify(X509_STORE_CTX *ctx, + X509_STORE_CTX_verify_fn verify); +X509_STORE_CTX_verify_fn X509_STORE_get_verify(const X509_STORE *xs); +void X509_STORE_set_verify_cb(X509_STORE *xs, + X509_STORE_CTX_verify_cb verify_cb); +# define X509_STORE_set_verify_cb_func(ctx,func) \ + X509_STORE_set_verify_cb((ctx),(func)) +X509_STORE_CTX_verify_cb X509_STORE_get_verify_cb(const X509_STORE *xs); +void X509_STORE_set_get_issuer(X509_STORE *xs, + X509_STORE_CTX_get_issuer_fn get_issuer); +X509_STORE_CTX_get_issuer_fn 
X509_STORE_get_get_issuer(const X509_STORE *xs); +void X509_STORE_set_check_issued(X509_STORE *xs, + X509_STORE_CTX_check_issued_fn check_issued); +X509_STORE_CTX_check_issued_fn X509_STORE_get_check_issued(const X509_STORE *s); +void X509_STORE_set_check_revocation(X509_STORE *xs, + X509_STORE_CTX_check_revocation_fn check_revocation); +X509_STORE_CTX_check_revocation_fn + X509_STORE_get_check_revocation(const X509_STORE *xs); +void X509_STORE_set_get_crl(X509_STORE *xs, + X509_STORE_CTX_get_crl_fn get_crl); +X509_STORE_CTX_get_crl_fn X509_STORE_get_get_crl(const X509_STORE *xs); +void X509_STORE_set_check_crl(X509_STORE *xs, + X509_STORE_CTX_check_crl_fn check_crl); +X509_STORE_CTX_check_crl_fn X509_STORE_get_check_crl(const X509_STORE *xs); +void X509_STORE_set_cert_crl(X509_STORE *xs, + X509_STORE_CTX_cert_crl_fn cert_crl); +X509_STORE_CTX_cert_crl_fn X509_STORE_get_cert_crl(const X509_STORE *xs); +void X509_STORE_set_check_policy(X509_STORE *xs, + X509_STORE_CTX_check_policy_fn check_policy); +X509_STORE_CTX_check_policy_fn X509_STORE_get_check_policy(const X509_STORE *s); +void X509_STORE_set_lookup_certs(X509_STORE *xs, + X509_STORE_CTX_lookup_certs_fn lookup_certs); +X509_STORE_CTX_lookup_certs_fn X509_STORE_get_lookup_certs(const X509_STORE *s); +void X509_STORE_set_lookup_crls(X509_STORE *xs, + X509_STORE_CTX_lookup_crls_fn lookup_crls); +#define X509_STORE_set_lookup_crls_cb(ctx, func) \ + X509_STORE_set_lookup_crls((ctx), (func)) +X509_STORE_CTX_lookup_crls_fn X509_STORE_get_lookup_crls(const X509_STORE *xs); +void X509_STORE_set_cleanup(X509_STORE *xs, + X509_STORE_CTX_cleanup_fn cleanup); +X509_STORE_CTX_cleanup_fn X509_STORE_get_cleanup(const X509_STORE *xs); + +#define X509_STORE_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509_STORE, l, p, newf, dupf, freef) +int X509_STORE_set_ex_data(X509_STORE *xs, int idx, void *data); +void *X509_STORE_get_ex_data(const X509_STORE *xs, int idx); + +X509_STORE_CTX *X509_STORE_CTX_new_ex(OSSL_LIB_CTX *libctx, const char *propq); +X509_STORE_CTX *X509_STORE_CTX_new(void); + +int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x); + +void X509_STORE_CTX_free(X509_STORE_CTX *ctx); +int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *trust_store, + X509 *target, STACK_OF(X509) *untrusted); +int X509_STORE_CTX_init_rpk(X509_STORE_CTX *ctx, X509_STORE *trust_store, + EVP_PKEY* rpk); +void X509_STORE_CTX_set0_trusted_stack(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); +void X509_STORE_CTX_cleanup(X509_STORE_CTX *ctx); + +X509_STORE *X509_STORE_CTX_get0_store(const X509_STORE_CTX *ctx); +X509 *X509_STORE_CTX_get0_cert(const X509_STORE_CTX *ctx); +EVP_PKEY *X509_STORE_CTX_get0_rpk(const X509_STORE_CTX *ctx); +STACK_OF(X509)* X509_STORE_CTX_get0_untrusted(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set0_untrusted(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); +void X509_STORE_CTX_set_verify_cb(X509_STORE_CTX *ctx, + X509_STORE_CTX_verify_cb verify); +X509_STORE_CTX_verify_cb X509_STORE_CTX_get_verify_cb(const X509_STORE_CTX *ctx); +X509_STORE_CTX_verify_fn X509_STORE_CTX_get_verify(const X509_STORE_CTX *ctx); +X509_STORE_CTX_get_issuer_fn X509_STORE_CTX_get_get_issuer(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_issued_fn X509_STORE_CTX_get_check_issued(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_revocation_fn X509_STORE_CTX_get_check_revocation(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_get_crl(X509_STORE_CTX *ctx, + X509_STORE_CTX_get_crl_fn get_crl); 
+X509_STORE_CTX_get_crl_fn X509_STORE_CTX_get_get_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_crl_fn X509_STORE_CTX_get_check_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_cert_crl_fn X509_STORE_CTX_get_cert_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX_check_policy_fn X509_STORE_CTX_get_check_policy(const X509_STORE_CTX *ctx); +X509_STORE_CTX_lookup_certs_fn X509_STORE_CTX_get_lookup_certs(const X509_STORE_CTX *ctx); +X509_STORE_CTX_lookup_crls_fn X509_STORE_CTX_get_lookup_crls(const X509_STORE_CTX *ctx); +X509_STORE_CTX_cleanup_fn X509_STORE_CTX_get_cleanup(const X509_STORE_CTX *ctx); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +# define X509_STORE_CTX_get_chain X509_STORE_CTX_get0_chain +# define X509_STORE_CTX_set_chain X509_STORE_CTX_set0_untrusted +# define X509_STORE_CTX_trusted_stack X509_STORE_CTX_set0_trusted_stack +# define X509_STORE_get_by_subject X509_STORE_CTX_get_by_subject +# define X509_STORE_get1_certs X509_STORE_CTX_get1_certs +# define X509_STORE_get1_crls X509_STORE_CTX_get1_crls +/* the following macro is misspelled; use X509_STORE_get1_certs instead */ +# define X509_STORE_get1_cert X509_STORE_CTX_get1_certs +/* the following macro is misspelled; use X509_STORE_get1_crls instead */ +# define X509_STORE_get1_crl X509_STORE_CTX_get1_crls +#endif + +X509_LOOKUP *X509_STORE_add_lookup(X509_STORE *xs, X509_LOOKUP_METHOD *m); +X509_LOOKUP_METHOD *X509_LOOKUP_hash_dir(void); +X509_LOOKUP_METHOD *X509_LOOKUP_file(void); +X509_LOOKUP_METHOD *X509_LOOKUP_store(void); + +typedef int (*X509_LOOKUP_ctrl_fn)(X509_LOOKUP *ctx, int cmd, const char *argc, + long argl, char **ret); +typedef int (*X509_LOOKUP_ctrl_ex_fn)( + X509_LOOKUP *ctx, int cmd, const char *argc, long argl, char **ret, + OSSL_LIB_CTX *libctx, const char *propq); + +typedef int (*X509_LOOKUP_get_by_subject_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_subject_ex_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + X509_OBJECT *ret, + OSSL_LIB_CTX *libctx, + const char *propq); +typedef int (*X509_LOOKUP_get_by_issuer_serial_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const X509_NAME *name, + const ASN1_INTEGER *serial, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_fingerprint_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const unsigned char* bytes, + int len, + X509_OBJECT *ret); +typedef int (*X509_LOOKUP_get_by_alias_fn)(X509_LOOKUP *ctx, + X509_LOOKUP_TYPE type, + const char *str, + int len, + X509_OBJECT *ret); + +X509_LOOKUP_METHOD *X509_LOOKUP_meth_new(const char *name); +void X509_LOOKUP_meth_free(X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_new_item(X509_LOOKUP_METHOD *method, + int (*new_item) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_new_item(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_free(X509_LOOKUP_METHOD *method, + void (*free_fn) (X509_LOOKUP *ctx)); +void (*X509_LOOKUP_meth_get_free(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_init(X509_LOOKUP_METHOD *method, + int (*init) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_init(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_shutdown(X509_LOOKUP_METHOD *method, + int (*shutdown) (X509_LOOKUP *ctx)); +int (*X509_LOOKUP_meth_get_shutdown(const X509_LOOKUP_METHOD* method)) + (X509_LOOKUP *ctx); + +int X509_LOOKUP_meth_set_ctrl(X509_LOOKUP_METHOD *method, + X509_LOOKUP_ctrl_fn ctrl_fn); 
+X509_LOOKUP_ctrl_fn X509_LOOKUP_meth_get_ctrl(const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_subject(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_subject_fn fn); +X509_LOOKUP_get_by_subject_fn X509_LOOKUP_meth_get_get_by_subject( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_issuer_serial(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_issuer_serial_fn fn); +X509_LOOKUP_get_by_issuer_serial_fn X509_LOOKUP_meth_get_get_by_issuer_serial( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_fingerprint(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_fingerprint_fn fn); +X509_LOOKUP_get_by_fingerprint_fn X509_LOOKUP_meth_get_get_by_fingerprint( + const X509_LOOKUP_METHOD *method); + +int X509_LOOKUP_meth_set_get_by_alias(X509_LOOKUP_METHOD *method, + X509_LOOKUP_get_by_alias_fn fn); +X509_LOOKUP_get_by_alias_fn X509_LOOKUP_meth_get_get_by_alias( + const X509_LOOKUP_METHOD *method); + + +int X509_STORE_add_cert(X509_STORE *xs, X509 *x); +int X509_STORE_add_crl(X509_STORE *xs, X509_CRL *x); + +int X509_STORE_CTX_get_by_subject(const X509_STORE_CTX *vs, + X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret); +X509_OBJECT *X509_STORE_CTX_get_obj_by_subject(X509_STORE_CTX *vs, + X509_LOOKUP_TYPE type, + const X509_NAME *name); + +int X509_LOOKUP_ctrl(X509_LOOKUP *ctx, int cmd, const char *argc, + long argl, char **ret); +int X509_LOOKUP_ctrl_ex(X509_LOOKUP *ctx, int cmd, const char *argc, long argl, + char **ret, OSSL_LIB_CTX *libctx, const char *propq); + +int X509_load_cert_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_file_ex(X509_LOOKUP *ctx, const char *file, int type, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_load_crl_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_crl_file(X509_LOOKUP *ctx, const char *file, int type); +int X509_load_cert_crl_file_ex(X509_LOOKUP *ctx, const char *file, int type, + OSSL_LIB_CTX *libctx, const char *propq); + +X509_LOOKUP *X509_LOOKUP_new(X509_LOOKUP_METHOD *method); +void X509_LOOKUP_free(X509_LOOKUP *ctx); +int X509_LOOKUP_init(X509_LOOKUP *ctx); +int X509_LOOKUP_by_subject(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret); +int X509_LOOKUP_by_subject_ex(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, X509_OBJECT *ret, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_LOOKUP_by_issuer_serial(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const X509_NAME *name, + const ASN1_INTEGER *serial, + X509_OBJECT *ret); +int X509_LOOKUP_by_fingerprint(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const unsigned char *bytes, int len, + X509_OBJECT *ret); +int X509_LOOKUP_by_alias(X509_LOOKUP *ctx, X509_LOOKUP_TYPE type, + const char *str, int len, X509_OBJECT *ret); +int X509_LOOKUP_set_method_data(X509_LOOKUP *ctx, void *data); +void *X509_LOOKUP_get_method_data(const X509_LOOKUP *ctx); +X509_STORE *X509_LOOKUP_get_store(const X509_LOOKUP *ctx); +int X509_LOOKUP_shutdown(X509_LOOKUP *ctx); + +int X509_STORE_load_file(X509_STORE *xs, const char *file); +int X509_STORE_load_path(X509_STORE *xs, const char *path); +int X509_STORE_load_store(X509_STORE *xs, const char *store); +int X509_STORE_load_locations(X509_STORE *s, const char *file, const char *dir); +int X509_STORE_set_default_paths(X509_STORE *xs); + +int X509_STORE_load_file_ex(X509_STORE *xs, const char *file, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_load_store_ex(X509_STORE *xs, const char 
*store, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_load_locations_ex(X509_STORE *xs, + const char *file, const char *dir, + OSSL_LIB_CTX *libctx, const char *propq); +int X509_STORE_set_default_paths_ex(X509_STORE *xs, + OSSL_LIB_CTX *libctx, const char *propq); + +#define X509_STORE_CTX_get_ex_new_index(l, p, newf, dupf, freef) \ + CRYPTO_get_ex_new_index(CRYPTO_EX_INDEX_X509_STORE_CTX, l, p, newf, dupf, freef) +int X509_STORE_CTX_set_ex_data(X509_STORE_CTX *ctx, int idx, void *data); +void *X509_STORE_CTX_get_ex_data(const X509_STORE_CTX *ctx, int idx); +int X509_STORE_CTX_get_error(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_error(X509_STORE_CTX *ctx, int s); +int X509_STORE_CTX_get_error_depth(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_error_depth(X509_STORE_CTX *ctx, int depth); +X509 *X509_STORE_CTX_get_current_cert(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_current_cert(X509_STORE_CTX *ctx, X509 *x); +X509 *X509_STORE_CTX_get0_current_issuer(const X509_STORE_CTX *ctx); +X509_CRL *X509_STORE_CTX_get0_current_crl(const X509_STORE_CTX *ctx); +X509_STORE_CTX *X509_STORE_CTX_get0_parent_ctx(const X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_STORE_CTX_get0_chain(const X509_STORE_CTX *ctx); +STACK_OF(X509) *X509_STORE_CTX_get1_chain(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set_cert(X509_STORE_CTX *ctx, X509 *target); +void X509_STORE_CTX_set0_rpk(X509_STORE_CTX *ctx, EVP_PKEY *target); +void X509_STORE_CTX_set0_verified_chain(X509_STORE_CTX *c, STACK_OF(X509) *sk); +void X509_STORE_CTX_set0_crls(X509_STORE_CTX *ctx, STACK_OF(X509_CRL) *sk); +int X509_STORE_CTX_set_purpose(X509_STORE_CTX *ctx, int purpose); +int X509_STORE_CTX_set_trust(X509_STORE_CTX *ctx, int trust); +int X509_STORE_CTX_purpose_inherit(X509_STORE_CTX *ctx, int def_purpose, + int purpose, int trust); +void X509_STORE_CTX_set_flags(X509_STORE_CTX *ctx, unsigned long flags); +void X509_STORE_CTX_set_time(X509_STORE_CTX *ctx, unsigned long flags, + time_t t); +void X509_STORE_CTX_set_current_reasons(X509_STORE_CTX *ctx, + unsigned int current_reasons); + +X509_POLICY_TREE *X509_STORE_CTX_get0_policy_tree(const X509_STORE_CTX *ctx); +int X509_STORE_CTX_get_explicit_policy(const X509_STORE_CTX *ctx); +int X509_STORE_CTX_get_num_untrusted(const X509_STORE_CTX *ctx); + +X509_VERIFY_PARAM *X509_STORE_CTX_get0_param(const X509_STORE_CTX *ctx); +void X509_STORE_CTX_set0_param(X509_STORE_CTX *ctx, X509_VERIFY_PARAM *param); +int X509_STORE_CTX_set_default(X509_STORE_CTX *ctx, const char *name); + +/* + * Bridge opacity barrier between libcrypt and libssl, also needed to support + * offline testing in test/danetest.c + */ +void X509_STORE_CTX_set0_dane(X509_STORE_CTX *ctx, SSL_DANE *dane); +#define DANE_FLAG_NO_DANE_EE_NAMECHECKS (1L << 0) + +/* X509_VERIFY_PARAM functions */ + +X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void); +void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *to, + const X509_VERIFY_PARAM *from); +int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to, + const X509_VERIFY_PARAM *from); +int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name); +int X509_VERIFY_PARAM_set_flags(X509_VERIFY_PARAM *param, + unsigned long flags); +int X509_VERIFY_PARAM_clear_flags(X509_VERIFY_PARAM *param, + unsigned long flags); +unsigned long X509_VERIFY_PARAM_get_flags(const X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set_purpose(X509_VERIFY_PARAM *param, int purpose); +int X509_VERIFY_PARAM_get_purpose(const 
X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set_trust(X509_VERIFY_PARAM *param, int trust); +void X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth); +void X509_VERIFY_PARAM_set_auth_level(X509_VERIFY_PARAM *param, int auth_level); +time_t X509_VERIFY_PARAM_get_time(const X509_VERIFY_PARAM *param); +void X509_VERIFY_PARAM_set_time(X509_VERIFY_PARAM *param, time_t t); +int X509_VERIFY_PARAM_add0_policy(X509_VERIFY_PARAM *param, + ASN1_OBJECT *policy); +int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param, + STACK_OF(ASN1_OBJECT) *policies); + +int X509_VERIFY_PARAM_set_inh_flags(X509_VERIFY_PARAM *param, + uint32_t flags); +uint32_t X509_VERIFY_PARAM_get_inh_flags(const X509_VERIFY_PARAM *param); + +char *X509_VERIFY_PARAM_get0_host(X509_VERIFY_PARAM *param, int idx); +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags); +unsigned int X509_VERIFY_PARAM_get_hostflags(const X509_VERIFY_PARAM *param); +char *X509_VERIFY_PARAM_get0_peername(const X509_VERIFY_PARAM *param); +void X509_VERIFY_PARAM_move_peername(X509_VERIFY_PARAM *, X509_VERIFY_PARAM *); +char *X509_VERIFY_PARAM_get0_email(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen); +char *X509_VERIFY_PARAM_get1_ip_asc(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen); +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, + const char *ipasc); + +int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_auth_level(const X509_VERIFY_PARAM *param); +const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param); + +int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_count(void); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name); +void X509_VERIFY_PARAM_table_cleanup(void); + +/* Non positive return values are errors */ +#define X509_PCY_TREE_FAILURE -2 /* Failure to satisfy explicit policy */ +#define X509_PCY_TREE_INVALID -1 /* Inconsistent or invalid extensions */ +#define X509_PCY_TREE_INTERNAL 0 /* Internal error, most likely malloc */ + +/* + * Positive return values form a bit mask, all but the first are internal to + * the library and don't appear in results from X509_policy_check(). 
+ */ +#define X509_PCY_TREE_VALID 1 /* The policy tree is valid */ +#define X509_PCY_TREE_EMPTY 2 /* The policy tree is empty */ +#define X509_PCY_TREE_EXPLICIT 4 /* Explicit policy required */ + +int X509_policy_check(X509_POLICY_TREE **ptree, int *pexplicit_policy, + STACK_OF(X509) *certs, + STACK_OF(ASN1_OBJECT) *policy_oids, unsigned int flags); + +void X509_policy_tree_free(X509_POLICY_TREE *tree); + +int X509_policy_tree_level_count(const X509_POLICY_TREE *tree); +X509_POLICY_LEVEL *X509_policy_tree_get0_level(const X509_POLICY_TREE *tree, + int i); + +STACK_OF(X509_POLICY_NODE) + *X509_policy_tree_get0_policies(const X509_POLICY_TREE *tree); + +STACK_OF(X509_POLICY_NODE) + *X509_policy_tree_get0_user_policies(const X509_POLICY_TREE *tree); + +int X509_policy_level_node_count(X509_POLICY_LEVEL *level); + +X509_POLICY_NODE *X509_policy_level_get0_node(const X509_POLICY_LEVEL *level, + int i); + +const ASN1_OBJECT *X509_policy_node_get0_policy(const X509_POLICY_NODE *node); + +STACK_OF(POLICYQUALINFO) + *X509_policy_node_get0_qualifiers(const X509_POLICY_NODE *node); +const X509_POLICY_NODE + *X509_policy_node_get0_parent(const X509_POLICY_NODE *node); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/contrib/openssl-cmake/common/include/openssl/x509v3.h b/contrib/openssl-cmake/common/include/openssl/x509v3.h new file mode 100644 index 000000000000..b8dabac35a49 --- /dev/null +++ b/contrib/openssl-cmake/common/include/openssl/x509v3.h @@ -0,0 +1,1968 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from include/openssl/x509v3.h.in + * + * Copyright 1999-2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + + +#ifndef OPENSSL_X509V3_H +# define OPENSSL_X509V3_H +# pragma once + +# include +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define HEADER_X509V3_H +# endif + +# include +# include +# include +# include +# ifndef OPENSSL_NO_STDIO +# include +# endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward reference */ +struct v3_ext_method; +struct v3_ext_ctx; + +/* Useful typedefs */ + +typedef void *(*X509V3_EXT_NEW)(void); +typedef void (*X509V3_EXT_FREE) (void *); +typedef void *(*X509V3_EXT_D2I)(void *, const unsigned char **, long); +typedef int (*X509V3_EXT_I2D) (const void *, unsigned char **); +typedef STACK_OF(CONF_VALUE) * + (*X509V3_EXT_I2V) (const struct v3_ext_method *method, void *ext, + STACK_OF(CONF_VALUE) *extlist); +typedef void *(*X509V3_EXT_V2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, + STACK_OF(CONF_VALUE) *values); +typedef char *(*X509V3_EXT_I2S)(const struct v3_ext_method *method, + void *ext); +typedef void *(*X509V3_EXT_S2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, const char *str); +typedef int (*X509V3_EXT_I2R) (const struct v3_ext_method *method, void *ext, + BIO *out, int indent); +typedef void *(*X509V3_EXT_R2I)(const struct v3_ext_method *method, + struct v3_ext_ctx *ctx, const char *str); + +/* V3 extension structure */ + +struct v3_ext_method { + int ext_nid; + int ext_flags; +/* If this is set the following four fields are ignored */ + ASN1_ITEM_EXP *it; +/* Old style ASN1 calls */ + X509V3_EXT_NEW ext_new; + X509V3_EXT_FREE ext_free; + X509V3_EXT_D2I d2i; + X509V3_EXT_I2D i2d; +/* The following pair is used for string extensions */ + X509V3_EXT_I2S i2s; + X509V3_EXT_S2I s2i; +/* The following pair is used for multi-valued extensions */ + X509V3_EXT_I2V i2v; + X509V3_EXT_V2I v2i; +/* The following are used for raw extensions */ + X509V3_EXT_I2R i2r; + X509V3_EXT_R2I r2i; + void *usr_data; /* Any extension specific data */ +}; + +typedef struct X509V3_CONF_METHOD_st { + char *(*get_string) (void *db, const char *section, const char *value); + STACK_OF(CONF_VALUE) *(*get_section) (void *db, const char *section); + void (*free_string) (void *db, char *string); + void (*free_section) (void *db, STACK_OF(CONF_VALUE) *section); +} X509V3_CONF_METHOD; + +/* Context specific info for producing X509 v3 extensions*/ +struct v3_ext_ctx { +# define X509V3_CTX_TEST 0x1 +# ifndef OPENSSL_NO_DEPRECATED_3_0 +# define CTX_TEST X509V3_CTX_TEST +# endif +# define X509V3_CTX_REPLACE 0x2 + int flags; + X509 *issuer_cert; + X509 *subject_cert; + X509_REQ *subject_req; + X509_CRL *crl; + X509V3_CONF_METHOD *db_meth; + void *db; + EVP_PKEY *issuer_pkey; +/* Maybe more here */ +}; + +typedef struct v3_ext_method X509V3_EXT_METHOD; + +SKM_DEFINE_STACK_OF_INTERNAL(X509V3_EXT_METHOD, X509V3_EXT_METHOD, X509V3_EXT_METHOD) +#define sk_X509V3_EXT_METHOD_num(sk) OPENSSL_sk_num(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_value(sk, idx) ((X509V3_EXT_METHOD *)OPENSSL_sk_value(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk), (idx))) +#define sk_X509V3_EXT_METHOD_new(cmp) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_new(ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp))) +#define sk_X509V3_EXT_METHOD_new_null() ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_new_null()) +#define sk_X509V3_EXT_METHOD_new_reserve(cmp, n) ((STACK_OF(X509V3_EXT_METHOD) 
*)OPENSSL_sk_new_reserve(ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp), (n))) +#define sk_X509V3_EXT_METHOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (n)) +#define sk_X509V3_EXT_METHOD_free(sk) OPENSSL_sk_free(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_zero(sk) OPENSSL_sk_zero(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_delete(sk, i) ((X509V3_EXT_METHOD *)OPENSSL_sk_delete(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (i))) +#define sk_X509V3_EXT_METHOD_delete_ptr(sk, ptr) ((X509V3_EXT_METHOD *)OPENSSL_sk_delete_ptr(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr))) +#define sk_X509V3_EXT_METHOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_pop(sk) ((X509V3_EXT_METHOD *)OPENSSL_sk_pop(ossl_check_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_shift(sk) ((X509V3_EXT_METHOD *)OPENSSL_sk_shift(ossl_check_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509V3_EXT_METHOD_sk_type(sk),ossl_check_X509V3_EXT_METHOD_freefunc_type(freefunc)) +#define sk_X509V3_EXT_METHOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr), (idx)) +#define sk_X509V3_EXT_METHOD_set(sk, idx, ptr) ((X509V3_EXT_METHOD *)OPENSSL_sk_set(ossl_check_X509V3_EXT_METHOD_sk_type(sk), (idx), ossl_check_X509V3_EXT_METHOD_type(ptr))) +#define sk_X509V3_EXT_METHOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr)) +#define sk_X509V3_EXT_METHOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_type(ptr), pnum) +#define sk_X509V3_EXT_METHOD_sort(sk) OPENSSL_sk_sort(ossl_check_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk)) +#define sk_X509V3_EXT_METHOD_dup(sk) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_dup(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk))) +#define sk_X509V3_EXT_METHOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509V3_EXT_METHOD) *)OPENSSL_sk_deep_copy(ossl_check_const_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_copyfunc_type(copyfunc), ossl_check_X509V3_EXT_METHOD_freefunc_type(freefunc))) +#define sk_X509V3_EXT_METHOD_set_cmp_func(sk, cmp) ((sk_X509V3_EXT_METHOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509V3_EXT_METHOD_sk_type(sk), ossl_check_X509V3_EXT_METHOD_compfunc_type(cmp))) + + +/* ext_flags values */ +# define X509V3_EXT_DYNAMIC 0x1 +# define X509V3_EXT_CTX_DEP 0x2 +# define X509V3_EXT_MULTILINE 0x4 + +typedef BIT_STRING_BITNAME ENUMERATED_NAMES; + +typedef struct BASIC_CONSTRAINTS_st { + int ca; + ASN1_INTEGER *pathlen; +} BASIC_CONSTRAINTS; + +typedef struct OSSL_BASIC_ATTR_CONSTRAINTS_st { + int authority; + ASN1_INTEGER *pathlen; +} OSSL_BASIC_ATTR_CONSTRAINTS; + +typedef struct PKEY_USAGE_PERIOD_st { + ASN1_GENERALIZEDTIME *notBefore; + ASN1_GENERALIZEDTIME *notAfter; +} PKEY_USAGE_PERIOD; + +typedef struct 
otherName_st { + ASN1_OBJECT *type_id; + ASN1_TYPE *value; +} OTHERNAME; + +typedef struct EDIPartyName_st { + ASN1_STRING *nameAssigner; + ASN1_STRING *partyName; +} EDIPARTYNAME; + +typedef struct GENERAL_NAME_st { +# define GEN_OTHERNAME 0 +# define GEN_EMAIL 1 +# define GEN_DNS 2 +# define GEN_X400 3 +# define GEN_DIRNAME 4 +# define GEN_EDIPARTY 5 +# define GEN_URI 6 +# define GEN_IPADD 7 +# define GEN_RID 8 + int type; + union { + char *ptr; + OTHERNAME *otherName; /* otherName */ + ASN1_IA5STRING *rfc822Name; + ASN1_IA5STRING *dNSName; + ASN1_STRING *x400Address; + X509_NAME *directoryName; + EDIPARTYNAME *ediPartyName; + ASN1_IA5STRING *uniformResourceIdentifier; + ASN1_OCTET_STRING *iPAddress; + ASN1_OBJECT *registeredID; + /* Old names */ + ASN1_OCTET_STRING *ip; /* iPAddress */ + X509_NAME *dirn; /* dirn */ + ASN1_IA5STRING *ia5; /* rfc822Name, dNSName, + * uniformResourceIdentifier */ + ASN1_OBJECT *rid; /* registeredID */ + ASN1_TYPE *other; /* x400Address */ + } d; +} GENERAL_NAME; + +typedef struct ACCESS_DESCRIPTION_st { + ASN1_OBJECT *method; + GENERAL_NAME *location; +} ACCESS_DESCRIPTION; + +int GENERAL_NAME_set1_X509_NAME(GENERAL_NAME **tgt, const X509_NAME *src); + +SKM_DEFINE_STACK_OF_INTERNAL(ACCESS_DESCRIPTION, ACCESS_DESCRIPTION, ACCESS_DESCRIPTION) +#define sk_ACCESS_DESCRIPTION_num(sk) OPENSSL_sk_num(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_value(sk, idx) ((ACCESS_DESCRIPTION *)OPENSSL_sk_value(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk), (idx))) +#define sk_ACCESS_DESCRIPTION_new(cmp) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new(ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp))) +#define sk_ACCESS_DESCRIPTION_new_null() ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new_null()) +#define sk_ACCESS_DESCRIPTION_new_reserve(cmp, n) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_new_reserve(ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp), (n))) +#define sk_ACCESS_DESCRIPTION_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (n)) +#define sk_ACCESS_DESCRIPTION_free(sk) OPENSSL_sk_free(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_zero(sk) OPENSSL_sk_zero(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_delete(sk, i) ((ACCESS_DESCRIPTION *)OPENSSL_sk_delete(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (i))) +#define sk_ACCESS_DESCRIPTION_delete_ptr(sk, ptr) ((ACCESS_DESCRIPTION *)OPENSSL_sk_delete_ptr(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr))) +#define sk_ACCESS_DESCRIPTION_push(sk, ptr) OPENSSL_sk_push(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_pop(sk) ((ACCESS_DESCRIPTION *)OPENSSL_sk_pop(ossl_check_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_shift(sk) ((ACCESS_DESCRIPTION *)OPENSSL_sk_shift(ossl_check_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ACCESS_DESCRIPTION_sk_type(sk),ossl_check_ACCESS_DESCRIPTION_freefunc_type(freefunc)) +#define sk_ACCESS_DESCRIPTION_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr), (idx)) +#define sk_ACCESS_DESCRIPTION_set(sk, idx, ptr) ((ACCESS_DESCRIPTION 
*)OPENSSL_sk_set(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), (idx), ossl_check_ACCESS_DESCRIPTION_type(ptr))) +#define sk_ACCESS_DESCRIPTION_find(sk, ptr) OPENSSL_sk_find(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr)) +#define sk_ACCESS_DESCRIPTION_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_type(ptr), pnum) +#define sk_ACCESS_DESCRIPTION_sort(sk) OPENSSL_sk_sort(ossl_check_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk)) +#define sk_ACCESS_DESCRIPTION_dup(sk) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_dup(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk))) +#define sk_ACCESS_DESCRIPTION_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ACCESS_DESCRIPTION) *)OPENSSL_sk_deep_copy(ossl_check_const_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_copyfunc_type(copyfunc), ossl_check_ACCESS_DESCRIPTION_freefunc_type(freefunc))) +#define sk_ACCESS_DESCRIPTION_set_cmp_func(sk, cmp) ((sk_ACCESS_DESCRIPTION_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ACCESS_DESCRIPTION_sk_type(sk), ossl_check_ACCESS_DESCRIPTION_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_NAME, GENERAL_NAME, GENERAL_NAME) +#define sk_GENERAL_NAME_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_value(sk, idx) ((GENERAL_NAME *)OPENSSL_sk_value(ossl_check_const_GENERAL_NAME_sk_type(sk), (idx))) +#define sk_GENERAL_NAME_new(cmp) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new(ossl_check_GENERAL_NAME_compfunc_type(cmp))) +#define sk_GENERAL_NAME_new_null() ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_NAME_new_reserve(cmp, n) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_NAME_compfunc_type(cmp), (n))) +#define sk_GENERAL_NAME_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_NAME_sk_type(sk), (n)) +#define sk_GENERAL_NAME_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_delete(sk, i) ((GENERAL_NAME *)OPENSSL_sk_delete(ossl_check_GENERAL_NAME_sk_type(sk), (i))) +#define sk_GENERAL_NAME_delete_ptr(sk, ptr) ((GENERAL_NAME *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr))) +#define sk_GENERAL_NAME_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_pop(sk) ((GENERAL_NAME *)OPENSSL_sk_pop(ossl_check_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_shift(sk) ((GENERAL_NAME *)OPENSSL_sk_shift(ossl_check_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_NAME_sk_type(sk),ossl_check_GENERAL_NAME_freefunc_type(freefunc)) +#define sk_GENERAL_NAME_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr), (idx)) +#define sk_GENERAL_NAME_set(sk, idx, ptr) ((GENERAL_NAME *)OPENSSL_sk_set(ossl_check_GENERAL_NAME_sk_type(sk), (idx), ossl_check_GENERAL_NAME_type(ptr))) +#define sk_GENERAL_NAME_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr)) +#define sk_GENERAL_NAME_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_type(ptr), pnum) +#define sk_GENERAL_NAME_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_NAME_sk_type(sk)) +#define sk_GENERAL_NAME_dup(sk) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_NAME_sk_type(sk))) +#define sk_GENERAL_NAME_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_NAME) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_copyfunc_type(copyfunc), ossl_check_GENERAL_NAME_freefunc_type(freefunc))) +#define sk_GENERAL_NAME_set_cmp_func(sk, cmp) ((sk_GENERAL_NAME_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_NAME_sk_type(sk), ossl_check_GENERAL_NAME_compfunc_type(cmp))) + + +typedef STACK_OF(ACCESS_DESCRIPTION) AUTHORITY_INFO_ACCESS; +typedef STACK_OF(ASN1_OBJECT) EXTENDED_KEY_USAGE; +typedef STACK_OF(ASN1_INTEGER) TLS_FEATURE; +typedef STACK_OF(GENERAL_NAME) GENERAL_NAMES; + +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_NAMES, GENERAL_NAMES, GENERAL_NAMES) +#define sk_GENERAL_NAMES_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_value(sk, idx) ((GENERAL_NAMES *)OPENSSL_sk_value(ossl_check_const_GENERAL_NAMES_sk_type(sk), (idx))) +#define sk_GENERAL_NAMES_new(cmp) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new(ossl_check_GENERAL_NAMES_compfunc_type(cmp))) +#define sk_GENERAL_NAMES_new_null() ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_NAMES_new_reserve(cmp, n) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_NAMES_compfunc_type(cmp), (n))) +#define sk_GENERAL_NAMES_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_NAMES_sk_type(sk), (n)) +#define sk_GENERAL_NAMES_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_delete(sk, i) ((GENERAL_NAMES *)OPENSSL_sk_delete(ossl_check_GENERAL_NAMES_sk_type(sk), (i))) +#define sk_GENERAL_NAMES_delete_ptr(sk, ptr) ((GENERAL_NAMES *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr))) +#define sk_GENERAL_NAMES_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_pop(sk) ((GENERAL_NAMES *)OPENSSL_sk_pop(ossl_check_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_shift(sk) ((GENERAL_NAMES *)OPENSSL_sk_shift(ossl_check_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_NAMES_sk_type(sk),ossl_check_GENERAL_NAMES_freefunc_type(freefunc)) +#define sk_GENERAL_NAMES_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr), (idx)) +#define sk_GENERAL_NAMES_set(sk, idx, ptr) ((GENERAL_NAMES *)OPENSSL_sk_set(ossl_check_GENERAL_NAMES_sk_type(sk), (idx), ossl_check_GENERAL_NAMES_type(ptr))) +#define sk_GENERAL_NAMES_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr)) +#define sk_GENERAL_NAMES_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_type(ptr), pnum) +#define sk_GENERAL_NAMES_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_NAMES_sk_type(sk)) +#define sk_GENERAL_NAMES_dup(sk) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_NAMES_sk_type(sk))) +#define sk_GENERAL_NAMES_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_NAMES) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_copyfunc_type(copyfunc), ossl_check_GENERAL_NAMES_freefunc_type(freefunc))) +#define sk_GENERAL_NAMES_set_cmp_func(sk, cmp) ((sk_GENERAL_NAMES_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_NAMES_sk_type(sk), ossl_check_GENERAL_NAMES_compfunc_type(cmp))) + + +typedef struct DIST_POINT_NAME_st { + int type; + union { + GENERAL_NAMES *fullname; + STACK_OF(X509_NAME_ENTRY) *relativename; + } name; +/* If relativename then this contains the full distribution point name */ + X509_NAME *dpname; +} DIST_POINT_NAME; +DECLARE_ASN1_DUP_FUNCTION(DIST_POINT_NAME) +/* All existing reasons */ +# define CRLDP_ALL_REASONS 0x807f + +# define CRL_REASON_NONE -1 +# define CRL_REASON_UNSPECIFIED 0 +# define CRL_REASON_KEY_COMPROMISE 1 +# define CRL_REASON_CA_COMPROMISE 2 +# define CRL_REASON_AFFILIATION_CHANGED 3 +# define CRL_REASON_SUPERSEDED 4 +# define CRL_REASON_CESSATION_OF_OPERATION 5 +# define CRL_REASON_CERTIFICATE_HOLD 6 +# define CRL_REASON_REMOVE_FROM_CRL 8 +# define CRL_REASON_PRIVILEGE_WITHDRAWN 9 +# define CRL_REASON_AA_COMPROMISE 10 + +struct DIST_POINT_st { + DIST_POINT_NAME *distpoint; + ASN1_BIT_STRING *reasons; + GENERAL_NAMES *CRLissuer; + int dp_reasons; +}; + +SKM_DEFINE_STACK_OF_INTERNAL(DIST_POINT, DIST_POINT, DIST_POINT) +#define sk_DIST_POINT_num(sk) OPENSSL_sk_num(ossl_check_const_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_value(sk, idx) ((DIST_POINT *)OPENSSL_sk_value(ossl_check_const_DIST_POINT_sk_type(sk), (idx))) +#define sk_DIST_POINT_new(cmp) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new(ossl_check_DIST_POINT_compfunc_type(cmp))) +#define sk_DIST_POINT_new_null() ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new_null()) +#define sk_DIST_POINT_new_reserve(cmp, n) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_new_reserve(ossl_check_DIST_POINT_compfunc_type(cmp), (n))) +#define sk_DIST_POINT_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_DIST_POINT_sk_type(sk), (n)) +#define sk_DIST_POINT_free(sk) OPENSSL_sk_free(ossl_check_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_zero(sk) OPENSSL_sk_zero(ossl_check_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_delete(sk, i) ((DIST_POINT *)OPENSSL_sk_delete(ossl_check_DIST_POINT_sk_type(sk), (i))) +#define sk_DIST_POINT_delete_ptr(sk, ptr) ((DIST_POINT *)OPENSSL_sk_delete_ptr(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr))) +#define sk_DIST_POINT_push(sk, ptr) OPENSSL_sk_push(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_pop(sk) ((DIST_POINT *)OPENSSL_sk_pop(ossl_check_DIST_POINT_sk_type(sk))) +#define 
sk_DIST_POINT_shift(sk) ((DIST_POINT *)OPENSSL_sk_shift(ossl_check_DIST_POINT_sk_type(sk))) +#define sk_DIST_POINT_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_DIST_POINT_sk_type(sk),ossl_check_DIST_POINT_freefunc_type(freefunc)) +#define sk_DIST_POINT_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr), (idx)) +#define sk_DIST_POINT_set(sk, idx, ptr) ((DIST_POINT *)OPENSSL_sk_set(ossl_check_DIST_POINT_sk_type(sk), (idx), ossl_check_DIST_POINT_type(ptr))) +#define sk_DIST_POINT_find(sk, ptr) OPENSSL_sk_find(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr)) +#define sk_DIST_POINT_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_type(ptr), pnum) +#define sk_DIST_POINT_sort(sk) OPENSSL_sk_sort(ossl_check_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_DIST_POINT_sk_type(sk)) +#define sk_DIST_POINT_dup(sk) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_dup(ossl_check_const_DIST_POINT_sk_type(sk))) +#define sk_DIST_POINT_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(DIST_POINT) *)OPENSSL_sk_deep_copy(ossl_check_const_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_copyfunc_type(copyfunc), ossl_check_DIST_POINT_freefunc_type(freefunc))) +#define sk_DIST_POINT_set_cmp_func(sk, cmp) ((sk_DIST_POINT_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_DIST_POINT_sk_type(sk), ossl_check_DIST_POINT_compfunc_type(cmp))) + + +typedef STACK_OF(DIST_POINT) CRL_DIST_POINTS; + +struct AUTHORITY_KEYID_st { + ASN1_OCTET_STRING *keyid; + GENERAL_NAMES *issuer; + ASN1_INTEGER *serial; +}; + +/* Strong extranet structures */ + +typedef struct SXNET_ID_st { + ASN1_INTEGER *zone; + ASN1_OCTET_STRING *user; +} SXNETID; + +SKM_DEFINE_STACK_OF_INTERNAL(SXNETID, SXNETID, SXNETID) +#define sk_SXNETID_num(sk) OPENSSL_sk_num(ossl_check_const_SXNETID_sk_type(sk)) +#define sk_SXNETID_value(sk, idx) ((SXNETID *)OPENSSL_sk_value(ossl_check_const_SXNETID_sk_type(sk), (idx))) +#define sk_SXNETID_new(cmp) ((STACK_OF(SXNETID) *)OPENSSL_sk_new(ossl_check_SXNETID_compfunc_type(cmp))) +#define sk_SXNETID_new_null() ((STACK_OF(SXNETID) *)OPENSSL_sk_new_null()) +#define sk_SXNETID_new_reserve(cmp, n) ((STACK_OF(SXNETID) *)OPENSSL_sk_new_reserve(ossl_check_SXNETID_compfunc_type(cmp), (n))) +#define sk_SXNETID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_SXNETID_sk_type(sk), (n)) +#define sk_SXNETID_free(sk) OPENSSL_sk_free(ossl_check_SXNETID_sk_type(sk)) +#define sk_SXNETID_zero(sk) OPENSSL_sk_zero(ossl_check_SXNETID_sk_type(sk)) +#define sk_SXNETID_delete(sk, i) ((SXNETID *)OPENSSL_sk_delete(ossl_check_SXNETID_sk_type(sk), (i))) +#define sk_SXNETID_delete_ptr(sk, ptr) ((SXNETID *)OPENSSL_sk_delete_ptr(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr))) +#define sk_SXNETID_push(sk, ptr) OPENSSL_sk_push(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_pop(sk) ((SXNETID *)OPENSSL_sk_pop(ossl_check_SXNETID_sk_type(sk))) +#define sk_SXNETID_shift(sk) ((SXNETID *)OPENSSL_sk_shift(ossl_check_SXNETID_sk_type(sk))) +#define sk_SXNETID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_SXNETID_sk_type(sk),ossl_check_SXNETID_freefunc_type(freefunc)) +#define sk_SXNETID_insert(sk, ptr, idx) 
OPENSSL_sk_insert(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr), (idx)) +#define sk_SXNETID_set(sk, idx, ptr) ((SXNETID *)OPENSSL_sk_set(ossl_check_SXNETID_sk_type(sk), (idx), ossl_check_SXNETID_type(ptr))) +#define sk_SXNETID_find(sk, ptr) OPENSSL_sk_find(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr)) +#define sk_SXNETID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_type(ptr), pnum) +#define sk_SXNETID_sort(sk) OPENSSL_sk_sort(ossl_check_SXNETID_sk_type(sk)) +#define sk_SXNETID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_SXNETID_sk_type(sk)) +#define sk_SXNETID_dup(sk) ((STACK_OF(SXNETID) *)OPENSSL_sk_dup(ossl_check_const_SXNETID_sk_type(sk))) +#define sk_SXNETID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(SXNETID) *)OPENSSL_sk_deep_copy(ossl_check_const_SXNETID_sk_type(sk), ossl_check_SXNETID_copyfunc_type(copyfunc), ossl_check_SXNETID_freefunc_type(freefunc))) +#define sk_SXNETID_set_cmp_func(sk, cmp) ((sk_SXNETID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_SXNETID_sk_type(sk), ossl_check_SXNETID_compfunc_type(cmp))) + + + +typedef struct SXNET_st { + ASN1_INTEGER *version; + STACK_OF(SXNETID) *ids; +} SXNET; + +typedef struct ISSUER_SIGN_TOOL_st { + ASN1_UTF8STRING *signTool; + ASN1_UTF8STRING *cATool; + ASN1_UTF8STRING *signToolCert; + ASN1_UTF8STRING *cAToolCert; +} ISSUER_SIGN_TOOL; + +typedef struct NOTICEREF_st { + ASN1_STRING *organization; + STACK_OF(ASN1_INTEGER) *noticenos; +} NOTICEREF; + +typedef struct USERNOTICE_st { + NOTICEREF *noticeref; + ASN1_STRING *exptext; +} USERNOTICE; + +typedef struct POLICYQUALINFO_st { + ASN1_OBJECT *pqualid; + union { + ASN1_IA5STRING *cpsuri; + USERNOTICE *usernotice; + ASN1_TYPE *other; + } d; +} POLICYQUALINFO; + +SKM_DEFINE_STACK_OF_INTERNAL(POLICYQUALINFO, POLICYQUALINFO, POLICYQUALINFO) +#define sk_POLICYQUALINFO_num(sk) OPENSSL_sk_num(ossl_check_const_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_value(sk, idx) ((POLICYQUALINFO *)OPENSSL_sk_value(ossl_check_const_POLICYQUALINFO_sk_type(sk), (idx))) +#define sk_POLICYQUALINFO_new(cmp) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new(ossl_check_POLICYQUALINFO_compfunc_type(cmp))) +#define sk_POLICYQUALINFO_new_null() ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new_null()) +#define sk_POLICYQUALINFO_new_reserve(cmp, n) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_new_reserve(ossl_check_POLICYQUALINFO_compfunc_type(cmp), (n))) +#define sk_POLICYQUALINFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICYQUALINFO_sk_type(sk), (n)) +#define sk_POLICYQUALINFO_free(sk) OPENSSL_sk_free(ossl_check_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_zero(sk) OPENSSL_sk_zero(ossl_check_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_delete(sk, i) ((POLICYQUALINFO *)OPENSSL_sk_delete(ossl_check_POLICYQUALINFO_sk_type(sk), (i))) +#define sk_POLICYQUALINFO_delete_ptr(sk, ptr) ((POLICYQUALINFO *)OPENSSL_sk_delete_ptr(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr))) +#define sk_POLICYQUALINFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_pop(sk) ((POLICYQUALINFO *)OPENSSL_sk_pop(ossl_check_POLICYQUALINFO_sk_type(sk))) +#define 
sk_POLICYQUALINFO_shift(sk) ((POLICYQUALINFO *)OPENSSL_sk_shift(ossl_check_POLICYQUALINFO_sk_type(sk))) +#define sk_POLICYQUALINFO_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_POLICYQUALINFO_sk_type(sk),ossl_check_POLICYQUALINFO_freefunc_type(freefunc)) +#define sk_POLICYQUALINFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr), (idx)) +#define sk_POLICYQUALINFO_set(sk, idx, ptr) ((POLICYQUALINFO *)OPENSSL_sk_set(ossl_check_POLICYQUALINFO_sk_type(sk), (idx), ossl_check_POLICYQUALINFO_type(ptr))) +#define sk_POLICYQUALINFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr)) +#define sk_POLICYQUALINFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_type(ptr), pnum) +#define sk_POLICYQUALINFO_sort(sk) OPENSSL_sk_sort(ossl_check_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICYQUALINFO_sk_type(sk)) +#define sk_POLICYQUALINFO_dup(sk) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_dup(ossl_check_const_POLICYQUALINFO_sk_type(sk))) +#define sk_POLICYQUALINFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICYQUALINFO) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_copyfunc_type(copyfunc), ossl_check_POLICYQUALINFO_freefunc_type(freefunc))) +#define sk_POLICYQUALINFO_set_cmp_func(sk, cmp) ((sk_POLICYQUALINFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICYQUALINFO_sk_type(sk), ossl_check_POLICYQUALINFO_compfunc_type(cmp))) + + + +typedef struct POLICYINFO_st { + ASN1_OBJECT *policyid; + STACK_OF(POLICYQUALINFO) *qualifiers; +} POLICYINFO; + +SKM_DEFINE_STACK_OF_INTERNAL(POLICYINFO, POLICYINFO, POLICYINFO) +#define sk_POLICYINFO_num(sk) OPENSSL_sk_num(ossl_check_const_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_value(sk, idx) ((POLICYINFO *)OPENSSL_sk_value(ossl_check_const_POLICYINFO_sk_type(sk), (idx))) +#define sk_POLICYINFO_new(cmp) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new(ossl_check_POLICYINFO_compfunc_type(cmp))) +#define sk_POLICYINFO_new_null() ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new_null()) +#define sk_POLICYINFO_new_reserve(cmp, n) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_new_reserve(ossl_check_POLICYINFO_compfunc_type(cmp), (n))) +#define sk_POLICYINFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICYINFO_sk_type(sk), (n)) +#define sk_POLICYINFO_free(sk) OPENSSL_sk_free(ossl_check_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_zero(sk) OPENSSL_sk_zero(ossl_check_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_delete(sk, i) ((POLICYINFO *)OPENSSL_sk_delete(ossl_check_POLICYINFO_sk_type(sk), (i))) +#define sk_POLICYINFO_delete_ptr(sk, ptr) ((POLICYINFO *)OPENSSL_sk_delete_ptr(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr))) +#define sk_POLICYINFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_pop(sk) ((POLICYINFO *)OPENSSL_sk_pop(ossl_check_POLICYINFO_sk_type(sk))) +#define sk_POLICYINFO_shift(sk) ((POLICYINFO *)OPENSSL_sk_shift(ossl_check_POLICYINFO_sk_type(sk))) +#define sk_POLICYINFO_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_POLICYINFO_sk_type(sk),ossl_check_POLICYINFO_freefunc_type(freefunc)) +#define sk_POLICYINFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr), (idx)) +#define sk_POLICYINFO_set(sk, idx, ptr) ((POLICYINFO *)OPENSSL_sk_set(ossl_check_POLICYINFO_sk_type(sk), (idx), ossl_check_POLICYINFO_type(ptr))) +#define sk_POLICYINFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr)) +#define sk_POLICYINFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_type(ptr), pnum) +#define sk_POLICYINFO_sort(sk) OPENSSL_sk_sort(ossl_check_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICYINFO_sk_type(sk)) +#define sk_POLICYINFO_dup(sk) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_dup(ossl_check_const_POLICYINFO_sk_type(sk))) +#define sk_POLICYINFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICYINFO) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_copyfunc_type(copyfunc), ossl_check_POLICYINFO_freefunc_type(freefunc))) +#define sk_POLICYINFO_set_cmp_func(sk, cmp) ((sk_POLICYINFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICYINFO_sk_type(sk), ossl_check_POLICYINFO_compfunc_type(cmp))) + + +typedef STACK_OF(POLICYINFO) CERTIFICATEPOLICIES; + +typedef struct POLICY_MAPPING_st { + ASN1_OBJECT *issuerDomainPolicy; + ASN1_OBJECT *subjectDomainPolicy; +} POLICY_MAPPING; + +SKM_DEFINE_STACK_OF_INTERNAL(POLICY_MAPPING, POLICY_MAPPING, POLICY_MAPPING) +#define sk_POLICY_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_value(sk, idx) ((POLICY_MAPPING *)OPENSSL_sk_value(ossl_check_const_POLICY_MAPPING_sk_type(sk), (idx))) +#define sk_POLICY_MAPPING_new(cmp) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new(ossl_check_POLICY_MAPPING_compfunc_type(cmp))) +#define sk_POLICY_MAPPING_new_null() ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new_null()) +#define sk_POLICY_MAPPING_new_reserve(cmp, n) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_POLICY_MAPPING_compfunc_type(cmp), (n))) +#define sk_POLICY_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_POLICY_MAPPING_sk_type(sk), (n)) +#define sk_POLICY_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_delete(sk, i) ((POLICY_MAPPING *)OPENSSL_sk_delete(ossl_check_POLICY_MAPPING_sk_type(sk), (i))) +#define sk_POLICY_MAPPING_delete_ptr(sk, ptr) ((POLICY_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr))) +#define sk_POLICY_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_pop(sk) ((POLICY_MAPPING *)OPENSSL_sk_pop(ossl_check_POLICY_MAPPING_sk_type(sk))) +#define sk_POLICY_MAPPING_shift(sk) ((POLICY_MAPPING *)OPENSSL_sk_shift(ossl_check_POLICY_MAPPING_sk_type(sk))) +#define sk_POLICY_MAPPING_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_POLICY_MAPPING_sk_type(sk),ossl_check_POLICY_MAPPING_freefunc_type(freefunc)) +#define sk_POLICY_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr), (idx)) +#define sk_POLICY_MAPPING_set(sk, idx, ptr) ((POLICY_MAPPING *)OPENSSL_sk_set(ossl_check_POLICY_MAPPING_sk_type(sk), (idx), ossl_check_POLICY_MAPPING_type(ptr))) +#define sk_POLICY_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr)) +#define sk_POLICY_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_type(ptr), pnum) +#define sk_POLICY_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_POLICY_MAPPING_sk_type(sk)) +#define sk_POLICY_MAPPING_dup(sk) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_POLICY_MAPPING_sk_type(sk))) +#define sk_POLICY_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(POLICY_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_copyfunc_type(copyfunc), ossl_check_POLICY_MAPPING_freefunc_type(freefunc))) +#define sk_POLICY_MAPPING_set_cmp_func(sk, cmp) ((sk_POLICY_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_POLICY_MAPPING_sk_type(sk), ossl_check_POLICY_MAPPING_compfunc_type(cmp))) + + +typedef STACK_OF(POLICY_MAPPING) POLICY_MAPPINGS; + +typedef struct GENERAL_SUBTREE_st { + GENERAL_NAME *base; + ASN1_INTEGER *minimum; + ASN1_INTEGER *maximum; +} GENERAL_SUBTREE; + +SKM_DEFINE_STACK_OF_INTERNAL(GENERAL_SUBTREE, GENERAL_SUBTREE, GENERAL_SUBTREE) +#define sk_GENERAL_SUBTREE_num(sk) OPENSSL_sk_num(ossl_check_const_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_value(sk, idx) ((GENERAL_SUBTREE *)OPENSSL_sk_value(ossl_check_const_GENERAL_SUBTREE_sk_type(sk), (idx))) +#define sk_GENERAL_SUBTREE_new(cmp) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new(ossl_check_GENERAL_SUBTREE_compfunc_type(cmp))) +#define sk_GENERAL_SUBTREE_new_null() ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new_null()) +#define sk_GENERAL_SUBTREE_new_reserve(cmp, n) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_new_reserve(ossl_check_GENERAL_SUBTREE_compfunc_type(cmp), (n))) +#define sk_GENERAL_SUBTREE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_GENERAL_SUBTREE_sk_type(sk), (n)) +#define sk_GENERAL_SUBTREE_free(sk) OPENSSL_sk_free(ossl_check_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_zero(sk) OPENSSL_sk_zero(ossl_check_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_delete(sk, i) ((GENERAL_SUBTREE *)OPENSSL_sk_delete(ossl_check_GENERAL_SUBTREE_sk_type(sk), (i))) +#define sk_GENERAL_SUBTREE_delete_ptr(sk, ptr) ((GENERAL_SUBTREE *)OPENSSL_sk_delete_ptr(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr))) +#define sk_GENERAL_SUBTREE_push(sk, ptr) OPENSSL_sk_push(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_pop(sk) ((GENERAL_SUBTREE *)OPENSSL_sk_pop(ossl_check_GENERAL_SUBTREE_sk_type(sk))) +#define sk_GENERAL_SUBTREE_shift(sk) ((GENERAL_SUBTREE 
*)OPENSSL_sk_shift(ossl_check_GENERAL_SUBTREE_sk_type(sk))) +#define sk_GENERAL_SUBTREE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_GENERAL_SUBTREE_sk_type(sk),ossl_check_GENERAL_SUBTREE_freefunc_type(freefunc)) +#define sk_GENERAL_SUBTREE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr), (idx)) +#define sk_GENERAL_SUBTREE_set(sk, idx, ptr) ((GENERAL_SUBTREE *)OPENSSL_sk_set(ossl_check_GENERAL_SUBTREE_sk_type(sk), (idx), ossl_check_GENERAL_SUBTREE_type(ptr))) +#define sk_GENERAL_SUBTREE_find(sk, ptr) OPENSSL_sk_find(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr)) +#define sk_GENERAL_SUBTREE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_type(ptr), pnum) +#define sk_GENERAL_SUBTREE_sort(sk) OPENSSL_sk_sort(ossl_check_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_GENERAL_SUBTREE_sk_type(sk)) +#define sk_GENERAL_SUBTREE_dup(sk) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_dup(ossl_check_const_GENERAL_SUBTREE_sk_type(sk))) +#define sk_GENERAL_SUBTREE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(GENERAL_SUBTREE) *)OPENSSL_sk_deep_copy(ossl_check_const_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_copyfunc_type(copyfunc), ossl_check_GENERAL_SUBTREE_freefunc_type(freefunc))) +#define sk_GENERAL_SUBTREE_set_cmp_func(sk, cmp) ((sk_GENERAL_SUBTREE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_GENERAL_SUBTREE_sk_type(sk), ossl_check_GENERAL_SUBTREE_compfunc_type(cmp))) + + +struct NAME_CONSTRAINTS_st { + STACK_OF(GENERAL_SUBTREE) *permittedSubtrees; + STACK_OF(GENERAL_SUBTREE) *excludedSubtrees; +}; + +typedef struct POLICY_CONSTRAINTS_st { + ASN1_INTEGER *requireExplicitPolicy; + ASN1_INTEGER *inhibitPolicyMapping; +} POLICY_CONSTRAINTS; + +/* Proxy certificate structures, see RFC 3820 */ +typedef struct PROXY_POLICY_st { + ASN1_OBJECT *policyLanguage; + ASN1_OCTET_STRING *policy; +} PROXY_POLICY; + +typedef struct PROXY_CERT_INFO_EXTENSION_st { + ASN1_INTEGER *pcPathLengthConstraint; + PROXY_POLICY *proxyPolicy; +} PROXY_CERT_INFO_EXTENSION; + +DECLARE_ASN1_FUNCTIONS(PROXY_POLICY) +DECLARE_ASN1_FUNCTIONS(PROXY_CERT_INFO_EXTENSION) + +struct ISSUING_DIST_POINT_st { + DIST_POINT_NAME *distpoint; + int onlyuser; + int onlyCA; + ASN1_BIT_STRING *onlysomereasons; + int indirectCRL; + int onlyattr; +}; + +/* Values in idp_flags field */ +/* IDP present */ +# define IDP_PRESENT 0x1 +/* IDP values inconsistent */ +# define IDP_INVALID 0x2 +/* onlyuser true */ +# define IDP_ONLYUSER 0x4 +/* onlyCA true */ +# define IDP_ONLYCA 0x8 +/* onlyattr true */ +# define IDP_ONLYATTR 0x10 +/* indirectCRL true */ +# define IDP_INDIRECT 0x20 +/* onlysomereasons present */ +# define IDP_REASONS 0x40 + +# define X509V3_conf_err(val) ERR_add_error_data(6, \ + "section:", (val)->section, \ + ",name:", (val)->name, ",value:", (val)->value) + +# define X509V3_set_ctx_test(ctx) \ + X509V3_set_ctx(ctx, NULL, NULL, NULL, NULL, X509V3_CTX_TEST) +# define X509V3_set_ctx_nodb(ctx) (ctx)->db = NULL; + +# define EXT_BITSTRING(nid, table) { nid, 0, ASN1_ITEM_ref(ASN1_BIT_STRING), \ + 0,0,0,0, \ + 0,0, \ + (X509V3_EXT_I2V)i2v_ASN1_BIT_STRING, \ + (X509V3_EXT_V2I)v2i_ASN1_BIT_STRING, \ + NULL, NULL, \ + table} + +# define EXT_IA5STRING(nid) { nid, 0, 
ASN1_ITEM_ref(ASN1_IA5STRING), \ + 0,0,0,0, \ + (X509V3_EXT_I2S)i2s_ASN1_IA5STRING, \ + (X509V3_EXT_S2I)s2i_ASN1_IA5STRING, \ + 0,0,0,0, \ + NULL} + +#define EXT_UTF8STRING(nid) { nid, 0, ASN1_ITEM_ref(ASN1_UTF8STRING), \ + 0,0,0,0, \ + (X509V3_EXT_I2S)i2s_ASN1_UTF8STRING, \ + (X509V3_EXT_S2I)s2i_ASN1_UTF8STRING, \ + 0,0,0,0, \ + NULL} + +# define EXT_END { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +/* X509_PURPOSE stuff */ + +# define EXFLAG_BCONS 0x1 +# define EXFLAG_KUSAGE 0x2 +# define EXFLAG_XKUSAGE 0x4 +# define EXFLAG_NSCERT 0x8 + +# define EXFLAG_CA 0x10 +# define EXFLAG_SI 0x20 /* self-issued, maybe not self-signed */ +# define EXFLAG_V1 0x40 +# define EXFLAG_INVALID 0x80 +/* EXFLAG_SET is set to indicate that some values have been precomputed */ +# define EXFLAG_SET 0x100 +# define EXFLAG_CRITICAL 0x200 +# define EXFLAG_PROXY 0x400 + +# define EXFLAG_INVALID_POLICY 0x800 +# define EXFLAG_FRESHEST 0x1000 +# define EXFLAG_SS 0x2000 /* cert is apparently self-signed */ + +# define EXFLAG_BCONS_CRITICAL 0x10000 +# define EXFLAG_AKID_CRITICAL 0x20000 +# define EXFLAG_SKID_CRITICAL 0x40000 +# define EXFLAG_SAN_CRITICAL 0x80000 +# define EXFLAG_NO_FINGERPRINT 0x100000 + +/* https://datatracker.ietf.org/doc/html/rfc5280#section-4.2.1.3 */ +# define KU_DIGITAL_SIGNATURE X509v3_KU_DIGITAL_SIGNATURE +# define KU_NON_REPUDIATION X509v3_KU_NON_REPUDIATION +# define KU_KEY_ENCIPHERMENT X509v3_KU_KEY_ENCIPHERMENT +# define KU_DATA_ENCIPHERMENT X509v3_KU_DATA_ENCIPHERMENT +# define KU_KEY_AGREEMENT X509v3_KU_KEY_AGREEMENT +# define KU_KEY_CERT_SIGN X509v3_KU_KEY_CERT_SIGN +# define KU_CRL_SIGN X509v3_KU_CRL_SIGN +# define KU_ENCIPHER_ONLY X509v3_KU_ENCIPHER_ONLY +# define KU_DECIPHER_ONLY X509v3_KU_DECIPHER_ONLY + +# define NS_SSL_CLIENT 0x80 +# define NS_SSL_SERVER 0x40 +# define NS_SMIME 0x20 +# define NS_OBJSIGN 0x10 +# define NS_SSL_CA 0x04 +# define NS_SMIME_CA 0x02 +# define NS_OBJSIGN_CA 0x01 +# define NS_ANY_CA (NS_SSL_CA|NS_SMIME_CA|NS_OBJSIGN_CA) + +# define XKU_SSL_SERVER 0x1 +# define XKU_SSL_CLIENT 0x2 +# define XKU_SMIME 0x4 +# define XKU_CODE_SIGN 0x8 +# define XKU_SGC 0x10 /* Netscape or MS Server-Gated Crypto */ +# define XKU_OCSP_SIGN 0x20 +# define XKU_TIMESTAMP 0x40 +# define XKU_DVCS 0x80 +# define XKU_ANYEKU 0x100 + +# define X509_PURPOSE_DYNAMIC 0x1 +# define X509_PURPOSE_DYNAMIC_NAME 0x2 + +typedef struct x509_purpose_st { + int purpose; + int trust; /* Default trust ID */ + int flags; + int (*check_purpose) (const struct x509_purpose_st *, const X509 *, int); + char *name; + char *sname; + void *usr_data; +} X509_PURPOSE; + +SKM_DEFINE_STACK_OF_INTERNAL(X509_PURPOSE, X509_PURPOSE, X509_PURPOSE) +#define sk_X509_PURPOSE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_value(sk, idx) ((X509_PURPOSE *)OPENSSL_sk_value(ossl_check_const_X509_PURPOSE_sk_type(sk), (idx))) +#define sk_X509_PURPOSE_new(cmp) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new(ossl_check_X509_PURPOSE_compfunc_type(cmp))) +#define sk_X509_PURPOSE_new_null() ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new_null()) +#define sk_X509_PURPOSE_new_reserve(cmp, n) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_new_reserve(ossl_check_X509_PURPOSE_compfunc_type(cmp), (n))) +#define sk_X509_PURPOSE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_PURPOSE_sk_type(sk), (n)) +#define sk_X509_PURPOSE_free(sk) OPENSSL_sk_free(ossl_check_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_delete(sk, i) 
((X509_PURPOSE *)OPENSSL_sk_delete(ossl_check_X509_PURPOSE_sk_type(sk), (i))) +#define sk_X509_PURPOSE_delete_ptr(sk, ptr) ((X509_PURPOSE *)OPENSSL_sk_delete_ptr(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr))) +#define sk_X509_PURPOSE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_pop(sk) ((X509_PURPOSE *)OPENSSL_sk_pop(ossl_check_X509_PURPOSE_sk_type(sk))) +#define sk_X509_PURPOSE_shift(sk) ((X509_PURPOSE *)OPENSSL_sk_shift(ossl_check_X509_PURPOSE_sk_type(sk))) +#define sk_X509_PURPOSE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_PURPOSE_sk_type(sk),ossl_check_X509_PURPOSE_freefunc_type(freefunc)) +#define sk_X509_PURPOSE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr), (idx)) +#define sk_X509_PURPOSE_set(sk, idx, ptr) ((X509_PURPOSE *)OPENSSL_sk_set(ossl_check_X509_PURPOSE_sk_type(sk), (idx), ossl_check_X509_PURPOSE_type(ptr))) +#define sk_X509_PURPOSE_find(sk, ptr) OPENSSL_sk_find(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr)) +#define sk_X509_PURPOSE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_type(ptr), pnum) +#define sk_X509_PURPOSE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_PURPOSE_sk_type(sk)) +#define sk_X509_PURPOSE_dup(sk) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_dup(ossl_check_const_X509_PURPOSE_sk_type(sk))) +#define sk_X509_PURPOSE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_PURPOSE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_copyfunc_type(copyfunc), ossl_check_X509_PURPOSE_freefunc_type(freefunc))) +#define sk_X509_PURPOSE_set_cmp_func(sk, cmp) ((sk_X509_PURPOSE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_PURPOSE_sk_type(sk), ossl_check_X509_PURPOSE_compfunc_type(cmp))) + + +# define X509_PURPOSE_DEFAULT_ANY 0 +# define X509_PURPOSE_SSL_CLIENT 1 +# define X509_PURPOSE_SSL_SERVER 2 +# define X509_PURPOSE_NS_SSL_SERVER 3 +# define X509_PURPOSE_SMIME_SIGN 4 +# define X509_PURPOSE_SMIME_ENCRYPT 5 +# define X509_PURPOSE_CRL_SIGN 6 +# define X509_PURPOSE_ANY 7 +# define X509_PURPOSE_OCSP_HELPER 8 +# define X509_PURPOSE_TIMESTAMP_SIGN 9 +# define X509_PURPOSE_CODE_SIGN 10 + +# define X509_PURPOSE_MIN 1 +# define X509_PURPOSE_MAX 10 + +/* Flags for X509V3_EXT_print() */ + +# define X509V3_EXT_UNKNOWN_MASK (0xfL << 16) +/* Return error for unknown extensions */ +# define X509V3_EXT_DEFAULT 0 +/* Print error for unknown extensions */ +# define X509V3_EXT_ERROR_UNKNOWN (1L << 16) +/* ASN1 parse unknown extensions */ +# define X509V3_EXT_PARSE_UNKNOWN (2L << 16) +/* BIO_dump unknown extensions */ +# define X509V3_EXT_DUMP_UNKNOWN (3L << 16) + +/* Flags for X509V3_add1_i2d */ + +# define X509V3_ADD_OP_MASK 0xfL +# define X509V3_ADD_DEFAULT 0L +# define X509V3_ADD_APPEND 1L +# define X509V3_ADD_REPLACE 2L +# define X509V3_ADD_REPLACE_EXISTING 3L +# define X509V3_ADD_KEEP_EXISTING 4L +# define X509V3_ADD_DELETE 5L +# define X509V3_ADD_SILENT 0x10 + +DECLARE_ASN1_FUNCTIONS(BASIC_CONSTRAINTS) 
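The X509V3_ADD_* flags and the DECLARE_ASN1_FUNCTIONS(BASIC_CONSTRAINTS) declaration above are the usual entry point for attaching a basicConstraints extension programmatically. The following is a minimal sketch (not part of this patch; the function name and variables are illustrative) of how those flags are typically combined with X509V3_add1_i2d(), which is declared further down in this header:

/* Sketch: add a critical CA:TRUE basicConstraints extension to an extension
 * stack, replacing any existing basicConstraints entry. */
#include <openssl/x509v3.h>

static int add_ca_basic_constraints(STACK_OF(X509_EXTENSION) **exts)
{
    BASIC_CONSTRAINTS *bc = BASIC_CONSTRAINTS_new();
    int ok = 0;

    if (bc == NULL)
        return 0;
    bc->ca = 1;  /* CA:TRUE, no pathLenConstraint */
    /* crit = 1 marks the extension critical; X509V3_ADD_REPLACE overwrites an
     * already-present basicConstraints instead of appending a duplicate. */
    ok = X509V3_add1_i2d(exts, NID_basic_constraints, bc, 1, X509V3_ADD_REPLACE);
    BASIC_CONSTRAINTS_free(bc);  /* the extension stores a DER-encoded copy */
    return ok;
}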
+DECLARE_ASN1_FUNCTIONS(OSSL_BASIC_ATTR_CONSTRAINTS) + +DECLARE_ASN1_FUNCTIONS(SXNET) +DECLARE_ASN1_FUNCTIONS(SXNETID) + +DECLARE_ASN1_FUNCTIONS(ISSUER_SIGN_TOOL) + +int SXNET_add_id_asc(SXNET **psx, const char *zone, const char *user, int userlen); +int SXNET_add_id_ulong(SXNET **psx, unsigned long lzone, const char *user, + int userlen); +int SXNET_add_id_INTEGER(SXNET **psx, ASN1_INTEGER *izone, const char *user, + int userlen); + +ASN1_OCTET_STRING *SXNET_get_id_asc(SXNET *sx, const char *zone); +ASN1_OCTET_STRING *SXNET_get_id_ulong(SXNET *sx, unsigned long lzone); +ASN1_OCTET_STRING *SXNET_get_id_INTEGER(SXNET *sx, ASN1_INTEGER *zone); + +DECLARE_ASN1_FUNCTIONS(AUTHORITY_KEYID) + +DECLARE_ASN1_FUNCTIONS(PKEY_USAGE_PERIOD) + +DECLARE_ASN1_FUNCTIONS(GENERAL_NAME) +DECLARE_ASN1_DUP_FUNCTION(GENERAL_NAME) +int GENERAL_NAME_cmp(GENERAL_NAME *a, GENERAL_NAME *b); + +ASN1_BIT_STRING *v2i_ASN1_BIT_STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, + STACK_OF(CONF_VALUE) *nval); +STACK_OF(CONF_VALUE) *i2v_ASN1_BIT_STRING(X509V3_EXT_METHOD *method, + ASN1_BIT_STRING *bits, + STACK_OF(CONF_VALUE) *extlist); +char *i2s_ASN1_IA5STRING(X509V3_EXT_METHOD *method, ASN1_IA5STRING *ia5); +ASN1_IA5STRING *s2i_ASN1_IA5STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, const char *str); +char *i2s_ASN1_UTF8STRING(X509V3_EXT_METHOD *method, ASN1_UTF8STRING *utf8); +ASN1_UTF8STRING *s2i_ASN1_UTF8STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, const char *str); + +STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(X509V3_EXT_METHOD *method, + GENERAL_NAME *gen, + STACK_OF(CONF_VALUE) *ret); +int GENERAL_NAME_print(BIO *out, GENERAL_NAME *gen); + +DECLARE_ASN1_FUNCTIONS(GENERAL_NAMES) + +STACK_OF(CONF_VALUE) *i2v_GENERAL_NAMES(X509V3_EXT_METHOD *method, + GENERAL_NAMES *gen, + STACK_OF(CONF_VALUE) *extlist); +GENERAL_NAMES *v2i_GENERAL_NAMES(const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *nval); + +DECLARE_ASN1_FUNCTIONS(OTHERNAME) +DECLARE_ASN1_FUNCTIONS(EDIPARTYNAME) +int OTHERNAME_cmp(OTHERNAME *a, OTHERNAME *b); +void GENERAL_NAME_set0_value(GENERAL_NAME *a, int type, void *value); +void *GENERAL_NAME_get0_value(const GENERAL_NAME *a, int *ptype); +int GENERAL_NAME_set0_othername(GENERAL_NAME *gen, + ASN1_OBJECT *oid, ASN1_TYPE *value); +int GENERAL_NAME_get0_otherName(const GENERAL_NAME *gen, + ASN1_OBJECT **poid, ASN1_TYPE **pvalue); + +char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, + const ASN1_OCTET_STRING *ia5); +ASN1_OCTET_STRING *s2i_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, const char *str); + +DECLARE_ASN1_FUNCTIONS(EXTENDED_KEY_USAGE) +int i2a_ACCESS_DESCRIPTION(BIO *bp, const ACCESS_DESCRIPTION *a); + +DECLARE_ASN1_ALLOC_FUNCTIONS(TLS_FEATURE) + +DECLARE_ASN1_FUNCTIONS(CERTIFICATEPOLICIES) +DECLARE_ASN1_FUNCTIONS(POLICYINFO) +DECLARE_ASN1_FUNCTIONS(POLICYQUALINFO) +DECLARE_ASN1_FUNCTIONS(USERNOTICE) +DECLARE_ASN1_FUNCTIONS(NOTICEREF) + +DECLARE_ASN1_FUNCTIONS(CRL_DIST_POINTS) +DECLARE_ASN1_FUNCTIONS(DIST_POINT) +DECLARE_ASN1_FUNCTIONS(DIST_POINT_NAME) +DECLARE_ASN1_FUNCTIONS(ISSUING_DIST_POINT) + +int DIST_POINT_set_dpname(DIST_POINT_NAME *dpn, const X509_NAME *iname); + +int NAME_CONSTRAINTS_check(X509 *x, NAME_CONSTRAINTS *nc); +int NAME_CONSTRAINTS_check_CN(X509 *x, NAME_CONSTRAINTS *nc); + +DECLARE_ASN1_FUNCTIONS(ACCESS_DESCRIPTION) +DECLARE_ASN1_FUNCTIONS(AUTHORITY_INFO_ACCESS) + +DECLARE_ASN1_ITEM(POLICY_MAPPING) +DECLARE_ASN1_ALLOC_FUNCTIONS(POLICY_MAPPING) +DECLARE_ASN1_ITEM(POLICY_MAPPINGS) + +DECLARE_ASN1_ITEM(GENERAL_SUBTREE) 
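Among the declarations above are the GENERAL_NAME constructors and the set0/get0 accessors. As a hedged illustration only (again not code from this patch; the helper name and BIO argument are assumptions), a dNSName entry can be built and printed like this:

/* Sketch: wrap a hostname in a GENERAL_NAME of type GEN_DNS and print it. */
#include <openssl/x509v3.h>

static int print_dns_general_name(BIO *out, const char *host)
{
    GENERAL_NAME *gen = GENERAL_NAME_new();
    ASN1_IA5STRING *ia5 = ASN1_IA5STRING_new();
    int ok = 0;

    if (gen == NULL || ia5 == NULL)
        goto done;
    if (!ASN1_STRING_set(ia5, host, -1))
        goto done;
    GENERAL_NAME_set0_value(gen, GEN_DNS, ia5);  /* gen now owns ia5 */
    ia5 = NULL;
    ok = GENERAL_NAME_print(out, gen);
done:
    ASN1_IA5STRING_free(ia5);
    GENERAL_NAME_free(gen);
    return ok;
}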
+DECLARE_ASN1_ALLOC_FUNCTIONS(GENERAL_SUBTREE) + +DECLARE_ASN1_ITEM(NAME_CONSTRAINTS) +DECLARE_ASN1_ALLOC_FUNCTIONS(NAME_CONSTRAINTS) + +DECLARE_ASN1_ALLOC_FUNCTIONS(POLICY_CONSTRAINTS) +DECLARE_ASN1_ITEM(POLICY_CONSTRAINTS) + +GENERAL_NAME *a2i_GENERAL_NAME(GENERAL_NAME *out, + const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, int gen_type, + const char *value, int is_nc); + +# ifdef OPENSSL_CONF_H +GENERAL_NAME *v2i_GENERAL_NAME(const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, CONF_VALUE *cnf); +GENERAL_NAME *v2i_GENERAL_NAME_ex(GENERAL_NAME *out, + const X509V3_EXT_METHOD *method, + X509V3_CTX *ctx, CONF_VALUE *cnf, + int is_nc); + +void X509V3_conf_free(CONF_VALUE *val); + +X509_EXTENSION *X509V3_EXT_nconf_nid(CONF *conf, X509V3_CTX *ctx, int ext_nid, + const char *value); +X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx, const char *name, + const char *value); +int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx, const char *section, + STACK_OF(X509_EXTENSION) **sk); +int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section, + X509 *cert); +int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section, + X509_REQ *req); +int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section, + X509_CRL *crl); + +X509_EXTENSION *X509V3_EXT_conf_nid(LHASH_OF(CONF_VALUE) *conf, + X509V3_CTX *ctx, int ext_nid, + const char *value); +X509_EXTENSION *X509V3_EXT_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *name, const char *value); +int X509V3_EXT_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *section, X509 *cert); +int X509V3_EXT_REQ_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *section, X509_REQ *req); +int X509V3_EXT_CRL_add_conf(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, + const char *section, X509_CRL *crl); + +int X509V3_add_value_bool_nf(const char *name, int asn1_bool, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_get_value_bool(const CONF_VALUE *value, int *asn1_bool); +int X509V3_get_value_int(const CONF_VALUE *value, ASN1_INTEGER **aint); +void X509V3_set_nconf(X509V3_CTX *ctx, CONF *conf); +void X509V3_set_conf_lhash(X509V3_CTX *ctx, LHASH_OF(CONF_VALUE) *lhash); +# endif + +char *X509V3_get_string(X509V3_CTX *ctx, const char *name, const char *section); +STACK_OF(CONF_VALUE) *X509V3_get_section(X509V3_CTX *ctx, const char *section); +void X509V3_string_free(X509V3_CTX *ctx, char *str); +void X509V3_section_free(X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *section); +void X509V3_set_ctx(X509V3_CTX *ctx, X509 *issuer, X509 *subject, + X509_REQ *req, X509_CRL *crl, int flags); +/* For API backward compatibility, this is separate from X509V3_set_ctx(): */ +int X509V3_set_issuer_pkey(X509V3_CTX *ctx, EVP_PKEY *pkey); + +int X509V3_add_value(const char *name, const char *value, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_add_value_uchar(const char *name, const unsigned char *value, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_add_value_bool(const char *name, int asn1_bool, + STACK_OF(CONF_VALUE) **extlist); +int X509V3_add_value_int(const char *name, const ASN1_INTEGER *aint, + STACK_OF(CONF_VALUE) **extlist); +char *i2s_ASN1_INTEGER(X509V3_EXT_METHOD *meth, const ASN1_INTEGER *aint); +ASN1_INTEGER *s2i_ASN1_INTEGER(X509V3_EXT_METHOD *meth, const char *value); +char *i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *meth, const ASN1_ENUMERATED *aint); +char *i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *meth, + const ASN1_ENUMERATED *aint); +int 
X509V3_EXT_add(X509V3_EXT_METHOD *ext); +int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist); +int X509V3_EXT_add_alias(int nid_to, int nid_from); +void X509V3_EXT_cleanup(void); + +const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext); +const X509V3_EXT_METHOD *X509V3_EXT_get_nid(int nid); +int X509V3_add_standard_extensions(void); +STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line); +void *X509V3_EXT_d2i(X509_EXTENSION *ext); +void *X509V3_get_d2i(const STACK_OF(X509_EXTENSION) *x, int nid, int *crit, + int *idx); + +X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit, void *ext_struc); +int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid, void *value, + int crit, unsigned long flags); + +#ifndef OPENSSL_NO_DEPRECATED_1_1_0 +/* The new declarations are in crypto.h, but the old ones were here. */ +# define hex_to_string OPENSSL_buf2hexstr +# define string_to_hex OPENSSL_hexstr2buf +#endif + +void X509V3_EXT_val_prn(BIO *out, STACK_OF(CONF_VALUE) *val, int indent, + int ml); +int X509V3_EXT_print(BIO *out, X509_EXTENSION *ext, unsigned long flag, + int indent); +#ifndef OPENSSL_NO_STDIO +int X509V3_EXT_print_fp(FILE *out, X509_EXTENSION *ext, int flag, int indent); +#endif +int X509V3_extensions_print(BIO *out, const char *title, + const STACK_OF(X509_EXTENSION) *exts, + unsigned long flag, int indent); + +int X509_check_ca(X509 *x); +int X509_check_purpose(X509 *x, int id, int ca); +int X509_supported_extension(X509_EXTENSION *ex); +int X509_check_issued(X509 *issuer, X509 *subject); +int X509_check_akid(const X509 *issuer, const AUTHORITY_KEYID *akid); +void X509_set_proxy_flag(X509 *x); +void X509_set_proxy_pathlen(X509 *x, long l); +long X509_get_proxy_pathlen(X509 *x); + +uint32_t X509_get_extension_flags(X509 *x); +uint32_t X509_get_key_usage(X509 *x); +uint32_t X509_get_extended_key_usage(X509 *x); +const ASN1_OCTET_STRING *X509_get0_subject_key_id(X509 *x); +const ASN1_OCTET_STRING *X509_get0_authority_key_id(X509 *x); +const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x); +const ASN1_INTEGER *X509_get0_authority_serial(X509 *x); + +int X509_PURPOSE_get_count(void); +int X509_PURPOSE_get_unused_id(OSSL_LIB_CTX *libctx); +int X509_PURPOSE_get_by_sname(const char *sname); +int X509_PURPOSE_get_by_id(int id); +int X509_PURPOSE_add(int id, int trust, int flags, + int (*ck) (const X509_PURPOSE *, const X509 *, int), + const char *name, const char *sname, void *arg); +void X509_PURPOSE_cleanup(void); + +X509_PURPOSE *X509_PURPOSE_get0(int idx); +int X509_PURPOSE_get_id(const X509_PURPOSE *); +char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp); +char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp); +int X509_PURPOSE_get_trust(const X509_PURPOSE *xp); +int X509_PURPOSE_set(int *p, int purpose); + +STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x); +STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x); +void X509_email_free(STACK_OF(OPENSSL_STRING) *sk); +STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x); + +/* Flags for X509_check_* functions */ + +/* + * Always check subject name for host match even if subject alt names present + */ +# define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0x1 +/* Disable wildcard matching for dnsName fields and common name. */ +# define X509_CHECK_FLAG_NO_WILDCARDS 0x2 +/* Wildcards must not match a partial label. */ +# define X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS 0x4 +/* Allow (non-partial) wildcards to match multiple labels. 
*/ +# define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8 +/* Constraint verifier subdomain patterns to match a single labels. */ +# define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10 +/* Never check the subject CN */ +# define X509_CHECK_FLAG_NEVER_CHECK_SUBJECT 0x20 +/* + * Match reference identifiers starting with "." to any sub-domain. + * This is a non-public flag, turned on implicitly when the subject + * reference identity is a DNS name. + */ +# define _X509_CHECK_FLAG_DOT_SUBDOMAINS 0x8000 + +int X509_check_host(X509 *x, const char *chk, size_t chklen, + unsigned int flags, char **peername); +int X509_check_email(X509 *x, const char *chk, size_t chklen, + unsigned int flags); +int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen, + unsigned int flags); +int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags); + +ASN1_OCTET_STRING *a2i_IPADDRESS(const char *ipasc); +ASN1_OCTET_STRING *a2i_IPADDRESS_NC(const char *ipasc); +int X509V3_NAME_from_section(X509_NAME *nm, STACK_OF(CONF_VALUE) *dn_sk, + unsigned long chtype); + +void X509_POLICY_NODE_print(BIO *out, X509_POLICY_NODE *node, int indent); +SKM_DEFINE_STACK_OF_INTERNAL(X509_POLICY_NODE, X509_POLICY_NODE, X509_POLICY_NODE) +#define sk_X509_POLICY_NODE_num(sk) OPENSSL_sk_num(ossl_check_const_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_value(sk, idx) ((X509_POLICY_NODE *)OPENSSL_sk_value(ossl_check_const_X509_POLICY_NODE_sk_type(sk), (idx))) +#define sk_X509_POLICY_NODE_new(cmp) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new(ossl_check_X509_POLICY_NODE_compfunc_type(cmp))) +#define sk_X509_POLICY_NODE_new_null() ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new_null()) +#define sk_X509_POLICY_NODE_new_reserve(cmp, n) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_new_reserve(ossl_check_X509_POLICY_NODE_compfunc_type(cmp), (n))) +#define sk_X509_POLICY_NODE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_X509_POLICY_NODE_sk_type(sk), (n)) +#define sk_X509_POLICY_NODE_free(sk) OPENSSL_sk_free(ossl_check_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_zero(sk) OPENSSL_sk_zero(ossl_check_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_delete(sk, i) ((X509_POLICY_NODE *)OPENSSL_sk_delete(ossl_check_X509_POLICY_NODE_sk_type(sk), (i))) +#define sk_X509_POLICY_NODE_delete_ptr(sk, ptr) ((X509_POLICY_NODE *)OPENSSL_sk_delete_ptr(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr))) +#define sk_X509_POLICY_NODE_push(sk, ptr) OPENSSL_sk_push(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_pop(sk) ((X509_POLICY_NODE *)OPENSSL_sk_pop(ossl_check_X509_POLICY_NODE_sk_type(sk))) +#define sk_X509_POLICY_NODE_shift(sk) ((X509_POLICY_NODE *)OPENSSL_sk_shift(ossl_check_X509_POLICY_NODE_sk_type(sk))) +#define sk_X509_POLICY_NODE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_X509_POLICY_NODE_sk_type(sk),ossl_check_X509_POLICY_NODE_freefunc_type(freefunc)) +#define sk_X509_POLICY_NODE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr), (idx)) +#define sk_X509_POLICY_NODE_set(sk, idx, ptr) ((X509_POLICY_NODE *)OPENSSL_sk_set(ossl_check_X509_POLICY_NODE_sk_type(sk), (idx), ossl_check_X509_POLICY_NODE_type(ptr))) +#define sk_X509_POLICY_NODE_find(sk, ptr) 
OPENSSL_sk_find(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr)) +#define sk_X509_POLICY_NODE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_type(ptr), pnum) +#define sk_X509_POLICY_NODE_sort(sk) OPENSSL_sk_sort(ossl_check_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_X509_POLICY_NODE_sk_type(sk)) +#define sk_X509_POLICY_NODE_dup(sk) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_dup(ossl_check_const_X509_POLICY_NODE_sk_type(sk))) +#define sk_X509_POLICY_NODE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(X509_POLICY_NODE) *)OPENSSL_sk_deep_copy(ossl_check_const_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_copyfunc_type(copyfunc), ossl_check_X509_POLICY_NODE_freefunc_type(freefunc))) +#define sk_X509_POLICY_NODE_set_cmp_func(sk, cmp) ((sk_X509_POLICY_NODE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_X509_POLICY_NODE_sk_type(sk), ossl_check_X509_POLICY_NODE_compfunc_type(cmp))) + + + +#ifndef OPENSSL_NO_RFC3779 +typedef struct ASRange_st { + ASN1_INTEGER *min, *max; +} ASRange; + +# define ASIdOrRange_id 0 +# define ASIdOrRange_range 1 + +typedef struct ASIdOrRange_st { + int type; + union { + ASN1_INTEGER *id; + ASRange *range; + } u; +} ASIdOrRange; + +SKM_DEFINE_STACK_OF_INTERNAL(ASIdOrRange, ASIdOrRange, ASIdOrRange) +#define sk_ASIdOrRange_num(sk) OPENSSL_sk_num(ossl_check_const_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_value(sk, idx) ((ASIdOrRange *)OPENSSL_sk_value(ossl_check_const_ASIdOrRange_sk_type(sk), (idx))) +#define sk_ASIdOrRange_new(cmp) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new(ossl_check_ASIdOrRange_compfunc_type(cmp))) +#define sk_ASIdOrRange_new_null() ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new_null()) +#define sk_ASIdOrRange_new_reserve(cmp, n) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_new_reserve(ossl_check_ASIdOrRange_compfunc_type(cmp), (n))) +#define sk_ASIdOrRange_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASIdOrRange_sk_type(sk), (n)) +#define sk_ASIdOrRange_free(sk) OPENSSL_sk_free(ossl_check_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_zero(sk) OPENSSL_sk_zero(ossl_check_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_delete(sk, i) ((ASIdOrRange *)OPENSSL_sk_delete(ossl_check_ASIdOrRange_sk_type(sk), (i))) +#define sk_ASIdOrRange_delete_ptr(sk, ptr) ((ASIdOrRange *)OPENSSL_sk_delete_ptr(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr))) +#define sk_ASIdOrRange_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_pop(sk) ((ASIdOrRange *)OPENSSL_sk_pop(ossl_check_ASIdOrRange_sk_type(sk))) +#define sk_ASIdOrRange_shift(sk) ((ASIdOrRange *)OPENSSL_sk_shift(ossl_check_ASIdOrRange_sk_type(sk))) +#define sk_ASIdOrRange_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASIdOrRange_sk_type(sk),ossl_check_ASIdOrRange_freefunc_type(freefunc)) +#define sk_ASIdOrRange_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr), (idx)) +#define sk_ASIdOrRange_set(sk, idx, ptr) ((ASIdOrRange *)OPENSSL_sk_set(ossl_check_ASIdOrRange_sk_type(sk), (idx), ossl_check_ASIdOrRange_type(ptr))) +#define 
sk_ASIdOrRange_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr)) +#define sk_ASIdOrRange_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_type(ptr), pnum) +#define sk_ASIdOrRange_sort(sk) OPENSSL_sk_sort(ossl_check_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASIdOrRange_sk_type(sk)) +#define sk_ASIdOrRange_dup(sk) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_dup(ossl_check_const_ASIdOrRange_sk_type(sk))) +#define sk_ASIdOrRange_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASIdOrRange) *)OPENSSL_sk_deep_copy(ossl_check_const_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_copyfunc_type(copyfunc), ossl_check_ASIdOrRange_freefunc_type(freefunc))) +#define sk_ASIdOrRange_set_cmp_func(sk, cmp) ((sk_ASIdOrRange_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASIdOrRange_sk_type(sk), ossl_check_ASIdOrRange_compfunc_type(cmp))) + + +typedef STACK_OF(ASIdOrRange) ASIdOrRanges; + +# define ASIdentifierChoice_inherit 0 +# define ASIdentifierChoice_asIdsOrRanges 1 + +typedef struct ASIdentifierChoice_st { + int type; + union { + ASN1_NULL *inherit; + ASIdOrRanges *asIdsOrRanges; + } u; +} ASIdentifierChoice; + +typedef struct ASIdentifiers_st { + ASIdentifierChoice *asnum, *rdi; +} ASIdentifiers; + +DECLARE_ASN1_FUNCTIONS(ASRange) +DECLARE_ASN1_FUNCTIONS(ASIdOrRange) +DECLARE_ASN1_FUNCTIONS(ASIdentifierChoice) +DECLARE_ASN1_FUNCTIONS(ASIdentifiers) + +typedef struct IPAddressRange_st { + ASN1_BIT_STRING *min, *max; +} IPAddressRange; + +# define IPAddressOrRange_addressPrefix 0 +# define IPAddressOrRange_addressRange 1 + +typedef struct IPAddressOrRange_st { + int type; + union { + ASN1_BIT_STRING *addressPrefix; + IPAddressRange *addressRange; + } u; +} IPAddressOrRange; + +SKM_DEFINE_STACK_OF_INTERNAL(IPAddressOrRange, IPAddressOrRange, IPAddressOrRange) +#define sk_IPAddressOrRange_num(sk) OPENSSL_sk_num(ossl_check_const_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_value(sk, idx) ((IPAddressOrRange *)OPENSSL_sk_value(ossl_check_const_IPAddressOrRange_sk_type(sk), (idx))) +#define sk_IPAddressOrRange_new(cmp) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new(ossl_check_IPAddressOrRange_compfunc_type(cmp))) +#define sk_IPAddressOrRange_new_null() ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new_null()) +#define sk_IPAddressOrRange_new_reserve(cmp, n) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_new_reserve(ossl_check_IPAddressOrRange_compfunc_type(cmp), (n))) +#define sk_IPAddressOrRange_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_IPAddressOrRange_sk_type(sk), (n)) +#define sk_IPAddressOrRange_free(sk) OPENSSL_sk_free(ossl_check_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_zero(sk) OPENSSL_sk_zero(ossl_check_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_delete(sk, i) ((IPAddressOrRange *)OPENSSL_sk_delete(ossl_check_IPAddressOrRange_sk_type(sk), (i))) +#define sk_IPAddressOrRange_delete_ptr(sk, ptr) ((IPAddressOrRange *)OPENSSL_sk_delete_ptr(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr))) +#define sk_IPAddressOrRange_push(sk, ptr) OPENSSL_sk_push(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_IPAddressOrRange_sk_type(sk), 
ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_pop(sk) ((IPAddressOrRange *)OPENSSL_sk_pop(ossl_check_IPAddressOrRange_sk_type(sk))) +#define sk_IPAddressOrRange_shift(sk) ((IPAddressOrRange *)OPENSSL_sk_shift(ossl_check_IPAddressOrRange_sk_type(sk))) +#define sk_IPAddressOrRange_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_IPAddressOrRange_sk_type(sk),ossl_check_IPAddressOrRange_freefunc_type(freefunc)) +#define sk_IPAddressOrRange_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr), (idx)) +#define sk_IPAddressOrRange_set(sk, idx, ptr) ((IPAddressOrRange *)OPENSSL_sk_set(ossl_check_IPAddressOrRange_sk_type(sk), (idx), ossl_check_IPAddressOrRange_type(ptr))) +#define sk_IPAddressOrRange_find(sk, ptr) OPENSSL_sk_find(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr)) +#define sk_IPAddressOrRange_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_type(ptr), pnum) +#define sk_IPAddressOrRange_sort(sk) OPENSSL_sk_sort(ossl_check_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_IPAddressOrRange_sk_type(sk)) +#define sk_IPAddressOrRange_dup(sk) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_dup(ossl_check_const_IPAddressOrRange_sk_type(sk))) +#define sk_IPAddressOrRange_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(IPAddressOrRange) *)OPENSSL_sk_deep_copy(ossl_check_const_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_copyfunc_type(copyfunc), ossl_check_IPAddressOrRange_freefunc_type(freefunc))) +#define sk_IPAddressOrRange_set_cmp_func(sk, cmp) ((sk_IPAddressOrRange_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_IPAddressOrRange_sk_type(sk), ossl_check_IPAddressOrRange_compfunc_type(cmp))) + + +typedef STACK_OF(IPAddressOrRange) IPAddressOrRanges; + +# define IPAddressChoice_inherit 0 +# define IPAddressChoice_addressesOrRanges 1 + +typedef struct IPAddressChoice_st { + int type; + union { + ASN1_NULL *inherit; + IPAddressOrRanges *addressesOrRanges; + } u; +} IPAddressChoice; + +typedef struct IPAddressFamily_st { + ASN1_OCTET_STRING *addressFamily; + IPAddressChoice *ipAddressChoice; +} IPAddressFamily; + +SKM_DEFINE_STACK_OF_INTERNAL(IPAddressFamily, IPAddressFamily, IPAddressFamily) +#define sk_IPAddressFamily_num(sk) OPENSSL_sk_num(ossl_check_const_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_value(sk, idx) ((IPAddressFamily *)OPENSSL_sk_value(ossl_check_const_IPAddressFamily_sk_type(sk), (idx))) +#define sk_IPAddressFamily_new(cmp) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new(ossl_check_IPAddressFamily_compfunc_type(cmp))) +#define sk_IPAddressFamily_new_null() ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new_null()) +#define sk_IPAddressFamily_new_reserve(cmp, n) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_new_reserve(ossl_check_IPAddressFamily_compfunc_type(cmp), (n))) +#define sk_IPAddressFamily_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_IPAddressFamily_sk_type(sk), (n)) +#define sk_IPAddressFamily_free(sk) OPENSSL_sk_free(ossl_check_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_zero(sk) OPENSSL_sk_zero(ossl_check_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_delete(sk, i) ((IPAddressFamily *)OPENSSL_sk_delete(ossl_check_IPAddressFamily_sk_type(sk), (i))) +#define 
sk_IPAddressFamily_delete_ptr(sk, ptr) ((IPAddressFamily *)OPENSSL_sk_delete_ptr(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr))) +#define sk_IPAddressFamily_push(sk, ptr) OPENSSL_sk_push(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_pop(sk) ((IPAddressFamily *)OPENSSL_sk_pop(ossl_check_IPAddressFamily_sk_type(sk))) +#define sk_IPAddressFamily_shift(sk) ((IPAddressFamily *)OPENSSL_sk_shift(ossl_check_IPAddressFamily_sk_type(sk))) +#define sk_IPAddressFamily_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_IPAddressFamily_sk_type(sk),ossl_check_IPAddressFamily_freefunc_type(freefunc)) +#define sk_IPAddressFamily_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr), (idx)) +#define sk_IPAddressFamily_set(sk, idx, ptr) ((IPAddressFamily *)OPENSSL_sk_set(ossl_check_IPAddressFamily_sk_type(sk), (idx), ossl_check_IPAddressFamily_type(ptr))) +#define sk_IPAddressFamily_find(sk, ptr) OPENSSL_sk_find(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr)) +#define sk_IPAddressFamily_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_type(ptr), pnum) +#define sk_IPAddressFamily_sort(sk) OPENSSL_sk_sort(ossl_check_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_IPAddressFamily_sk_type(sk)) +#define sk_IPAddressFamily_dup(sk) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_dup(ossl_check_const_IPAddressFamily_sk_type(sk))) +#define sk_IPAddressFamily_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(IPAddressFamily) *)OPENSSL_sk_deep_copy(ossl_check_const_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_copyfunc_type(copyfunc), ossl_check_IPAddressFamily_freefunc_type(freefunc))) +#define sk_IPAddressFamily_set_cmp_func(sk, cmp) ((sk_IPAddressFamily_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_IPAddressFamily_sk_type(sk), ossl_check_IPAddressFamily_compfunc_type(cmp))) + + + +typedef STACK_OF(IPAddressFamily) IPAddrBlocks; + +DECLARE_ASN1_FUNCTIONS(IPAddressRange) +DECLARE_ASN1_FUNCTIONS(IPAddressOrRange) +DECLARE_ASN1_FUNCTIONS(IPAddressChoice) +DECLARE_ASN1_FUNCTIONS(IPAddressFamily) + +/* + * API tag for elements of the ASIdentifer SEQUENCE. + */ +# define V3_ASID_ASNUM 0 +# define V3_ASID_RDI 1 + +/* + * AFI values, assigned by IANA. It'd be nice to make the AFI + * handling code totally generic, but there are too many little things + * that would need to be defined for other address families for it to + * be worth the trouble. + */ +# define IANA_AFI_IPV4 1 +# define IANA_AFI_IPV6 2 + +/* + * Utilities to construct and extract values from RFC3779 extensions, + * since some of the encodings (particularly for IP address prefixes + * and ranges) are a bit tedious to work with directly. 
+ */ +int X509v3_asid_add_inherit(ASIdentifiers *asid, int which); +int X509v3_asid_add_id_or_range(ASIdentifiers *asid, int which, + ASN1_INTEGER *min, ASN1_INTEGER *max); +int X509v3_addr_add_inherit(IPAddrBlocks *addr, + const unsigned afi, const unsigned *safi); +int X509v3_addr_add_prefix(IPAddrBlocks *addr, + const unsigned afi, const unsigned *safi, + unsigned char *a, const int prefixlen); +int X509v3_addr_add_range(IPAddrBlocks *addr, + const unsigned afi, const unsigned *safi, + unsigned char *min, unsigned char *max); +unsigned X509v3_addr_get_afi(const IPAddressFamily *f); +int X509v3_addr_get_range(IPAddressOrRange *aor, const unsigned afi, + unsigned char *min, unsigned char *max, + const int length); + +/* + * Canonical forms. + */ +int X509v3_asid_is_canonical(ASIdentifiers *asid); +int X509v3_addr_is_canonical(IPAddrBlocks *addr); +int X509v3_asid_canonize(ASIdentifiers *asid); +int X509v3_addr_canonize(IPAddrBlocks *addr); + +/* + * Tests for inheritance and containment. + */ +int X509v3_asid_inherits(ASIdentifiers *asid); +int X509v3_addr_inherits(IPAddrBlocks *addr); +int X509v3_asid_subset(ASIdentifiers *a, ASIdentifiers *b); +int X509v3_addr_subset(IPAddrBlocks *a, IPAddrBlocks *b); + +/* + * Check whether RFC 3779 extensions nest properly in chains. + */ +int X509v3_asid_validate_path(X509_STORE_CTX *); +int X509v3_addr_validate_path(X509_STORE_CTX *); +int X509v3_asid_validate_resource_set(STACK_OF(X509) *chain, + ASIdentifiers *ext, + int allow_inheritance); +int X509v3_addr_validate_resource_set(STACK_OF(X509) *chain, + IPAddrBlocks *ext, int allow_inheritance); + +#endif /* OPENSSL_NO_RFC3779 */ + +SKM_DEFINE_STACK_OF_INTERNAL(ASN1_STRING, ASN1_STRING, ASN1_STRING) +#define sk_ASN1_STRING_num(sk) OPENSSL_sk_num(ossl_check_const_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_value(sk, idx) ((ASN1_STRING *)OPENSSL_sk_value(ossl_check_const_ASN1_STRING_sk_type(sk), (idx))) +#define sk_ASN1_STRING_new(cmp) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new(ossl_check_ASN1_STRING_compfunc_type(cmp))) +#define sk_ASN1_STRING_new_null() ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new_null()) +#define sk_ASN1_STRING_new_reserve(cmp, n) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_new_reserve(ossl_check_ASN1_STRING_compfunc_type(cmp), (n))) +#define sk_ASN1_STRING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ASN1_STRING_sk_type(sk), (n)) +#define sk_ASN1_STRING_free(sk) OPENSSL_sk_free(ossl_check_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_zero(sk) OPENSSL_sk_zero(ossl_check_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_delete(sk, i) ((ASN1_STRING *)OPENSSL_sk_delete(ossl_check_ASN1_STRING_sk_type(sk), (i))) +#define sk_ASN1_STRING_delete_ptr(sk, ptr) ((ASN1_STRING *)OPENSSL_sk_delete_ptr(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr))) +#define sk_ASN1_STRING_push(sk, ptr) OPENSSL_sk_push(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_pop(sk) ((ASN1_STRING *)OPENSSL_sk_pop(ossl_check_ASN1_STRING_sk_type(sk))) +#define sk_ASN1_STRING_shift(sk) ((ASN1_STRING *)OPENSSL_sk_shift(ossl_check_ASN1_STRING_sk_type(sk))) +#define sk_ASN1_STRING_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ASN1_STRING_sk_type(sk),ossl_check_ASN1_STRING_freefunc_type(freefunc)) +#define sk_ASN1_STRING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr), 
(idx)) +#define sk_ASN1_STRING_set(sk, idx, ptr) ((ASN1_STRING *)OPENSSL_sk_set(ossl_check_ASN1_STRING_sk_type(sk), (idx), ossl_check_ASN1_STRING_type(ptr))) +#define sk_ASN1_STRING_find(sk, ptr) OPENSSL_sk_find(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr)) +#define sk_ASN1_STRING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_type(ptr), pnum) +#define sk_ASN1_STRING_sort(sk) OPENSSL_sk_sort(ossl_check_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ASN1_STRING_sk_type(sk)) +#define sk_ASN1_STRING_dup(sk) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_dup(ossl_check_const_ASN1_STRING_sk_type(sk))) +#define sk_ASN1_STRING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ASN1_STRING) *)OPENSSL_sk_deep_copy(ossl_check_const_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_copyfunc_type(copyfunc), ossl_check_ASN1_STRING_freefunc_type(freefunc))) +#define sk_ASN1_STRING_set_cmp_func(sk, cmp) ((sk_ASN1_STRING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ASN1_STRING_sk_type(sk), ossl_check_ASN1_STRING_compfunc_type(cmp))) + + +/* + * Admission Syntax + */ +typedef struct NamingAuthority_st NAMING_AUTHORITY; +typedef struct ProfessionInfo_st PROFESSION_INFO; +typedef struct Admissions_st ADMISSIONS; +typedef struct AdmissionSyntax_st ADMISSION_SYNTAX; +DECLARE_ASN1_FUNCTIONS(NAMING_AUTHORITY) +DECLARE_ASN1_FUNCTIONS(PROFESSION_INFO) +DECLARE_ASN1_FUNCTIONS(ADMISSIONS) +DECLARE_ASN1_FUNCTIONS(ADMISSION_SYNTAX) +SKM_DEFINE_STACK_OF_INTERNAL(PROFESSION_INFO, PROFESSION_INFO, PROFESSION_INFO) +#define sk_PROFESSION_INFO_num(sk) OPENSSL_sk_num(ossl_check_const_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_value(sk, idx) ((PROFESSION_INFO *)OPENSSL_sk_value(ossl_check_const_PROFESSION_INFO_sk_type(sk), (idx))) +#define sk_PROFESSION_INFO_new(cmp) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new(ossl_check_PROFESSION_INFO_compfunc_type(cmp))) +#define sk_PROFESSION_INFO_new_null() ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new_null()) +#define sk_PROFESSION_INFO_new_reserve(cmp, n) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_new_reserve(ossl_check_PROFESSION_INFO_compfunc_type(cmp), (n))) +#define sk_PROFESSION_INFO_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_PROFESSION_INFO_sk_type(sk), (n)) +#define sk_PROFESSION_INFO_free(sk) OPENSSL_sk_free(ossl_check_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_zero(sk) OPENSSL_sk_zero(ossl_check_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_delete(sk, i) ((PROFESSION_INFO *)OPENSSL_sk_delete(ossl_check_PROFESSION_INFO_sk_type(sk), (i))) +#define sk_PROFESSION_INFO_delete_ptr(sk, ptr) ((PROFESSION_INFO *)OPENSSL_sk_delete_ptr(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr))) +#define sk_PROFESSION_INFO_push(sk, ptr) OPENSSL_sk_push(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_pop(sk) ((PROFESSION_INFO *)OPENSSL_sk_pop(ossl_check_PROFESSION_INFO_sk_type(sk))) +#define sk_PROFESSION_INFO_shift(sk) ((PROFESSION_INFO *)OPENSSL_sk_shift(ossl_check_PROFESSION_INFO_sk_type(sk))) +#define sk_PROFESSION_INFO_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_PROFESSION_INFO_sk_type(sk),ossl_check_PROFESSION_INFO_freefunc_type(freefunc)) +#define sk_PROFESSION_INFO_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr), (idx)) +#define sk_PROFESSION_INFO_set(sk, idx, ptr) ((PROFESSION_INFO *)OPENSSL_sk_set(ossl_check_PROFESSION_INFO_sk_type(sk), (idx), ossl_check_PROFESSION_INFO_type(ptr))) +#define sk_PROFESSION_INFO_find(sk, ptr) OPENSSL_sk_find(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr)) +#define sk_PROFESSION_INFO_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_type(ptr), pnum) +#define sk_PROFESSION_INFO_sort(sk) OPENSSL_sk_sort(ossl_check_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_PROFESSION_INFO_sk_type(sk)) +#define sk_PROFESSION_INFO_dup(sk) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_dup(ossl_check_const_PROFESSION_INFO_sk_type(sk))) +#define sk_PROFESSION_INFO_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(PROFESSION_INFO) *)OPENSSL_sk_deep_copy(ossl_check_const_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_copyfunc_type(copyfunc), ossl_check_PROFESSION_INFO_freefunc_type(freefunc))) +#define sk_PROFESSION_INFO_set_cmp_func(sk, cmp) ((sk_PROFESSION_INFO_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_PROFESSION_INFO_sk_type(sk), ossl_check_PROFESSION_INFO_compfunc_type(cmp))) +SKM_DEFINE_STACK_OF_INTERNAL(ADMISSIONS, ADMISSIONS, ADMISSIONS) +#define sk_ADMISSIONS_num(sk) OPENSSL_sk_num(ossl_check_const_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_value(sk, idx) ((ADMISSIONS *)OPENSSL_sk_value(ossl_check_const_ADMISSIONS_sk_type(sk), (idx))) +#define sk_ADMISSIONS_new(cmp) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new(ossl_check_ADMISSIONS_compfunc_type(cmp))) +#define sk_ADMISSIONS_new_null() ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new_null()) +#define sk_ADMISSIONS_new_reserve(cmp, n) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_new_reserve(ossl_check_ADMISSIONS_compfunc_type(cmp), (n))) +#define sk_ADMISSIONS_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_ADMISSIONS_sk_type(sk), (n)) +#define sk_ADMISSIONS_free(sk) OPENSSL_sk_free(ossl_check_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_zero(sk) OPENSSL_sk_zero(ossl_check_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_delete(sk, i) ((ADMISSIONS *)OPENSSL_sk_delete(ossl_check_ADMISSIONS_sk_type(sk), (i))) +#define sk_ADMISSIONS_delete_ptr(sk, ptr) ((ADMISSIONS *)OPENSSL_sk_delete_ptr(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr))) +#define sk_ADMISSIONS_push(sk, ptr) OPENSSL_sk_push(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_pop(sk) ((ADMISSIONS *)OPENSSL_sk_pop(ossl_check_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_shift(sk) ((ADMISSIONS *)OPENSSL_sk_shift(ossl_check_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_ADMISSIONS_sk_type(sk),ossl_check_ADMISSIONS_freefunc_type(freefunc)) +#define sk_ADMISSIONS_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr), (idx)) +#define sk_ADMISSIONS_set(sk, idx, ptr) 
((ADMISSIONS *)OPENSSL_sk_set(ossl_check_ADMISSIONS_sk_type(sk), (idx), ossl_check_ADMISSIONS_type(ptr))) +#define sk_ADMISSIONS_find(sk, ptr) OPENSSL_sk_find(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr)) +#define sk_ADMISSIONS_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_type(ptr), pnum) +#define sk_ADMISSIONS_sort(sk) OPENSSL_sk_sort(ossl_check_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_ADMISSIONS_sk_type(sk)) +#define sk_ADMISSIONS_dup(sk) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_dup(ossl_check_const_ADMISSIONS_sk_type(sk))) +#define sk_ADMISSIONS_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(ADMISSIONS) *)OPENSSL_sk_deep_copy(ossl_check_const_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_copyfunc_type(copyfunc), ossl_check_ADMISSIONS_freefunc_type(freefunc))) +#define sk_ADMISSIONS_set_cmp_func(sk, cmp) ((sk_ADMISSIONS_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_ADMISSIONS_sk_type(sk), ossl_check_ADMISSIONS_compfunc_type(cmp))) + +typedef STACK_OF(PROFESSION_INFO) PROFESSION_INFOS; + +const ASN1_OBJECT *NAMING_AUTHORITY_get0_authorityId( + const NAMING_AUTHORITY *n); +const ASN1_IA5STRING *NAMING_AUTHORITY_get0_authorityURL( + const NAMING_AUTHORITY *n); +const ASN1_STRING *NAMING_AUTHORITY_get0_authorityText( + const NAMING_AUTHORITY *n); +void NAMING_AUTHORITY_set0_authorityId(NAMING_AUTHORITY *n, + ASN1_OBJECT* namingAuthorityId); +void NAMING_AUTHORITY_set0_authorityURL(NAMING_AUTHORITY *n, + ASN1_IA5STRING* namingAuthorityUrl); +void NAMING_AUTHORITY_set0_authorityText(NAMING_AUTHORITY *n, + ASN1_STRING* namingAuthorityText); + +const GENERAL_NAME *ADMISSION_SYNTAX_get0_admissionAuthority( + const ADMISSION_SYNTAX *as); +void ADMISSION_SYNTAX_set0_admissionAuthority( + ADMISSION_SYNTAX *as, GENERAL_NAME *aa); +const STACK_OF(ADMISSIONS) *ADMISSION_SYNTAX_get0_contentsOfAdmissions( + const ADMISSION_SYNTAX *as); +void ADMISSION_SYNTAX_set0_contentsOfAdmissions( + ADMISSION_SYNTAX *as, STACK_OF(ADMISSIONS) *a); +const GENERAL_NAME *ADMISSIONS_get0_admissionAuthority(const ADMISSIONS *a); +void ADMISSIONS_set0_admissionAuthority(ADMISSIONS *a, GENERAL_NAME *aa); +const NAMING_AUTHORITY *ADMISSIONS_get0_namingAuthority(const ADMISSIONS *a); +void ADMISSIONS_set0_namingAuthority(ADMISSIONS *a, NAMING_AUTHORITY *na); +const PROFESSION_INFOS *ADMISSIONS_get0_professionInfos(const ADMISSIONS *a); +void ADMISSIONS_set0_professionInfos(ADMISSIONS *a, PROFESSION_INFOS *pi); +const ASN1_OCTET_STRING *PROFESSION_INFO_get0_addProfessionInfo( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_addProfessionInfo( + PROFESSION_INFO *pi, ASN1_OCTET_STRING *aos); +const NAMING_AUTHORITY *PROFESSION_INFO_get0_namingAuthority( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_namingAuthority( + PROFESSION_INFO *pi, NAMING_AUTHORITY *na); +const STACK_OF(ASN1_STRING) *PROFESSION_INFO_get0_professionItems( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_professionItems( + PROFESSION_INFO *pi, STACK_OF(ASN1_STRING) *as); +const STACK_OF(ASN1_OBJECT) *PROFESSION_INFO_get0_professionOIDs( + const PROFESSION_INFO *pi); +void PROFESSION_INFO_set0_professionOIDs( + PROFESSION_INFO *pi, STACK_OF(ASN1_OBJECT) *po); +const ASN1_PRINTABLESTRING *PROFESSION_INFO_get0_registrationNumber( + const PROFESSION_INFO *pi); +void 
PROFESSION_INFO_set0_registrationNumber( + PROFESSION_INFO *pi, ASN1_PRINTABLESTRING *rn); + +int OSSL_GENERAL_NAMES_print(BIO *out, GENERAL_NAMES *gens, int indent); + +typedef STACK_OF(X509_ATTRIBUTE) OSSL_ATTRIBUTES_SYNTAX; +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTES_SYNTAX) + +typedef STACK_OF(USERNOTICE) OSSL_USER_NOTICE_SYNTAX; +DECLARE_ASN1_FUNCTIONS(OSSL_USER_NOTICE_SYNTAX) + +SKM_DEFINE_STACK_OF_INTERNAL(USERNOTICE, USERNOTICE, USERNOTICE) +#define sk_USERNOTICE_num(sk) OPENSSL_sk_num(ossl_check_const_USERNOTICE_sk_type(sk)) +#define sk_USERNOTICE_value(sk, idx) ((USERNOTICE *)OPENSSL_sk_value(ossl_check_const_USERNOTICE_sk_type(sk), (idx))) +#define sk_USERNOTICE_new(cmp) ((STACK_OF(USERNOTICE) *)OPENSSL_sk_new(ossl_check_USERNOTICE_compfunc_type(cmp))) +#define sk_USERNOTICE_new_null() ((STACK_OF(USERNOTICE) *)OPENSSL_sk_new_null()) +#define sk_USERNOTICE_new_reserve(cmp, n) ((STACK_OF(USERNOTICE) *)OPENSSL_sk_new_reserve(ossl_check_USERNOTICE_compfunc_type(cmp), (n))) +#define sk_USERNOTICE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_USERNOTICE_sk_type(sk), (n)) +#define sk_USERNOTICE_free(sk) OPENSSL_sk_free(ossl_check_USERNOTICE_sk_type(sk)) +#define sk_USERNOTICE_zero(sk) OPENSSL_sk_zero(ossl_check_USERNOTICE_sk_type(sk)) +#define sk_USERNOTICE_delete(sk, i) ((USERNOTICE *)OPENSSL_sk_delete(ossl_check_USERNOTICE_sk_type(sk), (i))) +#define sk_USERNOTICE_delete_ptr(sk, ptr) ((USERNOTICE *)OPENSSL_sk_delete_ptr(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr))) +#define sk_USERNOTICE_push(sk, ptr) OPENSSL_sk_push(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr)) +#define sk_USERNOTICE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr)) +#define sk_USERNOTICE_pop(sk) ((USERNOTICE *)OPENSSL_sk_pop(ossl_check_USERNOTICE_sk_type(sk))) +#define sk_USERNOTICE_shift(sk) ((USERNOTICE *)OPENSSL_sk_shift(ossl_check_USERNOTICE_sk_type(sk))) +#define sk_USERNOTICE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_USERNOTICE_sk_type(sk),ossl_check_USERNOTICE_freefunc_type(freefunc)) +#define sk_USERNOTICE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr), (idx)) +#define sk_USERNOTICE_set(sk, idx, ptr) ((USERNOTICE *)OPENSSL_sk_set(ossl_check_USERNOTICE_sk_type(sk), (idx), ossl_check_USERNOTICE_type(ptr))) +#define sk_USERNOTICE_find(sk, ptr) OPENSSL_sk_find(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr)) +#define sk_USERNOTICE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr)) +#define sk_USERNOTICE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_type(ptr), pnum) +#define sk_USERNOTICE_sort(sk) OPENSSL_sk_sort(ossl_check_USERNOTICE_sk_type(sk)) +#define sk_USERNOTICE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_USERNOTICE_sk_type(sk)) +#define sk_USERNOTICE_dup(sk) ((STACK_OF(USERNOTICE) *)OPENSSL_sk_dup(ossl_check_const_USERNOTICE_sk_type(sk))) +#define sk_USERNOTICE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(USERNOTICE) *)OPENSSL_sk_deep_copy(ossl_check_const_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_copyfunc_type(copyfunc), ossl_check_USERNOTICE_freefunc_type(freefunc))) +#define sk_USERNOTICE_set_cmp_func(sk, cmp) ((sk_USERNOTICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_USERNOTICE_sk_type(sk), ossl_check_USERNOTICE_compfunc_type(cmp))) + + +typedef struct OSSL_ROLE_SPEC_CERT_ID_st { + GENERAL_NAME 
*roleName; + GENERAL_NAME *roleCertIssuer; + ASN1_INTEGER *roleCertSerialNumber; + GENERAL_NAMES *roleCertLocator; +} OSSL_ROLE_SPEC_CERT_ID; + +DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID, OSSL_ROLE_SPEC_CERT_ID) +#define sk_OSSL_ROLE_SPEC_CERT_ID_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_value(sk, idx) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_value(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_new(cmp) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_new_null() ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ROLE_SPEC_CERT_ID_new_reserve(cmp, n) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp), (n))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (n)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_delete(sk, i) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (i))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_delete_ptr(sk, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_pop(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_pop(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_shift(sk) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_shift(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk),ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), (idx)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_set(sk, idx, ptr) ((OSSL_ROLE_SPEC_CERT_ID *)OPENSSL_sk_set(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), (idx), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_type(ptr), pnum) +#define sk_OSSL_ROLE_SPEC_CERT_ID_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define sk_OSSL_ROLE_SPEC_CERT_ID_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk)) +#define 
sk_OSSL_ROLE_SPEC_CERT_ID_dup(sk) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ROLE_SPEC_CERT_ID) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_copyfunc_type(copyfunc), ossl_check_OSSL_ROLE_SPEC_CERT_ID_freefunc_type(freefunc))) +#define sk_OSSL_ROLE_SPEC_CERT_ID_set_cmp_func(sk, cmp) ((sk_OSSL_ROLE_SPEC_CERT_ID_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ROLE_SPEC_CERT_ID_sk_type(sk), ossl_check_OSSL_ROLE_SPEC_CERT_ID_compfunc_type(cmp))) + + +typedef STACK_OF(OSSL_ROLE_SPEC_CERT_ID) OSSL_ROLE_SPEC_CERT_ID_SYNTAX; + +DECLARE_ASN1_FUNCTIONS(OSSL_ROLE_SPEC_CERT_ID_SYNTAX) +typedef struct OSSL_HASH_st { + X509_ALGOR *algorithmIdentifier; + ASN1_BIT_STRING *hashValue; +} OSSL_HASH; + +typedef struct OSSL_INFO_SYNTAX_POINTER_st { + GENERAL_NAMES *name; + OSSL_HASH *hash; +} OSSL_INFO_SYNTAX_POINTER; + +# define OSSL_INFO_SYNTAX_TYPE_CONTENT 0 +# define OSSL_INFO_SYNTAX_TYPE_POINTER 1 + +typedef struct OSSL_INFO_SYNTAX_st { + int type; + union { + ASN1_STRING *content; + OSSL_INFO_SYNTAX_POINTER *pointer; + } choice; +} OSSL_INFO_SYNTAX; + +typedef struct OSSL_PRIVILEGE_POLICY_ID_st { + ASN1_OBJECT *privilegePolicy; + OSSL_INFO_SYNTAX *privPolSyntax; +} OSSL_PRIVILEGE_POLICY_ID; + +typedef struct OSSL_ATTRIBUTE_DESCRIPTOR_st { + ASN1_OBJECT *identifier; + ASN1_STRING *attributeSyntax; + ASN1_UTF8STRING *name; + ASN1_UTF8STRING *description; + OSSL_PRIVILEGE_POLICY_ID *dominationRule; +} OSSL_ATTRIBUTE_DESCRIPTOR; + +DECLARE_ASN1_FUNCTIONS(OSSL_HASH) +DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX) +DECLARE_ASN1_FUNCTIONS(OSSL_INFO_SYNTAX_POINTER) +DECLARE_ASN1_FUNCTIONS(OSSL_PRIVILEGE_POLICY_ID) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_DESCRIPTOR) + +typedef struct OSSL_TIME_SPEC_ABSOLUTE_st { + ASN1_GENERALIZEDTIME *startTime; + ASN1_GENERALIZEDTIME *endTime; +} OSSL_TIME_SPEC_ABSOLUTE; + +typedef struct OSSL_DAY_TIME_st { + ASN1_INTEGER *hour; + ASN1_INTEGER *minute; + ASN1_INTEGER *second; +} OSSL_DAY_TIME; + +typedef struct OSSL_DAY_TIME_BAND_st { + OSSL_DAY_TIME *startDayTime; + OSSL_DAY_TIME *endDayTime; +} OSSL_DAY_TIME_BAND; + +# define OSSL_NAMED_DAY_TYPE_INT 0 +# define OSSL_NAMED_DAY_TYPE_BIT 1 +# define OSSL_NAMED_DAY_INT_SUN 1 +# define OSSL_NAMED_DAY_INT_MON 2 +# define OSSL_NAMED_DAY_INT_TUE 3 +# define OSSL_NAMED_DAY_INT_WED 4 +# define OSSL_NAMED_DAY_INT_THU 5 +# define OSSL_NAMED_DAY_INT_FRI 6 +# define OSSL_NAMED_DAY_INT_SAT 7 +# define OSSL_NAMED_DAY_BIT_SUN 0 +# define OSSL_NAMED_DAY_BIT_MON 1 +# define OSSL_NAMED_DAY_BIT_TUE 2 +# define OSSL_NAMED_DAY_BIT_WED 3 +# define OSSL_NAMED_DAY_BIT_THU 4 +# define OSSL_NAMED_DAY_BIT_FRI 5 +# define OSSL_NAMED_DAY_BIT_SAT 6 + +typedef struct OSSL_NAMED_DAY_st { + int type; + union { + ASN1_INTEGER *intNamedDays; + ASN1_BIT_STRING *bitNamedDays; + } choice; +} OSSL_NAMED_DAY; + +# define OSSL_TIME_SPEC_X_DAY_OF_FIRST 0 +# define OSSL_TIME_SPEC_X_DAY_OF_SECOND 1 +# define OSSL_TIME_SPEC_X_DAY_OF_THIRD 2 +# define OSSL_TIME_SPEC_X_DAY_OF_FOURTH 3 +# define OSSL_TIME_SPEC_X_DAY_OF_FIFTH 4 + +typedef struct OSSL_TIME_SPEC_X_DAY_OF_st { + int type; + union { + OSSL_NAMED_DAY *first; + OSSL_NAMED_DAY *second; + OSSL_NAMED_DAY *third; + OSSL_NAMED_DAY *fourth; + OSSL_NAMED_DAY *fifth; + } choice; +} OSSL_TIME_SPEC_X_DAY_OF; + +# define OSSL_TIME_SPEC_DAY_TYPE_INT 0 +# define OSSL_TIME_SPEC_DAY_TYPE_BIT 1 +# define 
OSSL_TIME_SPEC_DAY_TYPE_DAY_OF 2 +# define OSSL_TIME_SPEC_DAY_BIT_SUN 0 +# define OSSL_TIME_SPEC_DAY_BIT_MON 1 +# define OSSL_TIME_SPEC_DAY_BIT_TUE 2 +# define OSSL_TIME_SPEC_DAY_BIT_WED 3 +# define OSSL_TIME_SPEC_DAY_BIT_THU 4 +# define OSSL_TIME_SPEC_DAY_BIT_FRI 5 +# define OSSL_TIME_SPEC_DAY_BIT_SAT 6 +# define OSSL_TIME_SPEC_DAY_INT_SUN 1 +# define OSSL_TIME_SPEC_DAY_INT_MON 2 +# define OSSL_TIME_SPEC_DAY_INT_TUE 3 +# define OSSL_TIME_SPEC_DAY_INT_WED 4 +# define OSSL_TIME_SPEC_DAY_INT_THU 5 +# define OSSL_TIME_SPEC_DAY_INT_FRI 6 +# define OSSL_TIME_SPEC_DAY_INT_SAT 7 + +typedef struct OSSL_TIME_SPEC_DAY_st { + int type; + union { + STACK_OF(ASN1_INTEGER) *intDay; + ASN1_BIT_STRING *bitDay; + OSSL_TIME_SPEC_X_DAY_OF *dayOf; + } choice; +} OSSL_TIME_SPEC_DAY; + +# define OSSL_TIME_SPEC_WEEKS_TYPE_ALL 0 +# define OSSL_TIME_SPEC_WEEKS_TYPE_INT 1 +# define OSSL_TIME_SPEC_WEEKS_TYPE_BIT 2 +# define OSSL_TIME_SPEC_BIT_WEEKS_1 0 +# define OSSL_TIME_SPEC_BIT_WEEKS_2 1 +# define OSSL_TIME_SPEC_BIT_WEEKS_3 2 +# define OSSL_TIME_SPEC_BIT_WEEKS_4 3 +# define OSSL_TIME_SPEC_BIT_WEEKS_5 4 + +typedef struct OSSL_TIME_SPEC_WEEKS_st { + int type; + union { + ASN1_NULL *allWeeks; + STACK_OF(ASN1_INTEGER) *intWeek; + ASN1_BIT_STRING *bitWeek; + } choice; +} OSSL_TIME_SPEC_WEEKS; + +# define OSSL_TIME_SPEC_MONTH_TYPE_ALL 0 +# define OSSL_TIME_SPEC_MONTH_TYPE_INT 1 +# define OSSL_TIME_SPEC_MONTH_TYPE_BIT 2 +# define OSSL_TIME_SPEC_INT_MONTH_JAN 1 +# define OSSL_TIME_SPEC_INT_MONTH_FEB 2 +# define OSSL_TIME_SPEC_INT_MONTH_MAR 3 +# define OSSL_TIME_SPEC_INT_MONTH_APR 4 +# define OSSL_TIME_SPEC_INT_MONTH_MAY 5 +# define OSSL_TIME_SPEC_INT_MONTH_JUN 6 +# define OSSL_TIME_SPEC_INT_MONTH_JUL 7 +# define OSSL_TIME_SPEC_INT_MONTH_AUG 8 +# define OSSL_TIME_SPEC_INT_MONTH_SEP 9 +# define OSSL_TIME_SPEC_INT_MONTH_OCT 10 +# define OSSL_TIME_SPEC_INT_MONTH_NOV 11 +# define OSSL_TIME_SPEC_INT_MONTH_DEC 12 +# define OSSL_TIME_SPEC_BIT_MONTH_JAN 0 +# define OSSL_TIME_SPEC_BIT_MONTH_FEB 1 +# define OSSL_TIME_SPEC_BIT_MONTH_MAR 2 +# define OSSL_TIME_SPEC_BIT_MONTH_APR 3 +# define OSSL_TIME_SPEC_BIT_MONTH_MAY 4 +# define OSSL_TIME_SPEC_BIT_MONTH_JUN 5 +# define OSSL_TIME_SPEC_BIT_MONTH_JUL 6 +# define OSSL_TIME_SPEC_BIT_MONTH_AUG 7 +# define OSSL_TIME_SPEC_BIT_MONTH_SEP 8 +# define OSSL_TIME_SPEC_BIT_MONTH_OCT 9 +# define OSSL_TIME_SPEC_BIT_MONTH_NOV 10 +# define OSSL_TIME_SPEC_BIT_MONTH_DEC 11 + +typedef struct OSSL_TIME_SPEC_MONTH_st { + int type; + union { + ASN1_NULL *allMonths; + STACK_OF(ASN1_INTEGER) *intMonth; + ASN1_BIT_STRING *bitMonth; + } choice; +} OSSL_TIME_SPEC_MONTH; + +typedef struct OSSL_TIME_PERIOD_st { + STACK_OF(OSSL_DAY_TIME_BAND) *timesOfDay; + OSSL_TIME_SPEC_DAY *days; + OSSL_TIME_SPEC_WEEKS *weeks; + OSSL_TIME_SPEC_MONTH *months; + STACK_OF(ASN1_INTEGER) *years; +} OSSL_TIME_PERIOD; + +# define OSSL_TIME_SPEC_TIME_TYPE_ABSOLUTE 0 +# define OSSL_TIME_SPEC_TIME_TYPE_PERIODIC 1 + +typedef struct OSSL_TIME_SPEC_TIME_st { + int type; + union { + OSSL_TIME_SPEC_ABSOLUTE *absolute; + STACK_OF(OSSL_TIME_PERIOD) *periodic; + } choice; +} OSSL_TIME_SPEC_TIME; + +typedef struct OSSL_TIME_SPEC_st { + OSSL_TIME_SPEC_TIME *time; + ASN1_BOOLEAN notThisTime; + ASN1_INTEGER *timeZone; +} OSSL_TIME_SPEC; + +DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME) +DECLARE_ASN1_FUNCTIONS(OSSL_DAY_TIME_BAND) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_DAY) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_WEEKS) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_MONTH) +DECLARE_ASN1_FUNCTIONS(OSSL_NAMED_DAY) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_X_DAY_OF) 
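The time-specification types introduced above all follow OpenSSL's usual pattern for ASN.1 CHOICE values: an int `type` discriminant selects which member of the embedded `choice` union is populated. A minimal sketch of consuming one of these values is shown below; it relies only on the OSSL_NAMED_DAY layout and the OSSL_NAMED_DAY_* constants added in this patch. The helper name `describe_named_day` is hypothetical, and the include is assumed to be <openssl/x509v3.h>, where these declarations appear to live in the patched headers.

#include <stdio.h>
#include <openssl/asn1.h>
#include <openssl/x509v3.h>   /* assumed location of the OSSL_NAMED_DAY declarations added above */

/* Hypothetical helper: report which alternative of the OSSL_NAMED_DAY CHOICE is set. */
static void describe_named_day(const OSSL_NAMED_DAY *day)
{
    if (day == NULL)
        return;

    switch (day->type) {
    case OSSL_NAMED_DAY_TYPE_INT:
        /* intNamedDays carries 1..7 (Sunday..Saturday) as an ASN1_INTEGER */
        printf("named day (integer form): %ld\n",
               ASN1_INTEGER_get(day->choice.intNamedDays));
        break;
    case OSSL_NAMED_DAY_TYPE_BIT:
        /* bitNamedDays is a bit string; bit 0 = Sunday ... bit 6 = Saturday */
        for (int bit = OSSL_NAMED_DAY_BIT_SUN; bit <= OSSL_NAMED_DAY_BIT_SAT; bit++)
            if (ASN1_BIT_STRING_get_bit(day->choice.bitNamedDays, bit))
                printf("named day bit %d is set\n", bit);
        break;
    default:
        printf("unrecognised OSSL_NAMED_DAY type %d\n", day->type);
    }
}

The same discriminant/union shape applies to OSSL_TIME_SPEC_DAY, OSSL_TIME_SPEC_WEEKS, OSSL_TIME_SPEC_MONTH and OSSL_TIME_SPEC_TIME, so code walking a full OSSL_TIME_SPEC is a nest of such switches rather than anything specific to one type.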
+DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_ABSOLUTE) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC_TIME) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_SPEC) +DECLARE_ASN1_FUNCTIONS(OSSL_TIME_PERIOD) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_TIME_PERIOD, OSSL_TIME_PERIOD, OSSL_TIME_PERIOD) +#define sk_OSSL_TIME_PERIOD_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_value(sk, idx) ((OSSL_TIME_PERIOD *)OPENSSL_sk_value(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), (idx))) +#define sk_OSSL_TIME_PERIOD_new(cmp) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp))) +#define sk_OSSL_TIME_PERIOD_new_null() ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_null()) +#define sk_OSSL_TIME_PERIOD_new_reserve(cmp, n) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp), (n))) +#define sk_OSSL_TIME_PERIOD_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (n)) +#define sk_OSSL_TIME_PERIOD_free(sk) OPENSSL_sk_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_delete(sk, i) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (i))) +#define sk_OSSL_TIME_PERIOD_delete_ptr(sk, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr))) +#define sk_OSSL_TIME_PERIOD_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_pop(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_pop(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))) +#define sk_OSSL_TIME_PERIOD_shift(sk) ((OSSL_TIME_PERIOD *)OPENSSL_sk_shift(ossl_check_OSSL_TIME_PERIOD_sk_type(sk))) +#define sk_OSSL_TIME_PERIOD_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_TIME_PERIOD_sk_type(sk),ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc)) +#define sk_OSSL_TIME_PERIOD_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), (idx)) +#define sk_OSSL_TIME_PERIOD_set(sk, idx, ptr) ((OSSL_TIME_PERIOD *)OPENSSL_sk_set(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), (idx), ossl_check_OSSL_TIME_PERIOD_type(ptr))) +#define sk_OSSL_TIME_PERIOD_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr)) +#define sk_OSSL_TIME_PERIOD_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_type(ptr), pnum) +#define sk_OSSL_TIME_PERIOD_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk)) +#define sk_OSSL_TIME_PERIOD_dup(sk) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_dup(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk))) +#define sk_OSSL_TIME_PERIOD_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_TIME_PERIOD) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_copyfunc_type(copyfunc), ossl_check_OSSL_TIME_PERIOD_freefunc_type(freefunc))) +#define 
sk_OSSL_TIME_PERIOD_set_cmp_func(sk, cmp) ((sk_OSSL_TIME_PERIOD_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_TIME_PERIOD_sk_type(sk), ossl_check_OSSL_TIME_PERIOD_compfunc_type(cmp))) + + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND, OSSL_DAY_TIME_BAND) +#define sk_OSSL_DAY_TIME_BAND_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_value(sk, idx) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_value(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), (idx))) +#define sk_OSSL_DAY_TIME_BAND_new(cmp) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp))) +#define sk_OSSL_DAY_TIME_BAND_new_null() ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_null()) +#define sk_OSSL_DAY_TIME_BAND_new_reserve(cmp, n) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp), (n))) +#define sk_OSSL_DAY_TIME_BAND_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (n)) +#define sk_OSSL_DAY_TIME_BAND_free(sk) OPENSSL_sk_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_delete(sk, i) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (i))) +#define sk_OSSL_DAY_TIME_BAND_delete_ptr(sk, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))) +#define sk_OSSL_DAY_TIME_BAND_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_pop(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_pop(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))) +#define sk_OSSL_DAY_TIME_BAND_shift(sk) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_shift(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk))) +#define sk_OSSL_DAY_TIME_BAND_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk),ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc)) +#define sk_OSSL_DAY_TIME_BAND_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), (idx)) +#define sk_OSSL_DAY_TIME_BAND_set(sk, idx, ptr) ((OSSL_DAY_TIME_BAND *)OPENSSL_sk_set(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), (idx), ossl_check_OSSL_DAY_TIME_BAND_type(ptr))) +#define sk_OSSL_DAY_TIME_BAND_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr)) +#define sk_OSSL_DAY_TIME_BAND_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_type(ptr), pnum) +#define sk_OSSL_DAY_TIME_BAND_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk)) +#define sk_OSSL_DAY_TIME_BAND_dup(sk) ((STACK_OF(OSSL_DAY_TIME_BAND) *)OPENSSL_sk_dup(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk))) +#define sk_OSSL_DAY_TIME_BAND_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_DAY_TIME_BAND) 
*)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_copyfunc_type(copyfunc), ossl_check_OSSL_DAY_TIME_BAND_freefunc_type(freefunc))) +#define sk_OSSL_DAY_TIME_BAND_set_cmp_func(sk, cmp) ((sk_OSSL_DAY_TIME_BAND_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_DAY_TIME_BAND_sk_type(sk), ossl_check_OSSL_DAY_TIME_BAND_compfunc_type(cmp))) + + +/* Attribute Type and Value */ +typedef struct atav_st { + ASN1_OBJECT *type; + ASN1_TYPE *value; +} OSSL_ATAV; + +typedef struct ATTRIBUTE_TYPE_MAPPING_st { + ASN1_OBJECT *local; + ASN1_OBJECT *remote; +} OSSL_ATTRIBUTE_TYPE_MAPPING; + +typedef struct ATTRIBUTE_VALUE_MAPPING_st { + OSSL_ATAV *local; + OSSL_ATAV *remote; +} OSSL_ATTRIBUTE_VALUE_MAPPING; + +# define OSSL_ATTR_MAP_TYPE 0 +# define OSSL_ATTR_MAP_VALUE 1 + +typedef struct ATTRIBUTE_MAPPING_st { + int type; + union { + OSSL_ATTRIBUTE_TYPE_MAPPING *typeMappings; + OSSL_ATTRIBUTE_VALUE_MAPPING *typeValueMappings; + } choice; +} OSSL_ATTRIBUTE_MAPPING; + +typedef STACK_OF(OSSL_ATTRIBUTE_MAPPING) OSSL_ATTRIBUTE_MAPPINGS; +DECLARE_ASN1_FUNCTIONS(OSSL_ATAV) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_TYPE_MAPPING) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_VALUE_MAPPING) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPING) +DECLARE_ASN1_FUNCTIONS(OSSL_ATTRIBUTE_MAPPINGS) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING, OSSL_ATTRIBUTE_MAPPING) +#define sk_OSSL_ATTRIBUTE_MAPPING_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_value(sk, idx) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_value(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx))) +#define sk_OSSL_ATTRIBUTE_MAPPING_new(cmp) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp))) +#define sk_OSSL_ATTRIBUTE_MAPPING_new_null() ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ATTRIBUTE_MAPPING_new_reserve(cmp, n) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp), (n))) +#define sk_OSSL_ATTRIBUTE_MAPPING_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (n)) +#define sk_OSSL_ATTRIBUTE_MAPPING_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_delete(sk, i) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (i))) +#define sk_OSSL_ATTRIBUTE_MAPPING_delete_ptr(sk, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))) +#define sk_OSSL_ATTRIBUTE_MAPPING_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_pop(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_pop(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))) +#define sk_OSSL_ATTRIBUTE_MAPPING_shift(sk) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_shift(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))) +#define sk_OSSL_ATTRIBUTE_MAPPING_pop_free(sk, freefunc) 
OPENSSL_sk_pop_free(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk),ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc)) +#define sk_OSSL_ATTRIBUTE_MAPPING_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), (idx)) +#define sk_OSSL_ATTRIBUTE_MAPPING_set(sk, idx, ptr) ((OSSL_ATTRIBUTE_MAPPING *)OPENSSL_sk_set(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), (idx), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr))) +#define sk_OSSL_ATTRIBUTE_MAPPING_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr)) +#define sk_OSSL_ATTRIBUTE_MAPPING_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_type(ptr), pnum) +#define sk_OSSL_ATTRIBUTE_MAPPING_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk)) +#define sk_OSSL_ATTRIBUTE_MAPPING_dup(sk) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk))) +#define sk_OSSL_ATTRIBUTE_MAPPING_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ATTRIBUTE_MAPPING) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_copyfunc_type(copyfunc), ossl_check_OSSL_ATTRIBUTE_MAPPING_freefunc_type(freefunc))) +#define sk_OSSL_ATTRIBUTE_MAPPING_set_cmp_func(sk, cmp) ((sk_OSSL_ATTRIBUTE_MAPPING_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ATTRIBUTE_MAPPING_sk_type(sk), ossl_check_OSSL_ATTRIBUTE_MAPPING_compfunc_type(cmp))) + + +# define OSSL_AAA_ATTRIBUTE_TYPE 0 +# define OSSL_AAA_ATTRIBUTE_VALUES 1 + +typedef struct ALLOWED_ATTRIBUTES_CHOICE_st { + int type; + union { + ASN1_OBJECT *attributeType; + X509_ATTRIBUTE *attributeTypeandValues; + } choice; +} OSSL_ALLOWED_ATTRIBUTES_CHOICE; + +typedef struct ALLOWED_ATTRIBUTES_ITEM_st { + STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *attributes; + GENERAL_NAME *holderDomain; +} OSSL_ALLOWED_ATTRIBUTES_ITEM; + +typedef STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) OSSL_ALLOWED_ATTRIBUTES_SYNTAX; + +DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_CHOICE) +DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_ITEM) +DECLARE_ASN1_FUNCTIONS(OSSL_ALLOWED_ATTRIBUTES_SYNTAX) + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE, OSSL_ALLOWED_ATTRIBUTES_CHOICE) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp), (n))) +#define 
sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (n)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (i))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), (idx)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_CHOICE *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_find_all(sk, ptr, pnum) OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_type(ptr), pnum) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_CHOICE) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_freefunc_type(freefunc))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_set_cmp_func(sk, cmp) 
((sk_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_CHOICE_compfunc_type(cmp))) + + +SKM_DEFINE_STACK_OF_INTERNAL(OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM, OSSL_ALLOWED_ATTRIBUTES_ITEM) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_num(sk) OPENSSL_sk_num(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_value(sk, idx) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_value(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new(cmp) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_null() ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_null()) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_new_reserve(cmp, n) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_new_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp), (n))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_reserve(sk, n) OPENSSL_sk_reserve(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (n)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_free(sk) OPENSSL_sk_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_zero(sk) OPENSSL_sk_zero(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete(sk, i) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (i))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_delete_ptr(sk, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_delete_ptr(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_push(sk, ptr) OPENSSL_sk_push(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_unshift(sk, ptr) OPENSSL_sk_unshift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_pop(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_shift(sk) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_shift(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_pop_free(sk, freefunc) OPENSSL_sk_pop_free(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk),ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_insert(sk, ptr, idx) OPENSSL_sk_insert(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), (idx)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set(sk, idx, ptr) ((OSSL_ALLOWED_ATTRIBUTES_ITEM *)OPENSSL_sk_set(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), (idx), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find(sk, ptr) OPENSSL_sk_find(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_ex(sk, ptr) OPENSSL_sk_find_ex(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_find_all(sk, ptr, pnum) 
OPENSSL_sk_find_all(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_type(ptr), pnum) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_sort(sk) OPENSSL_sk_sort(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_is_sorted(sk) OPENSSL_sk_is_sorted(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk)) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_dup(sk) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_dup(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_deep_copy(sk, copyfunc, freefunc) ((STACK_OF(OSSL_ALLOWED_ATTRIBUTES_ITEM) *)OPENSSL_sk_deep_copy(ossl_check_const_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_copyfunc_type(copyfunc), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_freefunc_type(freefunc))) +#define sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_set_cmp_func(sk, cmp) ((sk_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc)OPENSSL_sk_set_cmp_func(ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_sk_type(sk), ossl_check_OSSL_ALLOWED_ATTRIBUTES_ITEM_compfunc_type(cmp))) + + +typedef struct AA_DIST_POINT_st { + DIST_POINT_NAME *distpoint; + ASN1_BIT_STRING *reasons; + int dp_reasons; + ASN1_BOOLEAN indirectCRL; + ASN1_BOOLEAN containsUserAttributeCerts; + ASN1_BOOLEAN containsAACerts; + ASN1_BOOLEAN containsSOAPublicKeyCerts; +} OSSL_AA_DIST_POINT; + +DECLARE_ASN1_FUNCTIONS(OSSL_AA_DIST_POINT) + +# ifdef __cplusplus +} +# endif +#endif diff --git a/contrib/openssl-cmake/common/include/prov/der_digests.h b/contrib/openssl-cmake/common/include/prov/der_digests.h new file mode 100644 index 000000000000..b184807c80ce --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_digests.h @@ -0,0 +1,160 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_digests.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * sigAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 3 } + */ +#define DER_OID_V_sigAlgs DER_P_OBJECT, 8, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03 +#define DER_OID_SZ_sigAlgs 10 +extern const unsigned char ossl_der_oid_sigAlgs[DER_OID_SZ_sigAlgs]; + +/* + * id-sha1 OBJECT IDENTIFIER ::= { iso(1) + * identified-organization(3) oiw(14) + * secsig(3) algorithms(2) 26 } + */ +#define DER_OID_V_id_sha1 DER_P_OBJECT, 5, 0x2B, 0x0E, 0x03, 0x02, 0x1A +#define DER_OID_SZ_id_sha1 7 +extern const unsigned char ossl_der_oid_id_sha1[DER_OID_SZ_id_sha1]; + +/* + * id-md2 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 2 } + */ +#define DER_OID_V_id_md2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x02 +#define DER_OID_SZ_id_md2 10 +extern const unsigned char ossl_der_oid_id_md2[DER_OID_SZ_id_md2]; + +/* + * id-md5 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 5 } + */ +#define DER_OID_V_id_md5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05 +#define DER_OID_SZ_id_md5 10 +extern const unsigned char ossl_der_oid_id_md5[DER_OID_SZ_id_md5]; + +/* + * id-sha256 OBJECT IDENTIFIER ::= { hashAlgs 1 } + */ +#define DER_OID_V_id_sha256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01 +#define DER_OID_SZ_id_sha256 11 +extern const unsigned char ossl_der_oid_id_sha256[DER_OID_SZ_id_sha256]; + +/* + * id-sha384 OBJECT IDENTIFIER ::= { hashAlgs 2 } + */ +#define DER_OID_V_id_sha384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02 +#define DER_OID_SZ_id_sha384 11 +extern const unsigned char ossl_der_oid_id_sha384[DER_OID_SZ_id_sha384]; + +/* + * id-sha512 OBJECT IDENTIFIER ::= { hashAlgs 3 } + */ +#define DER_OID_V_id_sha512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03 +#define DER_OID_SZ_id_sha512 11 +extern const unsigned char ossl_der_oid_id_sha512[DER_OID_SZ_id_sha512]; + +/* + * id-sha224 OBJECT IDENTIFIER ::= { hashAlgs 4 } + */ +#define DER_OID_V_id_sha224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 +#define DER_OID_SZ_id_sha224 11 +extern const unsigned char ossl_der_oid_id_sha224[DER_OID_SZ_id_sha224]; + +/* + * id-sha512-224 OBJECT IDENTIFIER ::= { hashAlgs 5 } + */ +#define DER_OID_V_id_sha512_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x05 +#define DER_OID_SZ_id_sha512_224 11 +extern const unsigned char ossl_der_oid_id_sha512_224[DER_OID_SZ_id_sha512_224]; + +/* + * id-sha512-256 OBJECT IDENTIFIER ::= { hashAlgs 6 } + */ +#define DER_OID_V_id_sha512_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x06 +#define DER_OID_SZ_id_sha512_256 11 +extern const unsigned char ossl_der_oid_id_sha512_256[DER_OID_SZ_id_sha512_256]; + +/* + * id-sha3-224 OBJECT IDENTIFIER ::= { hashAlgs 7 } + */ +#define DER_OID_V_id_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x07 +#define DER_OID_SZ_id_sha3_224 11 +extern const unsigned char ossl_der_oid_id_sha3_224[DER_OID_SZ_id_sha3_224]; + +/* + * id-sha3-256 OBJECT IDENTIFIER ::= { hashAlgs 8 } + */ +#define DER_OID_V_id_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x08 +#define DER_OID_SZ_id_sha3_256 11 +extern const unsigned char 
ossl_der_oid_id_sha3_256[DER_OID_SZ_id_sha3_256]; + +/* + * id-sha3-384 OBJECT IDENTIFIER ::= { hashAlgs 9 } + */ +#define DER_OID_V_id_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x09 +#define DER_OID_SZ_id_sha3_384 11 +extern const unsigned char ossl_der_oid_id_sha3_384[DER_OID_SZ_id_sha3_384]; + +/* + * id-sha3-512 OBJECT IDENTIFIER ::= { hashAlgs 10 } + */ +#define DER_OID_V_id_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0A +#define DER_OID_SZ_id_sha3_512 11 +extern const unsigned char ossl_der_oid_id_sha3_512[DER_OID_SZ_id_sha3_512]; + +/* + * id-shake128 OBJECT IDENTIFIER ::= { hashAlgs 11 } + */ +#define DER_OID_V_id_shake128 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0B +#define DER_OID_SZ_id_shake128 11 +extern const unsigned char ossl_der_oid_id_shake128[DER_OID_SZ_id_shake128]; + +/* + * id-shake256 OBJECT IDENTIFIER ::= { hashAlgs 12 } + */ +#define DER_OID_V_id_shake256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x0C +#define DER_OID_SZ_id_shake256 11 +extern const unsigned char ossl_der_oid_id_shake256[DER_OID_SZ_id_shake256]; + +/* + * id-shake128-len OBJECT IDENTIFIER ::= { hashAlgs 17 } + */ +#define DER_OID_V_id_shake128_len DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x11 +#define DER_OID_SZ_id_shake128_len 11 +extern const unsigned char ossl_der_oid_id_shake128_len[DER_OID_SZ_id_shake128_len]; + +/* + * id-shake256-len OBJECT IDENTIFIER ::= { hashAlgs 18 } + */ +#define DER_OID_V_id_shake256_len DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x12 +#define DER_OID_SZ_id_shake256_len 11 +extern const unsigned char ossl_der_oid_id_shake256_len[DER_OID_SZ_id_shake256_len]; + +/* + * id-KMACWithSHAKE128 OBJECT IDENTIFIER ::={hashAlgs 19} + */ +#define DER_OID_V_id_KMACWithSHAKE128 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x13 +#define DER_OID_SZ_id_KMACWithSHAKE128 11 +extern const unsigned char ossl_der_oid_id_KMACWithSHAKE128[DER_OID_SZ_id_KMACWithSHAKE128]; + +/* + * id-KMACWithSHAKE256 OBJECT IDENTIFIER ::={ hashAlgs 20} + */ +#define DER_OID_V_id_KMACWithSHAKE256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x14 +#define DER_OID_SZ_id_KMACWithSHAKE256 11 +extern const unsigned char ossl_der_oid_id_KMACWithSHAKE256[DER_OID_SZ_id_KMACWithSHAKE256]; + diff --git a/contrib/openssl-cmake/common/include/prov/der_dsa.h b/contrib/openssl-cmake/common/include/prov/der_dsa.h new file mode 100644 index 000000000000..b12a56282b25 --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_dsa.h @@ -0,0 +1,94 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_dsa.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * id-dsa OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57(10040) x9algorithm(4) 1 } + */ +#define DER_OID_V_id_dsa DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x38, 0x04, 0x01 +#define DER_OID_SZ_id_dsa 9 +extern const unsigned char ossl_der_oid_id_dsa[DER_OID_SZ_id_dsa]; + +/* + * id-dsa-with-sha1 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57 (10040) x9algorithm(4) 3 } + */ +#define DER_OID_V_id_dsa_with_sha1 DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x38, 0x04, 0x03 +#define DER_OID_SZ_id_dsa_with_sha1 9 +extern const unsigned char ossl_der_oid_id_dsa_with_sha1[DER_OID_SZ_id_dsa_with_sha1]; + +/* + * id-dsa-with-sha224 OBJECT IDENTIFIER ::= { sigAlgs 1 } + */ +#define DER_OID_V_id_dsa_with_sha224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x01 +#define DER_OID_SZ_id_dsa_with_sha224 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha224[DER_OID_SZ_id_dsa_with_sha224]; + +/* + * id-dsa-with-sha256 OBJECT IDENTIFIER ::= { sigAlgs 2 } + */ +#define DER_OID_V_id_dsa_with_sha256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x02 +#define DER_OID_SZ_id_dsa_with_sha256 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha256[DER_OID_SZ_id_dsa_with_sha256]; + +/* + * id-dsa-with-sha384 OBJECT IDENTIFIER ::= { sigAlgs 3 } + */ +#define DER_OID_V_id_dsa_with_sha384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x03 +#define DER_OID_SZ_id_dsa_with_sha384 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha384[DER_OID_SZ_id_dsa_with_sha384]; + +/* + * id-dsa-with-sha512 OBJECT IDENTIFIER ::= { sigAlgs 4 } + */ +#define DER_OID_V_id_dsa_with_sha512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x04 +#define DER_OID_SZ_id_dsa_with_sha512 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha512[DER_OID_SZ_id_dsa_with_sha512]; + +/* + * id-dsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 5 } + */ +#define DER_OID_V_id_dsa_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x05 +#define DER_OID_SZ_id_dsa_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_224[DER_OID_SZ_id_dsa_with_sha3_224]; + +/* + * id-dsa-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 6 } + */ +#define DER_OID_V_id_dsa_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x06 +#define DER_OID_SZ_id_dsa_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_256[DER_OID_SZ_id_dsa_with_sha3_256]; + +/* + * id-dsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 7 } + */ +#define DER_OID_V_id_dsa_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x07 +#define DER_OID_SZ_id_dsa_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_384[DER_OID_SZ_id_dsa_with_sha3_384]; + +/* + * id-dsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 8 } + */ +#define DER_OID_V_id_dsa_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x08 +#define DER_OID_SZ_id_dsa_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_dsa_with_sha3_512[DER_OID_SZ_id_dsa_with_sha3_512]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_DSA(WPACKET *pkt, int tag, DSA *dsa); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_DSA_with_MD(WPACKET 
*pkt, int tag, + DSA *dsa, int mdnid); diff --git a/contrib/openssl-cmake/common/include/prov/der_ec.h b/contrib/openssl-cmake/common/include/prov/der_ec.h new file mode 100644 index 000000000000..dd697771f711 --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_ec.h @@ -0,0 +1,286 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ec.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/ec.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * ecdsa-with-SHA1 OBJECT IDENTIFIER ::= { id-ecSigType 1 } + */ +#define DER_OID_V_ecdsa_with_SHA1 DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x01 +#define DER_OID_SZ_ecdsa_with_SHA1 9 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA1[DER_OID_SZ_ecdsa_with_SHA1]; + +/* + * id-ecPublicKey OBJECT IDENTIFIER ::= { id-publicKeyType 1 } + */ +#define DER_OID_V_id_ecPublicKey DER_P_OBJECT, 7, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01 +#define DER_OID_SZ_id_ecPublicKey 9 +extern const unsigned char ossl_der_oid_id_ecPublicKey[DER_OID_SZ_id_ecPublicKey]; + +/* + * c2pnb163v1 OBJECT IDENTIFIER ::= { c-TwoCurve 1 } + */ +#define DER_OID_V_c2pnb163v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x01 +#define DER_OID_SZ_c2pnb163v1 10 +extern const unsigned char ossl_der_oid_c2pnb163v1[DER_OID_SZ_c2pnb163v1]; + +/* + * c2pnb163v2 OBJECT IDENTIFIER ::= { c-TwoCurve 2 } + */ +#define DER_OID_V_c2pnb163v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x02 +#define DER_OID_SZ_c2pnb163v2 10 +extern const unsigned char ossl_der_oid_c2pnb163v2[DER_OID_SZ_c2pnb163v2]; + +/* + * c2pnb163v3 OBJECT IDENTIFIER ::= { c-TwoCurve 3 } + */ +#define DER_OID_V_c2pnb163v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x03 +#define DER_OID_SZ_c2pnb163v3 10 +extern const unsigned char ossl_der_oid_c2pnb163v3[DER_OID_SZ_c2pnb163v3]; + +/* + * c2pnb176w1 OBJECT IDENTIFIER ::= { c-TwoCurve 4 } + */ +#define DER_OID_V_c2pnb176w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x04 +#define DER_OID_SZ_c2pnb176w1 10 +extern const unsigned char ossl_der_oid_c2pnb176w1[DER_OID_SZ_c2pnb176w1]; + +/* + * c2tnb191v1 OBJECT IDENTIFIER ::= { c-TwoCurve 5 } + */ +#define DER_OID_V_c2tnb191v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x05 +#define DER_OID_SZ_c2tnb191v1 10 +extern const unsigned char ossl_der_oid_c2tnb191v1[DER_OID_SZ_c2tnb191v1]; + +/* + * c2tnb191v2 OBJECT IDENTIFIER ::= { c-TwoCurve 6 } + */ +#define DER_OID_V_c2tnb191v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x06 +#define DER_OID_SZ_c2tnb191v2 10 +extern const unsigned char ossl_der_oid_c2tnb191v2[DER_OID_SZ_c2tnb191v2]; + +/* + * c2tnb191v3 OBJECT IDENTIFIER ::= { c-TwoCurve 7 } + */ +#define DER_OID_V_c2tnb191v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x07 +#define DER_OID_SZ_c2tnb191v3 10 +extern const unsigned char ossl_der_oid_c2tnb191v3[DER_OID_SZ_c2tnb191v3]; + +/* + * c2onb191v4 OBJECT IDENTIFIER ::= { c-TwoCurve 8 } + */ +#define DER_OID_V_c2onb191v4 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x08 +#define DER_OID_SZ_c2onb191v4 10 +extern const unsigned char 
ossl_der_oid_c2onb191v4[DER_OID_SZ_c2onb191v4]; + +/* + * c2onb191v5 OBJECT IDENTIFIER ::= { c-TwoCurve 9 } + */ +#define DER_OID_V_c2onb191v5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x09 +#define DER_OID_SZ_c2onb191v5 10 +extern const unsigned char ossl_der_oid_c2onb191v5[DER_OID_SZ_c2onb191v5]; + +/* + * c2pnb208w1 OBJECT IDENTIFIER ::= { c-TwoCurve 10 } + */ +#define DER_OID_V_c2pnb208w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0A +#define DER_OID_SZ_c2pnb208w1 10 +extern const unsigned char ossl_der_oid_c2pnb208w1[DER_OID_SZ_c2pnb208w1]; + +/* + * c2tnb239v1 OBJECT IDENTIFIER ::= { c-TwoCurve 11 } + */ +#define DER_OID_V_c2tnb239v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0B +#define DER_OID_SZ_c2tnb239v1 10 +extern const unsigned char ossl_der_oid_c2tnb239v1[DER_OID_SZ_c2tnb239v1]; + +/* + * c2tnb239v2 OBJECT IDENTIFIER ::= { c-TwoCurve 12 } + */ +#define DER_OID_V_c2tnb239v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0C +#define DER_OID_SZ_c2tnb239v2 10 +extern const unsigned char ossl_der_oid_c2tnb239v2[DER_OID_SZ_c2tnb239v2]; + +/* + * c2tnb239v3 OBJECT IDENTIFIER ::= { c-TwoCurve 13 } + */ +#define DER_OID_V_c2tnb239v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0D +#define DER_OID_SZ_c2tnb239v3 10 +extern const unsigned char ossl_der_oid_c2tnb239v3[DER_OID_SZ_c2tnb239v3]; + +/* + * c2onb239v4 OBJECT IDENTIFIER ::= { c-TwoCurve 14 } + */ +#define DER_OID_V_c2onb239v4 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0E +#define DER_OID_SZ_c2onb239v4 10 +extern const unsigned char ossl_der_oid_c2onb239v4[DER_OID_SZ_c2onb239v4]; + +/* + * c2onb239v5 OBJECT IDENTIFIER ::= { c-TwoCurve 15 } + */ +#define DER_OID_V_c2onb239v5 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x0F +#define DER_OID_SZ_c2onb239v5 10 +extern const unsigned char ossl_der_oid_c2onb239v5[DER_OID_SZ_c2onb239v5]; + +/* + * c2pnb272w1 OBJECT IDENTIFIER ::= { c-TwoCurve 16 } + */ +#define DER_OID_V_c2pnb272w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x10 +#define DER_OID_SZ_c2pnb272w1 10 +extern const unsigned char ossl_der_oid_c2pnb272w1[DER_OID_SZ_c2pnb272w1]; + +/* + * c2pnb304w1 OBJECT IDENTIFIER ::= { c-TwoCurve 17 } + */ +#define DER_OID_V_c2pnb304w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x11 +#define DER_OID_SZ_c2pnb304w1 10 +extern const unsigned char ossl_der_oid_c2pnb304w1[DER_OID_SZ_c2pnb304w1]; + +/* + * c2tnb359v1 OBJECT IDENTIFIER ::= { c-TwoCurve 18 } + */ +#define DER_OID_V_c2tnb359v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x12 +#define DER_OID_SZ_c2tnb359v1 10 +extern const unsigned char ossl_der_oid_c2tnb359v1[DER_OID_SZ_c2tnb359v1]; + +/* + * c2pnb368w1 OBJECT IDENTIFIER ::= { c-TwoCurve 19 } + */ +#define DER_OID_V_c2pnb368w1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x13 +#define DER_OID_SZ_c2pnb368w1 10 +extern const unsigned char ossl_der_oid_c2pnb368w1[DER_OID_SZ_c2pnb368w1]; + +/* + * c2tnb431r1 OBJECT IDENTIFIER ::= { c-TwoCurve 20 } + */ +#define DER_OID_V_c2tnb431r1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x00, 0x14 +#define DER_OID_SZ_c2tnb431r1 10 +extern const unsigned char ossl_der_oid_c2tnb431r1[DER_OID_SZ_c2tnb431r1]; + +/* + * prime192v1 OBJECT IDENTIFIER ::= { primeCurve 1 } + */ +#define DER_OID_V_prime192v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x01 +#define DER_OID_SZ_prime192v1 10 +extern const unsigned char 
ossl_der_oid_prime192v1[DER_OID_SZ_prime192v1]; + +/* + * prime192v2 OBJECT IDENTIFIER ::= { primeCurve 2 } + */ +#define DER_OID_V_prime192v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x02 +#define DER_OID_SZ_prime192v2 10 +extern const unsigned char ossl_der_oid_prime192v2[DER_OID_SZ_prime192v2]; + +/* + * prime192v3 OBJECT IDENTIFIER ::= { primeCurve 3 } + */ +#define DER_OID_V_prime192v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x03 +#define DER_OID_SZ_prime192v3 10 +extern const unsigned char ossl_der_oid_prime192v3[DER_OID_SZ_prime192v3]; + +/* + * prime239v1 OBJECT IDENTIFIER ::= { primeCurve 4 } + */ +#define DER_OID_V_prime239v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x04 +#define DER_OID_SZ_prime239v1 10 +extern const unsigned char ossl_der_oid_prime239v1[DER_OID_SZ_prime239v1]; + +/* + * prime239v2 OBJECT IDENTIFIER ::= { primeCurve 5 } + */ +#define DER_OID_V_prime239v2 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x05 +#define DER_OID_SZ_prime239v2 10 +extern const unsigned char ossl_der_oid_prime239v2[DER_OID_SZ_prime239v2]; + +/* + * prime239v3 OBJECT IDENTIFIER ::= { primeCurve 6 } + */ +#define DER_OID_V_prime239v3 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x06 +#define DER_OID_SZ_prime239v3 10 +extern const unsigned char ossl_der_oid_prime239v3[DER_OID_SZ_prime239v3]; + +/* + * prime256v1 OBJECT IDENTIFIER ::= { primeCurve 7 } + */ +#define DER_OID_V_prime256v1 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07 +#define DER_OID_SZ_prime256v1 10 +extern const unsigned char ossl_der_oid_prime256v1[DER_OID_SZ_prime256v1]; + +/* + * ecdsa-with-SHA224 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 1 } + */ +#define DER_OID_V_ecdsa_with_SHA224 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01 +#define DER_OID_SZ_ecdsa_with_SHA224 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA224[DER_OID_SZ_ecdsa_with_SHA224]; + +/* + * ecdsa-with-SHA256 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 2 } + */ +#define DER_OID_V_ecdsa_with_SHA256 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02 +#define DER_OID_SZ_ecdsa_with_SHA256 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA256[DER_OID_SZ_ecdsa_with_SHA256]; + +/* + * ecdsa-with-SHA384 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 3 } + */ +#define DER_OID_V_ecdsa_with_SHA384 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x03 +#define DER_OID_SZ_ecdsa_with_SHA384 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA384[DER_OID_SZ_ecdsa_with_SHA384]; + +/* + * ecdsa-with-SHA512 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 4 } + */ +#define DER_OID_V_ecdsa_with_SHA512 DER_P_OBJECT, 8, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x04 +#define DER_OID_SZ_ecdsa_with_SHA512 10 +extern const unsigned char ossl_der_oid_ecdsa_with_SHA512[DER_OID_SZ_ecdsa_with_SHA512]; + +/* + * id-ecdsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 9 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x09 +#define DER_OID_SZ_id_ecdsa_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_224[DER_OID_SZ_id_ecdsa_with_sha3_224]; + +/* + * id-ecdsa-with-sha3-256 OBJECT 
IDENTIFIER ::= { sigAlgs 10 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0A +#define DER_OID_SZ_id_ecdsa_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_256[DER_OID_SZ_id_ecdsa_with_sha3_256]; + +/* + * id-ecdsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 11 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0B +#define DER_OID_SZ_id_ecdsa_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_384[DER_OID_SZ_id_ecdsa_with_sha3_384]; + +/* + * id-ecdsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 12 } + */ +#define DER_OID_V_id_ecdsa_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0C +#define DER_OID_SZ_id_ecdsa_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_ecdsa_with_sha3_512[DER_OID_SZ_id_ecdsa_with_sha3_512]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_EC(WPACKET *pkt, int cont, EC_KEY *ec); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_ECDSA_with_MD(WPACKET *pkt, int cont, + EC_KEY *ec, int mdnid); diff --git a/contrib/openssl-cmake/common/include/prov/der_ecx.h b/contrib/openssl-cmake/common/include/prov/der_ecx.h new file mode 100644 index 000000000000..fc85738055b5 --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_ecx.h @@ -0,0 +1,50 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ecx.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/ecx.h" + +/* Well known OIDs precompiled */ + +/* + * id-X25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 110 } + */ +#define DER_OID_V_id_X25519 DER_P_OBJECT, 3, 0x2B, 0x65, 0x6E +#define DER_OID_SZ_id_X25519 5 +extern const unsigned char ossl_der_oid_id_X25519[DER_OID_SZ_id_X25519]; + +/* + * id-X448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 111 } + */ +#define DER_OID_V_id_X448 DER_P_OBJECT, 3, 0x2B, 0x65, 0x6F +#define DER_OID_SZ_id_X448 5 +extern const unsigned char ossl_der_oid_id_X448[DER_OID_SZ_id_X448]; + +/* + * id-Ed25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 112 } + */ +#define DER_OID_V_id_Ed25519 DER_P_OBJECT, 3, 0x2B, 0x65, 0x70 +#define DER_OID_SZ_id_Ed25519 5 +extern const unsigned char ossl_der_oid_id_Ed25519[DER_OID_SZ_id_Ed25519]; + +/* + * id-Ed448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 113 } + */ +#define DER_OID_V_id_Ed448 DER_P_OBJECT, 3, 0x2B, 0x65, 0x71 +#define DER_OID_SZ_id_Ed448 5 +extern const unsigned char ossl_der_oid_id_Ed448[DER_OID_SZ_id_Ed448]; + + +int ossl_DER_w_algorithmIdentifier_ED25519(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_ED448(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_X25519(WPACKET *pkt, int cont, ECX_KEY *ec); +int ossl_DER_w_algorithmIdentifier_X448(WPACKET *pkt, int cont, ECX_KEY *ec); diff --git a/contrib/openssl-cmake/common/include/prov/der_ml_dsa.h b/contrib/openssl-cmake/common/include/prov/der_ml_dsa.h new file mode 100644 index 000000000000..c55f780ab452 --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_ml_dsa.h @@ -0,0 +1,40 
@@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_ml_dsa.h.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/ml_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 } + */ +#define DER_OID_V_id_ml_dsa_44 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x11 +#define DER_OID_SZ_id_ml_dsa_44 11 +extern const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44]; + +/* + * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 } + */ +#define DER_OID_V_id_ml_dsa_65 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x12 +#define DER_OID_SZ_id_ml_dsa_65 11 +extern const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65]; + +/* + * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 } + */ +#define DER_OID_V_id_ml_dsa_87 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x13 +#define DER_OID_SZ_id_ml_dsa_87 11 +extern const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87]; + + +int ossl_DER_w_algorithmIdentifier_ML_DSA(WPACKET *pkt, int tag, ML_DSA_KEY *key); diff --git a/contrib/openssl-cmake/common/include/prov/der_rsa.h b/contrib/openssl-cmake/common/include/prov/der_rsa.h new file mode 100644 index 000000000000..5ec3c515a1bd --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_rsa.h @@ -0,0 +1,187 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_rsa.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/rsa.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * hashAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 2 } + */ +#define DER_OID_V_hashAlgs DER_P_OBJECT, 8, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02 +#define DER_OID_SZ_hashAlgs 10 +extern const unsigned char ossl_der_oid_hashAlgs[DER_OID_SZ_hashAlgs]; + +/* + * rsaEncryption OBJECT IDENTIFIER ::= { pkcs-1 1 } + */ +#define DER_OID_V_rsaEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01 +#define DER_OID_SZ_rsaEncryption 11 +extern const unsigned char ossl_der_oid_rsaEncryption[DER_OID_SZ_rsaEncryption]; + +/* + * id-RSAES-OAEP OBJECT IDENTIFIER ::= { pkcs-1 7 } + */ +#define DER_OID_V_id_RSAES_OAEP DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x07 +#define DER_OID_SZ_id_RSAES_OAEP 11 +extern const unsigned char ossl_der_oid_id_RSAES_OAEP[DER_OID_SZ_id_RSAES_OAEP]; + +/* + * id-pSpecified OBJECT IDENTIFIER ::= { pkcs-1 9 } + */ +#define DER_OID_V_id_pSpecified DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x09 +#define DER_OID_SZ_id_pSpecified 11 +extern const unsigned char ossl_der_oid_id_pSpecified[DER_OID_SZ_id_pSpecified]; + +/* + * id-RSASSA-PSS OBJECT IDENTIFIER ::= { pkcs-1 10 } + */ +#define DER_OID_V_id_RSASSA_PSS DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0A +#define DER_OID_SZ_id_RSASSA_PSS 11 +extern const unsigned char ossl_der_oid_id_RSASSA_PSS[DER_OID_SZ_id_RSASSA_PSS]; + +/* + * md2WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 2 } + */ +#define DER_OID_V_md2WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x02 +#define DER_OID_SZ_md2WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md2WithRSAEncryption[DER_OID_SZ_md2WithRSAEncryption]; + +/* + * md5WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 4 } + */ +#define DER_OID_V_md5WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x04 +#define DER_OID_SZ_md5WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md5WithRSAEncryption[DER_OID_SZ_md5WithRSAEncryption]; + +/* + * sha1WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 5 } + */ +#define DER_OID_V_sha1WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05 +#define DER_OID_SZ_sha1WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha1WithRSAEncryption[DER_OID_SZ_sha1WithRSAEncryption]; + +/* + * sha224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 14 } + */ +#define DER_OID_V_sha224WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E +#define DER_OID_SZ_sha224WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha224WithRSAEncryption[DER_OID_SZ_sha224WithRSAEncryption]; + +/* + * sha256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 11 } + */ +#define DER_OID_V_sha256WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0B +#define DER_OID_SZ_sha256WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha256WithRSAEncryption[DER_OID_SZ_sha256WithRSAEncryption]; + +/* + * sha384WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 12 } + */ +#define DER_OID_V_sha384WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0C +#define DER_OID_SZ_sha384WithRSAEncryption 11 +extern const unsigned char 
ossl_der_oid_sha384WithRSAEncryption[DER_OID_SZ_sha384WithRSAEncryption]; + +/* + * sha512WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 13 } + */ +#define DER_OID_V_sha512WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0D +#define DER_OID_SZ_sha512WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512WithRSAEncryption[DER_OID_SZ_sha512WithRSAEncryption]; + +/* + * sha512-224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 15 } + */ +#define DER_OID_V_sha512_224WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0F +#define DER_OID_SZ_sha512_224WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512_224WithRSAEncryption[DER_OID_SZ_sha512_224WithRSAEncryption]; + +/* + * sha512-256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 16 } + */ +#define DER_OID_V_sha512_256WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x10 +#define DER_OID_SZ_sha512_256WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_sha512_256WithRSAEncryption[DER_OID_SZ_sha512_256WithRSAEncryption]; + +/* + * id-mgf1 OBJECT IDENTIFIER ::= { pkcs-1 8 } + */ +#define DER_OID_V_id_mgf1 DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x08 +#define DER_OID_SZ_id_mgf1 11 +extern const unsigned char ossl_der_oid_id_mgf1[DER_OID_SZ_id_mgf1]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 13 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_224 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0D +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_224[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 14 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_256 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0E +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_256[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 15 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_384 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x0F +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_384[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384]; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 16 } + */ +#define DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_512 DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x10 +#define DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512 11 +extern const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_512[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512]; + +/* + * md4WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 3 } + */ +#define DER_OID_V_md4WithRSAEncryption DER_P_OBJECT, 9, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x03 +#define DER_OID_SZ_md4WithRSAEncryption 11 +extern const unsigned char ossl_der_oid_md4WithRSAEncryption[DER_OID_SZ_md4WithRSAEncryption]; + +/* + * ripemd160WithRSAEncryption OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) teletrust(36) algorithm(3) signatureAlgorithm(3) rsaSignature(1) 2 + * } + */ +#define DER_OID_V_ripemd160WithRSAEncryption DER_P_OBJECT, 6, 0x2B, 0x24, 0x03, 0x03, 0x01, 0x02 
+#define DER_OID_SZ_ripemd160WithRSAEncryption 8 +extern const unsigned char ossl_der_oid_ripemd160WithRSAEncryption[DER_OID_SZ_ripemd160WithRSAEncryption]; + +/* + * mdc2WithRSASignature OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) oiw(14) secsig(3) algorithms(2) mdc2WithRSASignature(14) + * } + */ +#define DER_OID_V_mdc2WithRSASignature DER_P_OBJECT, 5, 0x2B, 0x0E, 0x03, 0x02, 0x0E +#define DER_OID_SZ_mdc2WithRSASignature 7 +extern const unsigned char ossl_der_oid_mdc2WithRSASignature[DER_OID_SZ_mdc2WithRSASignature]; + + +/* PSS parameters */ +int ossl_DER_w_RSASSA_PSS_params(WPACKET *pkt, int tag, + const RSA_PSS_PARAMS_30 *pss); +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_RSA(WPACKET *pkt, int tag, RSA *rsa); +int ossl_DER_w_algorithmIdentifier_RSA_PSS(WPACKET *pkt, int tag, + int rsa_type, + const RSA_PSS_PARAMS_30 *pss); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_MDWithRSAEncryption(WPACKET *pkt, int tag, + int mdnid); diff --git a/contrib/openssl-cmake/common/include/prov/der_slh_dsa.h b/contrib/openssl-cmake/common/include/prov/der_slh_dsa.h new file mode 100644 index 000000000000..760f8e7699be --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_slh_dsa.h @@ -0,0 +1,103 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_slh_dsa.h.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" +#include "crypto/slh_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 } + */ +#define DER_OID_V_id_slh_dsa_sha2_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x14 +#define DER_OID_SZ_id_slh_dsa_sha2_128s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s]; + +/* + * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 } + */ +#define DER_OID_V_id_slh_dsa_sha2_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x15 +#define DER_OID_SZ_id_slh_dsa_sha2_128f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f]; + +/* + * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 } + */ +#define DER_OID_V_id_slh_dsa_sha2_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x16 +#define DER_OID_SZ_id_slh_dsa_sha2_192s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s]; + +/* + * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 } + */ +#define DER_OID_V_id_slh_dsa_sha2_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x17 +#define DER_OID_SZ_id_slh_dsa_sha2_192f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f]; + +/* + * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 } + */ +#define DER_OID_V_id_slh_dsa_sha2_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x18 +#define DER_OID_SZ_id_slh_dsa_sha2_256s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s]; + +/* + * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 } + */ +#define DER_OID_V_id_slh_dsa_sha2_256f DER_P_OBJECT, 9, 
0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x19 +#define DER_OID_SZ_id_slh_dsa_sha2_256f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f]; + +/* + * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 } + */ +#define DER_OID_V_id_slh_dsa_shake_128s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1A +#define DER_OID_SZ_id_slh_dsa_shake_128s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s]; + +/* + * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 } + */ +#define DER_OID_V_id_slh_dsa_shake_128f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1B +#define DER_OID_SZ_id_slh_dsa_shake_128f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f]; + +/* + * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 } + */ +#define DER_OID_V_id_slh_dsa_shake_192s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1C +#define DER_OID_SZ_id_slh_dsa_shake_192s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s]; + +/* + * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 } + */ +#define DER_OID_V_id_slh_dsa_shake_192f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1D +#define DER_OID_SZ_id_slh_dsa_shake_192f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f]; + +/* + * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 } + */ +#define DER_OID_V_id_slh_dsa_shake_256s DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1E +#define DER_OID_SZ_id_slh_dsa_shake_256s 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s]; + +/* + * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 } + */ +#define DER_OID_V_id_slh_dsa_shake_256f DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x03, 0x1F +#define DER_OID_SZ_id_slh_dsa_shake_256f 11 +extern const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f]; + + +int ossl_DER_w_algorithmIdentifier_SLH_DSA(WPACKET *pkt, int tag, SLH_DSA_KEY *key); diff --git a/contrib/openssl-cmake/common/include/prov/der_sm2.h b/contrib/openssl-cmake/common/include/prov/der_sm2.h new file mode 100644 index 000000000000..9d41b31265ca --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_sm2.h @@ -0,0 +1,37 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_sm2.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "crypto/ec.h" +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * sm2-with-SM3 OBJECT IDENTIFIER ::= { sm-scheme 501 } + */ +#define DER_OID_V_sm2_with_SM3 DER_P_OBJECT, 8, 0x2A, 0x81, 0x1C, 0xCF, 0x55, 0x01, 0x83, 0x75 +#define DER_OID_SZ_sm2_with_SM3 10 +extern const unsigned char ossl_der_oid_sm2_with_SM3[DER_OID_SZ_sm2_with_SM3]; + +/* + * curveSM2 OBJECT IDENTIFIER ::= { sm-scheme 301 } + */ +#define DER_OID_V_curveSM2 DER_P_OBJECT, 8, 0x2A, 0x81, 0x1C, 0xCF, 0x55, 0x01, 0x82, 0x2D +#define DER_OID_SZ_curveSM2 10 +extern const unsigned char ossl_der_oid_curveSM2[DER_OID_SZ_curveSM2]; + + +/* Subject Public Key Info */ +int ossl_DER_w_algorithmIdentifier_SM2(WPACKET *pkt, int cont, EC_KEY *ec); +/* Signature */ +int ossl_DER_w_algorithmIdentifier_SM2_with_MD(WPACKET *pkt, int cont, + EC_KEY *ec, int mdnid); diff --git a/contrib/openssl-cmake/common/include/prov/der_wrap.h b/contrib/openssl-cmake/common/include/prov/der_wrap.h new file mode 100644 index 000000000000..ff2954037727 --- /dev/null +++ b/contrib/openssl-cmake/common/include/prov/der_wrap.h @@ -0,0 +1,46 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/include/prov/der_wrap.h.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "internal/der.h" + +/* Well known OIDs precompiled */ + +/* + * id-alg-CMS3DESwrap OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-9(9) smime(16) alg(3) 6 + * } + */ +#define DER_OID_V_id_alg_CMS3DESwrap DER_P_OBJECT, 11, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x09, 0x10, 0x03, 0x06 +#define DER_OID_SZ_id_alg_CMS3DESwrap 13 +extern const unsigned char ossl_der_oid_id_alg_CMS3DESwrap[DER_OID_SZ_id_alg_CMS3DESwrap]; + +/* + * id-aes128-wrap OBJECT IDENTIFIER ::= { aes 5 } + */ +#define DER_OID_V_id_aes128_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x05 +#define DER_OID_SZ_id_aes128_wrap 11 +extern const unsigned char ossl_der_oid_id_aes128_wrap[DER_OID_SZ_id_aes128_wrap]; + +/* + * id-aes192-wrap OBJECT IDENTIFIER ::= { aes 25 } + */ +#define DER_OID_V_id_aes192_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x19 +#define DER_OID_SZ_id_aes192_wrap 11 +extern const unsigned char ossl_der_oid_id_aes192_wrap[DER_OID_SZ_id_aes192_wrap]; + +/* + * id-aes256-wrap OBJECT IDENTIFIER ::= { aes 45 } + */ +#define DER_OID_V_id_aes256_wrap DER_P_OBJECT, 9, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x01, 0x2D +#define DER_OID_SZ_id_aes256_wrap 11 +extern const unsigned char ossl_der_oid_id_aes256_wrap[DER_OID_SZ_id_aes256_wrap]; + diff --git a/contrib/openssl-cmake/common/params_idx.c b/contrib/openssl-cmake/common/params_idx.c new file mode 100644 index 000000000000..9d76ffededc2 --- /dev/null +++ b/contrib/openssl-cmake/common/params_idx.c @@ -0,0 +1,3366 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from crypto/params_idx.c.in + * + * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). 
You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + + +#include "internal/e_os.h" +#include "internal/param_names.h" +#include + +/* Machine generated TRIE -- generated by util/perl/OpenSSL/paramnames.pm */ +int ossl_param_find_pidx(const char *s) +{ + switch(s[0]) { + default: + break; + case 'a': + switch(s[1]) { + default: + break; + case 'c': + if (strcmp("vp-info", s + 2) == 0) + return PIDX_KDF_PARAM_X942_ACVPINFO; + break; + case 'd': + switch(s[2]) { + default: + break; + case 'd': + if (strcmp("itional-random", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_ADD_RANDOM; + break; + case '\0': + return PIDX_KDF_PARAM_ARGON2_AD; + } + break; + case 'e': + if (strcmp("ad", s + 2) == 0) + return PIDX_CIPHER_PARAM_AEAD; + break; + case 'l': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case '_': + if (strcmp("id_param", s + 4) == 0) + return PIDX_CIPHER_PARAM_ALGORITHM_ID_PARAMS_OLD; + break; + case 'i': + if (strcmp("d-absent", s + 4) == 0) + return PIDX_DIGEST_PARAM_ALGID_ABSENT; + break; + case 'o': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'h': + switch(s[8]) { + default: + break; + case 'm': + switch(s[9]) { + default: + break; + case '-': + switch(s[10]) { + default: + break; + case 'i': + switch(s[11]) { + default: + break; + case 'd': + switch(s[12]) { + default: + break; + case '-': + if (strcmp("params", s + 13) == 0) + return PIDX_ALG_PARAM_ALGORITHM_ID_PARAMS; + break; + case '\0': + return PIDX_ALG_PARAM_ALGORITHM_ID; + } + } + } + } + } + } + } + } + } + } + break; + case 'i': + if (strcmp("as", s + 3) == 0) + return PIDX_STORE_PARAM_ALIAS; + } + break; + case '\0': + return PIDX_PKEY_PARAM_EC_A; + } + break; + case 'b': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("sis-type", s + 2) == 0) + return PIDX_PKEY_PARAM_EC_CHAR2_TYPE; + break; + case 'i': + if (strcmp("ts", s + 2) == 0) + return PIDX_PKEY_PARAM_BITS; + break; + case 'l': + switch(s[2]) { + default: + break; + case 'o': + switch(s[3]) { + default: + break; + case 'c': + switch(s[4]) { + default: + break; + case 'k': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("size", s + 6) == 0) + return PIDX_MAC_PARAM_BLOCK_SIZE; + break; + case '_': + if (strcmp("padding", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_BLOCK_PADDING; + break; + case 's': + if (strcmp("ize", s + 6) == 0) + return PIDX_DIGEST_PARAM_BLOCK_SIZE; + } + } + } + } + break; + case 'u': + if (strcmp("ildinfo", s + 2) == 0) + return PIDX_PROV_PARAM_BUILDINFO; + break; + case '\0': + return PIDX_PKEY_PARAM_EC_B; + } + break; + case 'c': + switch(s[1]) { + default: + break; + case '-': + if (strcmp("rounds", s + 2) == 0) + return PIDX_MAC_PARAM_C_ROUNDS; + break; + case 'e': + if (strcmp("kalg", s + 2) == 0) + return PIDX_KDF_PARAM_CEK_ALG; + break; + case 'i': + if (strcmp("pher", s + 2) == 0) + return PIDX_ALG_PARAM_CIPHER; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'f': + if (strcmp("actor", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_COFACTOR; + break; + case 'n': + switch(s[3]) { + default: + break; + case 's': + if (strcmp("tant", s + 4) == 0) + return PIDX_KDF_PARAM_CONSTANT; + break; + case 't': + if (strcmp("ext-string", s + 4) == 0) + return 
PIDX_SIGNATURE_PARAM_CONTEXT_STRING; + } + } + break; + case 't': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case '_': + if (strcmp("mode", s + 4) == 0) + return PIDX_CIPHER_PARAM_CTS_MODE; + break; + case '\0': + return PIDX_CIPHER_PARAM_CTS; + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'o': + switch(s[5]) { + default: + break; + case 'm': + switch(s[6]) { + default: + break; + case '-': + if (strcmp("iv", s + 7) == 0) + return PIDX_CIPHER_PARAM_CUSTOM_IV; + break; + case '\0': + return PIDX_MAC_PARAM_CUSTOM; + } + } + } + } + } + } + break; + case 'd': + switch(s[1]) { + default: + break; + case '-': + if (strcmp("rounds", s + 2) == 0) + return PIDX_MAC_PARAM_D_ROUNDS; + break; + case 'a': + switch(s[2]) { + default: + break; + case 't': + switch(s[3]) { + default: + break; + case 'a': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 's': + if (strcmp("tructure", s + 6) == 0) + return PIDX_OBJECT_PARAM_DATA_STRUCTURE; + break; + case 't': + if (strcmp("ype", s + 6) == 0) + return PIDX_OBJECT_PARAM_DATA_TYPE; + } + break; + case '\0': + return PIDX_OBJECT_PARAM_DATA; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'o': + if (strcmp("ded-from-explicit", s + 4) == 0) + return PIDX_PKEY_PARAM_EC_DECODED_FROM_EXPLICIT_PARAMS; + break; + case 'r': + if (strcmp("ypt-only", s + 4) == 0) + return PIDX_CIPHER_PARAM_DECRYPT_ONLY; + } + break; + case 'f': + if (strcmp("ault-digest", s + 3) == 0) + return PIDX_PKEY_PARAM_DEFAULT_DIGEST; + break; + case 's': + if (strcmp("c", s + 3) == 0) + return PIDX_OBJECT_PARAM_DESC; + break; + case 't': + if (strcmp("erministic", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_DETERMINISTIC; + } + break; + case 'h': + if (strcmp("kem-ikm", s + 2) == 0) + return PIDX_PKEY_PARAM_DHKEM_IKM; + break; + case 'i': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 's': + switch(s[5]) { + default: + break; + case 't': + switch(s[6]) { + default: + break; + case '-': + switch(s[7]) { + default: + break; + case 'c': + if (strcmp("heck", s + 8) == 0) + return PIDX_PKEY_PARAM_FIPS_DIGEST_CHECK; + break; + case 'n': + if (strcmp("oinit", s + 8) == 0) + return PIDX_MAC_PARAM_DIGEST_NOINIT; + break; + case 'o': + if (strcmp("neshot", s + 8) == 0) + return PIDX_MAC_PARAM_DIGEST_ONESHOT; + break; + case 'p': + if (strcmp("rops", s + 8) == 0) + return PIDX_ASYM_CIPHER_PARAM_OAEP_DIGEST_PROPS; + break; + case 's': + if (strcmp("ize", s + 8) == 0) + return PIDX_PKEY_PARAM_DIGEST_SIZE; + } + break; + case '\0': + return PIDX_STORE_PARAM_DIGEST; + } + } + } + } + break; + case 's': + if (strcmp("tid", s + 3) == 0) + return PIDX_PKEY_PARAM_DIST_ID; + } + break; + case 'r': + if (strcmp("bg-no-trunc-md", s + 2) == 0) + return PIDX_PROV_PARAM_DRBG_TRUNC_DIGEST; + break; + case 's': + if (strcmp("a-sign-disabled", s + 2) == 0) + return PIDX_PROV_PARAM_DSA_SIGN_DISABLED; + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_D; + } + break; + case 'e': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("rly_clean", s + 2) == 0) + return PIDX_KDF_PARAM_EARLY_CLEAN; + break; + case 'c': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case 'h': + switch(s[4]) { + default: + 
break; + case '-': + switch(s[5]) { + default: + break; + case 'c': + switch(s[6]) { + default: + break; + case 'o': + switch(s[7]) { + default: + break; + case 'f': + switch(s[8]) { + default: + break; + case 'a': + switch(s[9]) { + default: + break; + case 'c': + switch(s[10]) { + default: + break; + case 't': + switch(s[11]) { + default: + break; + case 'o': + switch(s[12]) { + default: + break; + case 'r': + switch(s[13]) { + default: + break; + case '-': + switch(s[14]) { + default: + break; + case 'c': + if (strcmp("heck", s + 15) == 0) + return PIDX_PROV_PARAM_ECDH_COFACTOR_CHECK; + break; + case 'm': + if (strcmp("ode", s + 15) == 0) + return PIDX_EXCHANGE_PARAM_EC_ECDH_COFACTOR_MODE; + } + } + } + } + } + } + } + } + } + } + } + } + } + break; + case 'm': + if (strcmp("s_check", s + 2) == 0) + return PIDX_KDF_PARAM_FIPS_EMS_CHECK; + break; + case 'n': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'o': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'e': + if (strcmp("d-pub-key", s + 6) == 0) + return PIDX_PKEY_PARAM_ENCODED_PUBLIC_KEY; + break; + case 'i': + if (strcmp("ng", s + 6) == 0) + return PIDX_PKEY_PARAM_EC_ENCODING; + } + } + break; + case 'r': + switch(s[4]) { + default: + break; + case 'y': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'c': + if (strcmp("heck", s + 9) == 0) + return PIDX_CIPHER_PARAM_FIPS_ENCRYPT_CHECK; + break; + case 'l': + if (strcmp("evel", s + 9) == 0) + return PIDX_ENCODER_PARAM_ENCRYPT_LEVEL; + } + } + } + } + } + } + break; + case 'g': + if (strcmp("ine", s + 3) == 0) + return PIDX_ALG_PARAM_ENGINE; + break; + case 't': + switch(s[3]) { + default: + break; + case 'r': + switch(s[4]) { + default: + break; + case 'o': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'y': + switch(s[7]) { + default: + break; + case '_': + if (strcmp("required", s + 8) == 0) + return PIDX_DRBG_PARAM_ENTROPY_REQUIRED; + break; + case '\0': + return PIDX_KDF_PARAM_HMACDRBG_ENTROPY; + } + } + } + } + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_E; + break; + case 'x': + if (strcmp("pect", s + 2) == 0) + return PIDX_STORE_PARAM_EXPECT; + } + break; + case 'f': + switch(s[1]) { + default: + break; + case 'i': + switch(s[2]) { + default: + break; + case 'e': + if (strcmp("ld-type", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_FIELD_TYPE; + break; + case 'n': + if (strcmp("gerprint", s + 3) == 0) + return PIDX_STORE_PARAM_FINGERPRINT; + break; + case 'p': + if (strcmp("s-indicator", s + 3) == 0) + return PIDX_ALG_PARAM_FIPS_APPROVED_INDICATOR; + } + } + break; + case 'g': + switch(s[1]) { + default: + break; + case 'e': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case '\0': + return PIDX_RAND_PARAM_GENERATE; + } + break; + case 'o': + if (strcmp("r", s + 8) == 0) + return PIDX_PKEY_PARAM_EC_GENERATOR; + } + } + } + } + } + } + break; + case 'i': + if (strcmp("ndex", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_GINDEX; + break; + case 'r': + switch(s[2]) { + default: + break; + case 'o': + switch(s[3]) { + 
default: + break; + case 'u': + switch(s[4]) { + default: + break; + case 'p': + switch(s[5]) { + default: + break; + case '-': + if (strcmp("check", s + 6) == 0) + return PIDX_PKEY_PARAM_EC_GROUP_CHECK_TYPE; + break; + case '\0': + return PIDX_PKEY_PARAM_GROUP_NAME; + } + } + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_G; + } + break; + case 'h': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("s-randkey", s + 2) == 0) + return PIDX_CIPHER_PARAM_HAS_RAND_KEY; + break; + case 'i': + if (strcmp("ndex", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_H; + break; + case 'k': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case 'f': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'd': + if (strcmp("igest-check", s + 6) == 0) + return PIDX_PROV_PARAM_HKDF_DIGEST_CHECK; + break; + case 'k': + if (strcmp("ey-check", s + 6) == 0) + return PIDX_PROV_PARAM_HKDF_KEY_CHECK; + } + } + } + } + break; + case 'm': + if (strcmp("ac-key-check", s + 2) == 0) + return PIDX_PROV_PARAM_HMAC_KEY_CHECK; + break; + case 's': + if (strcmp("_padding", s + 2) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_HS_PADDING; + } + break; + case 'i': + switch(s[1]) { + default: + break; + case 'd': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_PKCS12_ID; + } + break; + case 'k': + if (strcmp("me", s + 2) == 0) + return PIDX_KEM_PARAM_IKME; + break; + case 'm': + if (strcmp("plicit-rejection", s + 2) == 0) + return PIDX_PKEY_PARAM_IMPLICIT_REJECTION; + break; + case 'n': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("lude-public", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_INCLUDE_PUBLIC; + break; + case 'f': + if (strcmp("o", s + 3) == 0) + return PIDX_PASSPHRASE_PARAM_INFO; + break; + case 'p': + if (strcmp("ut-type", s + 3) == 0) + return PIDX_STORE_PARAM_INPUT_TYPE; + break; + case 's': + if (strcmp("tance", s + 3) == 0) + return PIDX_SIGNATURE_PARAM_INSTANCE; + } + break; + case 't': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'r': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("tion", s + 5) == 0) + return PIDX_GEN_PARAM_ITERATION; + break; + case '\0': + return PIDX_KDF_PARAM_ITER; + } + } + } + break; + case 'v': + switch(s[2]) { + default: + break; + case '-': + if (strcmp("generated", s + 3) == 0) + return PIDX_CIPHER_PARAM_AEAD_IV_GENERATED; + break; + case 'l': + if (strcmp("en", s + 3) == 0) + return PIDX_CIPHER_PARAM_IVLEN; + break; + case '\0': + return PIDX_MAC_PARAM_IV; + } + } + break; + case 'j': + switch(s[1]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_COFACTOR; + } + break; + case 'k': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K2; + } + break; + case '3': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_PP_K3; + } + break; + case 'a': + if (strcmp("t", s + 2) == 0) + return PIDX_SIGNATURE_PARAM_KAT; + break; + case 'b': + if (strcmp("kdf-key-check", s + 2) == 0) + return PIDX_PROV_PARAM_KBKDF_KEY_CHECK; + break; + case 'd': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + 
default: + break; + case 'g': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case 's': + switch(s[9]) { + default: + break; + case 't': + switch(s[10]) { + default: + break; + case '-': + if (strcmp("props", s + 11) == 0) + return PIDX_EXCHANGE_PARAM_KDF_DIGEST_PROPS; + break; + case '\0': + return PIDX_EXCHANGE_PARAM_KDF_DIGEST; + } + } + } + } + } + } + break; + case 'o': + if (strcmp("utlen", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_OUTLEN; + break; + case 't': + if (strcmp("ype", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_TYPE; + break; + case 'u': + if (strcmp("km", s + 5) == 0) + return PIDX_EXCHANGE_PARAM_KDF_UKM; + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'y': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + if (strcmp("heck", s + 5) == 0) + return PIDX_PKEY_PARAM_FIPS_KEY_CHECK; + break; + case 'l': + if (strcmp("ength", s + 5) == 0) + return PIDX_SKEY_PARAM_KEY_LENGTH; + } + break; + case 'b': + if (strcmp("its", s + 4) == 0) + return PIDX_CIPHER_PARAM_RC2_KEYBITS; + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_CIPHER_PARAM_KEYLEN; + break; + case '\0': + return PIDX_MAC_PARAM_KEY; + } + } + break; + case 'm': + if (strcmp("ac-key-check", s + 2) == 0) + return PIDX_PROV_PARAM_KMAC_KEY_CHECK; + } + break; + case 'l': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'b': + if (strcmp("el", s + 3) == 0) + return PIDX_KDF_PARAM_LABEL; + break; + case 'n': + if (strcmp("es", s + 3) == 0) + return PIDX_KDF_PARAM_ARGON2_LANES; + } + } + break; + case 'm': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'k': + if (strcmp("ey", s + 4) == 0) + return PIDX_CIPHER_PARAM_AEAD_MAC_KEY; + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_KDF_PARAM_MAC_SIZE; + break; + case '\0': + return PIDX_ALG_PARAM_MAC; + } + break; + case 'n': + if (strcmp("datory-digest", s + 3) == 0) + return PIDX_PKEY_PARAM_MANDATORY_DIGEST; + break; + case 'x': + switch(s[3]) { + default: + break; + case '-': + if (strcmp("size", s + 4) == 0) + return PIDX_PKEY_PARAM_MAX_SIZE; + break; + case '_': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("dinlen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_ADINLEN; + break; + case 'e': + switch(s[5]) { + default: + break; + case 'a': + if (strcmp("rly_data", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_EARLY_DATA; + break; + case 'n': + if (strcmp("tropylen", s + 6) == 0) + return PIDX_DRBG_PARAM_MAX_ENTROPYLEN; + } + break; + case 'f': + if (strcmp("rag_len", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MAX_FRAG_LEN; + break; + case 'n': + if (strcmp("oncelen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_NONCELEN; + break; + case 'p': + if (strcmp("erslen", s + 5) == 0) + return PIDX_DRBG_PARAM_MAX_PERSLEN; + break; + case 'r': + if (strcmp("equest", s + 5) == 0) + return PIDX_RAND_PARAM_MAX_REQUEST; + } + break; + case 'i': + if (strcmp("um_length", s + 4) == 0) + return PIDX_DRBG_PARAM_MAX_LENGTH; + break; + case 'm': + if (strcmp("em_bytes", s + 4) == 0) + return PIDX_KDF_PARAM_SCRYPT_MAXMEM; + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'm': + if (strcmp("cost", s + 3) == 0) + return PIDX_KDF_PARAM_ARGON2_MEMCOST; + break; + case 's': + if (strcmp("sage-encoding", s + 3) == 0) + return 
PIDX_SIGNATURE_PARAM_MESSAGE_ENCODING; + } + break; + case 'g': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case '1': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'd': + if (strcmp("igest", s + 6) == 0) + return PIDX_PKEY_PARAM_MGF1_DIGEST; + break; + case 'p': + if (strcmp("roperties", s + 6) == 0) + return PIDX_PKEY_PARAM_MGF1_PROPERTIES; + } + } + break; + case '\0': + return PIDX_PKEY_PARAM_MASKGENFUNC; + } + } + break; + case 'i': + switch(s[2]) { + default: + break; + case 'c': + if (strcmp("alg", s + 3) == 0) + return PIDX_DIGEST_PARAM_MICALG; + break; + case 'n': + switch(s[3]) { + default: + break; + case '_': + switch(s[4]) { + default: + break; + case 'e': + if (strcmp("ntropylen", s + 5) == 0) + return PIDX_DRBG_PARAM_MIN_ENTROPYLEN; + break; + case 'n': + if (strcmp("oncelen", s + 5) == 0) + return PIDX_DRBG_PARAM_MIN_NONCELEN; + } + break; + case 'i': + if (strcmp("um_length", s + 4) == 0) + return PIDX_DRBG_PARAM_MIN_LENGTH; + } + } + break; + case 'l': + switch(s[2]) { + default: + break; + case '-': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case 's': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case '.': + switch(s[7]) { + default: + break; + case 'i': + if (strcmp("nput_formats", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_INPUT_FORMATS; + break; + case 'o': + if (strcmp("utput_formats", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_OUTPUT_FORMATS; + break; + case 'p': + if (strcmp("refer_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_PREFER_SEED; + break; + case 'r': + if (strcmp("etain_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_DSA_RETAIN_SEED; + } + } + } + } + break; + case 'k': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case 'm': + switch(s[6]) { + default: + break; + case '.': + switch(s[7]) { + default: + break; + case 'i': + switch(s[8]) { + default: + break; + case 'm': + if (strcmp("port_pct_type", s + 9) == 0) + return PIDX_PKEY_PARAM_ML_KEM_IMPORT_PCT_TYPE; + break; + case 'n': + if (strcmp("put_formats", s + 9) == 0) + return PIDX_PKEY_PARAM_ML_KEM_INPUT_FORMATS; + } + break; + case 'o': + if (strcmp("utput_formats", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_KEM_OUTPUT_FORMATS; + break; + case 'p': + if (strcmp("refer_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_KEM_PREFER_SEED; + break; + case 'r': + if (strcmp("etain_seed", s + 8) == 0) + return PIDX_PKEY_PARAM_ML_KEM_RETAIN_SEED; + } + } + } + } + } + } + break; + case 'o': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case '\0': + return PIDX_LIBSSL_RECORD_LAYER_PARAM_MODE; + } + break; + case 'u': + if (strcmp("le-filename", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_MODULE_FILENAME; + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_SIGNATURE_PARAM_MU; + } + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_M; + } + break; + case 'n': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("me", s + 2) == 0) + return PIDX_STORE_PARAM_ISSUER; + break; + case 'o': + switch(s[2]) { + default: + break; + case '-': + if (strcmp("short-mac", s + 3) == 0) + return PIDX_PROV_PARAM_NO_SHORT_MAC; + break; + case 'n': + switch(s[3]) { + default: + break; + case 'c': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + 
default: + break; + case '-': + if (strcmp("type", s + 6) == 0) + return PIDX_SIGNATURE_PARAM_NONCE_TYPE; + break; + case '\0': + return PIDX_KDF_PARAM_HMACDRBG_NONCE; + } + } + } + } + break; + case 'u': + if (strcmp("m", s + 2) == 0) + return PIDX_CIPHER_PARAM_NUM; + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_N; + } + break; + case 'o': + switch(s[1]) { + default: + break; + case 'a': + if (strcmp("ep-label", s + 2) == 0) + return PIDX_ASYM_CIPHER_PARAM_OAEP_LABEL; + break; + case 'p': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'n': + if (strcmp("ssl-version", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_VERSION; + break; + case 'r': + if (strcmp("ation", s + 4) == 0) + return PIDX_KEM_PARAM_OPERATION; + } + break; + case 't': + if (strcmp("ions", s + 3) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_OPTIONS; + } + break; + case 'r': + if (strcmp("der", s + 2) == 0) + return PIDX_PKEY_PARAM_EC_ORDER; + } + break; + case 'p': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_P1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_P2; + } + break; + case 'a': + switch(s[2]) { + default: + break; + case 'd': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'm': + if (strcmp("ode", s + 5) == 0) + return PIDX_PKEY_PARAM_PAD_MODE; + break; + case 't': + if (strcmp("ype", s + 5) == 0) + return PIDX_DIGEST_PARAM_PAD_TYPE; + } + break; + case 'd': + if (strcmp("ing", s + 4) == 0) + return PIDX_CIPHER_PARAM_PADDING; + break; + case '\0': + return PIDX_EXCHANGE_PARAM_PAD; + } + break; + case 'r': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'y': + switch(s[5]) { + default: + break; + case 'u': + if (strcmp("-info", s + 6) == 0) + return PIDX_KDF_PARAM_X942_PARTYUINFO; + break; + case 'v': + if (strcmp("-info", s + 6) == 0) + return PIDX_KDF_PARAM_X942_PARTYVINFO; + } + } + } + break; + case 's': + if (strcmp("s", s + 3) == 0) + return PIDX_KDF_PARAM_PASSWORD; + } + break; + case 'b': + switch(s[2]) { + default: + break; + case 'i': + if (strcmp("ts", s + 3) == 0) + return PIDX_PKEY_PARAM_FFC_PBITS; + break; + case 'k': + if (strcmp("df2-lower-bound-check", s + 3) == 0) + return PIDX_PROV_PARAM_PBKDF2_LOWER_BOUND_CHECK; + } + break; + case 'c': + if (strcmp("ounter", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_PCOUNTER; + break; + case 'i': + if (strcmp("peline-tag", s + 2) == 0) + return PIDX_CIPHER_PARAM_PIPELINE_AEAD_TAG; + break; + case 'k': + if (strcmp("cs5", s + 2) == 0) + return PIDX_KDF_PARAM_PKCS5; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'i': + if (strcmp("nt-format", s + 3) == 0) + return PIDX_PKEY_PARAM_EC_POINT_CONVERSION_FORMAT; + break; + case 't': + if (strcmp("ential", s + 3) == 0) + return PIDX_GEN_PARAM_POTENTIAL; + } + break; + case 'r': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case 'd': + if (strcmp("iction_resistance", s + 4) == 0) + return PIDX_DRBG_PARAM_PREDICTION_RESISTANCE; + break; + case 'f': + if (strcmp("ix", s + 4) == 0) + return PIDX_KDF_PARAM_PREFIX; + } + break; + case 'i': + switch(s[3]) { + default: + break; + case 'm': + if (strcmp("es", s + 4) == 0) + return PIDX_PKEY_PARAM_RSA_PRIMES; + break; + case 'v': + switch(s[4]) { + default: + break; + case '_': + if (strcmp("len", s + 5) == 0) + return 
PIDX_PKEY_PARAM_DH_PRIV_LEN; + break; + case '\0': + return PIDX_PKEY_PARAM_PRIV_KEY; + } + } + break; + case 'o': + switch(s[3]) { + default: + break; + case 'p': + if (strcmp("erties", s + 4) == 0) + return PIDX_STORE_PARAM_PROPERTIES; + break; + case 'v': + if (strcmp("ider-name", s + 4) == 0) + return PIDX_PROV_PARAM_CORE_PROV_NAME; + } + } + break; + case 'u': + if (strcmp("b", s + 2) == 0) + return PIDX_PKEY_PARAM_PUB_KEY; + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_P; + } + break; + case 'q': + switch(s[1]) { + default: + break; + case '1': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_Q1; + } + break; + case '2': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_Q2; + } + break; + case 'b': + if (strcmp("its", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_QBITS; + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_Q; + break; + case 'x': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_PUB_X; + } + break; + case 'y': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_PUB_Y; + } + } + break; + case 'r': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'n': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case 'k': + if (strcmp("ey", s + 5) == 0) + return PIDX_CIPHER_PARAM_RANDOM_KEY; + break; + case 'o': + if (strcmp("m_data", s + 5) == 0) + return PIDX_DRBG_PARAM_RANDOM_DATA; + } + } + break; + case 'w': + if (strcmp("-bytes", s + 3) == 0) + return PIDX_SKEY_PARAM_RAW_BYTES; + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'a': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case '_': + switch(s[5]) { + default: + break; + case 'a': + if (strcmp("head", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_READ_AHEAD; + break; + case 'b': + if (strcmp("uffer_len", s + 6) == 0) + return PIDX_LIBSSL_RECORD_LAYER_READ_BUFFER_LEN; + } + } + } + break; + case 'f': + if (strcmp("erence", s + 3) == 0) + return PIDX_OBJECT_PARAM_REFERENCE; + break; + case 's': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case '_': + switch(s[7]) { + default: + break; + case 'c': + if (strcmp("ounter", s + 8) == 0) + return PIDX_DRBG_PARAM_RESEED_COUNTER; + break; + case 'r': + if (strcmp("equests", s + 8) == 0) + return PIDX_DRBG_PARAM_RESEED_REQUESTS; + break; + case 't': + switch(s[8]) { + default: + break; + case 'i': + switch(s[9]) { + default: + break; + case 'm': + switch(s[10]) { + default: + break; + case 'e': + switch(s[11]) { + default: + break; + case '_': + if (strcmp("interval", s + 12) == 0) + return PIDX_DRBG_PARAM_RESEED_TIME_INTERVAL; + break; + case '\0': + return PIDX_DRBG_PARAM_RESEED_TIME; + } + } + } + } + } + } + } + } + } + } + break; + case 'o': + if (strcmp("unds", s + 2) == 0) + return PIDX_CIPHER_PARAM_ROUNDS; + break; + case 's': + switch(s[2]) { + default: + break; + case 'a': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + switch(s[5]) { + default: + break; + case 'o': + switch(s[6]) { + default: + break; + case 'e': + switch(s[7]) { + default: + break; + case 'f': + switch(s[8]) { + default: + break; + case 'f': + switch(s[9]) { + default: + break; + case 'i': + switch(s[10]) { + default: + break; + case 'c': + 
switch(s[11]) { + default: + break; + case 'i': + switch(s[12]) { + default: + break; + case 'e': + switch(s[13]) { + default: + break; + case 'n': + switch(s[14]) { + default: + break; + case 't': + switch(s[15]) { + default: + break; + case '1': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT1; + } + break; + case '2': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT2; + } + break; + case '3': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT3; + } + break; + case '4': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT4; + } + break; + case '5': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT5; + } + break; + case '6': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT6; + } + break; + case '7': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT7; + } + break; + case '8': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT8; + } + break; + case '9': + switch(s[16]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_COEFFICIENT; + } + } + } + } + } + } + } + } + } + } + } + break; + case 'd': + if (strcmp("erive-from-pq", s + 5) == 0) + return PIDX_PKEY_PARAM_RSA_DERIVE_FROM_PQ; + break; + case 'e': + switch(s[5]) { + default: + break; + case 'x': + switch(s[6]) { + default: + break; + case 'p': + switch(s[7]) { + default: + break; + case 'o': + switch(s[8]) { + default: + break; + case 'n': + switch(s[9]) { + default: + break; + case 'e': + switch(s[10]) { + default: + break; + case 'n': + switch(s[11]) { + default: + break; + case 't': + switch(s[12]) { + default: + break; + case '1': + switch(s[13]) { + default: + break; + case '0': + switch(s[14]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT10; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT1; + } + break; + case '2': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT2; + } + break; + case '3': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT3; + } + break; + case '4': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT4; + } + break; + case '5': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT5; + } + break; + case '6': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT6; + } + break; + case '7': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT7; + } + break; + case '8': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT8; + } + break; + case '9': + switch(s[13]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_EXPONENT; + } + } + } + } + } + } + } + } + break; + case 'f': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 'c': + switch(s[7]) { + default: + break; + case 't': + switch(s[8]) { + default: + break; + case 'o': + switch(s[9]) { + default: + break; + case 'r': + switch(s[10]) { + default: + break; + case '1': + switch(s[11]) { + default: + break; + case '0': + switch(s[12]) { + 
default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR10; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR1; + } + break; + case '2': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR2; + } + break; + case '3': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR3; + } + break; + case '4': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR4; + } + break; + case '5': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR5; + } + break; + case '6': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR6; + } + break; + case '7': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR7; + } + break; + case '8': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR8; + } + break; + case '9': + switch(s[11]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR9; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_FACTOR; + } + } + } + } + } + } + break; + case 'p': + switch(s[5]) { + default: + break; + case 'k': + if (strcmp("cs15-pad-disabled", s + 6) == 0) + return PIDX_PROV_PARAM_RSA_PKCS15_PAD_DISABLED; + break; + case 's': + if (strcmp("s-saltlen-check", s + 6) == 0) + return PIDX_SIGNATURE_PARAM_FIPS_RSA_PSS_SALTLEN_CHECK; + } + break; + case 's': + if (strcmp("ign-x931-pad-disabled", s + 5) == 0) + return PIDX_PROV_PARAM_RSA_SIGN_X931_PAD_DISABLED; + } + } + } + break; + case '\0': + return PIDX_KDF_PARAM_SCRYPT_R; + } + break; + case 's': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'f': + if (strcmp("eprime-generator", s + 3) == 0) + return PIDX_PKEY_PARAM_DH_GENERATOR; + break; + case 'l': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'l': + if (strcmp("en", s + 5) == 0) + return PIDX_SIGNATURE_PARAM_PSS_SALTLEN; + break; + case '\0': + return PIDX_MAC_PARAM_SALT; + } + } + break; + case 'v': + if (strcmp("e-parameters", s + 3) == 0) + return PIDX_ENCODER_PARAM_SAVE_PARAMETERS; + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'c': + switch(s[3]) { + default: + break; + case 'r': + if (strcmp("et", s + 4) == 0) + return PIDX_KDF_PARAM_SECRET; + break; + case 'u': + switch(s[4]) { + default: + break; + case 'r': + switch(s[5]) { + default: + break; + case 'i': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'y': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'b': + if (strcmp("its", s + 10) == 0) + return PIDX_PKEY_PARAM_SECURITY_BITS; + break; + case 'c': + if (strcmp("hecks", s + 10) == 0) + return PIDX_PROV_PARAM_SECURITY_CHECKS; + } + } + } + } + } + } + } + break; + case 'e': + if (strcmp("d", s + 3) == 0) + return PIDX_PKEY_PARAM_SLH_DSA_SEED; + break; + case 'r': + if (strcmp("ial", s + 3) == 0) + return PIDX_STORE_PARAM_SERIAL; + break; + case 's': + if (strcmp("sion_id", s + 3) == 0) + return PIDX_KDF_PARAM_SSHKDF_SESSION_ID; + } + break; + case 'i': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'n': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'c': + if (strcmp("heck", s + 6) == 0) + return PIDX_PKEY_PARAM_FIPS_SIGN_CHECK; + break; + case 'x': + if (strcmp("931-pad-check", s + 6) == 0) + return 
PIDX_SIGNATURE_PARAM_FIPS_SIGN_X931_PAD_CHECK; + } + break; + case 'a': + switch(s[5]) { + default: + break; + case 't': + switch(s[6]) { + default: + break; + case 'u': + switch(s[7]) { + default: + break; + case 'r': + switch(s[8]) { + default: + break; + case 'e': + switch(s[9]) { + default: + break; + case '-': + if (strcmp("digest-check", s + 10) == 0) + return PIDX_PROV_PARAM_SIGNATURE_DIGEST_CHECK; + break; + case '\0': + return PIDX_SIGNATURE_PARAM_SIGNATURE; + } + } + } + } + } + } + } + break; + case 'z': + if (strcmp("e", s + 3) == 0) + return PIDX_MAC_PARAM_SIZE; + } + break; + case 'p': + if (strcmp("eed", s + 2) == 0) + return PIDX_CIPHER_PARAM_SPEED; + break; + case 's': + switch(s[2]) { + default: + break; + case 'h': + switch(s[3]) { + default: + break; + case 'k': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'f': + switch(s[6]) { + default: + break; + case '-': + switch(s[7]) { + default: + break; + case 'd': + if (strcmp("igest-check", s + 8) == 0) + return PIDX_PROV_PARAM_SSHKDF_DIGEST_CHECK; + break; + case 'k': + if (strcmp("ey-check", s + 8) == 0) + return PIDX_PROV_PARAM_SSHKDF_KEY_CHECK; + } + } + } + } + } + break; + case 'k': + switch(s[3]) { + default: + break; + case 'd': + switch(s[4]) { + default: + break; + case 'f': + switch(s[5]) { + default: + break; + case '-': + switch(s[6]) { + default: + break; + case 'd': + if (strcmp("igest-check", s + 7) == 0) + return PIDX_PROV_PARAM_SSKDF_DIGEST_CHECK; + break; + case 'k': + if (strcmp("ey-check", s + 7) == 0) + return PIDX_PROV_PARAM_SSKDF_KEY_CHECK; + } + } + } + } + break; + case 'l': + if (strcmp("3-ms", s + 3) == 0) + return PIDX_DIGEST_PARAM_SSL3_MS; + } + break; + case 't': + switch(s[2]) { + default: + break; + case '-': + switch(s[3]) { + default: + break; + case 'd': + if (strcmp("esc", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_DESC; + break; + case 'p': + if (strcmp("hase", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_PHASE; + break; + case 't': + if (strcmp("ype", s + 4) == 0) + return PIDX_PROV_PARAM_SELF_TEST_TYPE; + } + break; + case 'a': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case 'e': + switch(s[5]) { + default: + break; + case '\0': + return PIDX_RAND_PARAM_STATE; + } + break; + case 'u': + if (strcmp("s", s + 5) == 0) + return PIDX_PROV_PARAM_STATUS; + } + } + break; + case 'r': + switch(s[3]) { + default: + break; + case 'e': + switch(s[4]) { + default: + break; + case 'a': + if (strcmp("m_mac", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_STREAM_MAC; + break; + case 'n': + if (strcmp("gth", s + 5) == 0) + return PIDX_RAND_PARAM_STRENGTH; + } + } + } + break; + case 'u': + switch(s[2]) { + default: + break; + case 'b': + if (strcmp("ject", s + 3) == 0) + return PIDX_STORE_PARAM_SUBJECT; + break; + case 'p': + switch(s[3]) { + default: + break; + case 'p': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'r': + if (strcmp("ivinfo", s + 7) == 0) + return PIDX_KDF_PARAM_X942_SUPP_PRIVINFO; + break; + case 'u': + if (strcmp("binfo", s + 7) == 0) + return PIDX_KDF_PARAM_X942_SUPP_PUBINFO; + } + } + } + } + } + } + break; + case 't': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'g': + switch(s[3]) { + default: + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_CIPHER_PARAM_AEAD_TAGLEN; + break; + case '\0': + return 
PIDX_CIPHER_PARAM_AEAD_TAG; + } + } + break; + case 'd': + if (strcmp("es-encrypt-disabled", s + 2) == 0) + return PIDX_PROV_PARAM_TDES_ENCRYPT_DISABLED; + break; + case 'e': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case 't': + switch(s[4]) { + default: + break; + case '-': + if (strcmp("entropy", s + 5) == 0) + return PIDX_SIGNATURE_PARAM_TEST_ENTROPY; + break; + case '_': + switch(s[5]) { + default: + break; + case 'e': + if (strcmp("ntropy", s + 6) == 0) + return PIDX_RAND_PARAM_TEST_ENTROPY; + break; + case 'n': + if (strcmp("once", s + 6) == 0) + return PIDX_RAND_PARAM_TEST_NONCE; + } + } + } + } + break; + case 'h': + if (strcmp("reads", s + 2) == 0) + return PIDX_KDF_PARAM_THREADS; + break; + case 'l': + switch(s[2]) { + default: + break; + case 's': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'c': + if (strcmp("lient-version", s + 5) == 0) + return PIDX_ASYM_CIPHER_PARAM_TLS_CLIENT_VERSION; + break; + case 'd': + if (strcmp("ata-size", s + 5) == 0) + return PIDX_MAC_PARAM_TLS_DATA_SIZE; + break; + case 'g': + switch(s[5]) { + default: + break; + case 'r': + switch(s[6]) { + default: + break; + case 'o': + switch(s[7]) { + default: + break; + case 'u': + switch(s[8]) { + default: + break; + case 'p': + switch(s[9]) { + default: + break; + case '-': + switch(s[10]) { + default: + break; + case 'a': + if (strcmp("lg", s + 11) == 0) + return PIDX_CAPABILITY_TLS_GROUP_ALG; + break; + case 'i': + switch(s[11]) { + default: + break; + case 'd': + switch(s[12]) { + default: + break; + case '\0': + return PIDX_CAPABILITY_TLS_GROUP_ID; + } + break; + case 's': + if (strcmp("-kem", s + 12) == 0) + return PIDX_CAPABILITY_TLS_GROUP_IS_KEM; + } + break; + case 'n': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'm': + switch(s[13]) { + default: + break; + case 'e': + switch(s[14]) { + default: + break; + case '-': + if (strcmp("internal", s + 15) == 0) + return PIDX_CAPABILITY_TLS_GROUP_NAME_INTERNAL; + break; + case '\0': + return PIDX_CAPABILITY_TLS_GROUP_NAME; + } + } + } + } + break; + case 's': + if (strcmp("ec-bits", s + 11) == 0) + return PIDX_CAPABILITY_TLS_GROUP_SECURITY_BITS; + } + } + } + } + } + } + break; + case 'm': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 'c': + switch(s[7]) { + default: + break; + case '-': + if (strcmp("size", s + 8) == 0) + return PIDX_CIPHER_PARAM_TLS_MAC_SIZE; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS_MAC; + } + break; + case 'x': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("tls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MAX_DTLS; + break; + case 't': + if (strcmp("ls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MAX_TLS; + } + } + } + break; + case 'i': + switch(s[6]) { + default: + break; + case 'n': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("tls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MIN_DTLS; + break; + case 't': + if (strcmp("ls", s + 9) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_MIN_TLS; + } + } + } + break; + case 'u': + if (strcmp("lti", s + 6) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK; + } + break; + case 'n': + if (strcmp("egotiated-version", s + 5) == 0) + return PIDX_ASYM_CIPHER_PARAM_TLS_NEGOTIATED_VERSION; + break; + case 's': + switch(s[5]) { + default: + break; + case 'i': + 
switch(s[6]) { + default: + break; + case 'g': + switch(s[7]) { + default: + break; + case 'a': + switch(s[8]) { + default: + break; + case 'l': + switch(s[9]) { + default: + break; + case 'g': + switch(s[10]) { + default: + break; + case '-': + switch(s[11]) { + default: + break; + case 'c': + if (strcmp("ode-point", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_CODE_POINT; + break; + case 'h': + switch(s[12]) { + default: + break; + case 'a': + switch(s[13]) { + default: + break; + case 's': + switch(s[14]) { + default: + break; + case 'h': + switch(s[15]) { + default: + break; + case '-': + switch(s[16]) { + default: + break; + case 'n': + if (strcmp("ame", s + 17) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_HASH_NAME; + break; + case 'o': + if (strcmp("id", s + 17) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_HASH_OID; + } + } + } + } + } + break; + case 'i': + if (strcmp("ana-name", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_IANA_NAME; + break; + case 'k': + switch(s[12]) { + default: + break; + case 'e': + switch(s[13]) { + default: + break; + case 'y': + switch(s[14]) { + default: + break; + case 't': + switch(s[15]) { + default: + break; + case 'y': + switch(s[16]) { + default: + break; + case 'p': + switch(s[17]) { + default: + break; + case 'e': + switch(s[18]) { + default: + break; + case '-': + if (strcmp("oid", s + 19) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE_OID; + break; + case '\0': + return PIDX_CAPABILITY_TLS_SIGALG_KEYTYPE; + } + } + } + } + } + } + } + break; + case 'n': + if (strcmp("ame", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_NAME; + break; + case 'o': + if (strcmp("id", s + 12) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_OID; + break; + case 's': + switch(s[12]) { + default: + break; + case 'e': + if (strcmp("c-bits", s + 13) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SECURITY_BITS; + break; + case 'i': + switch(s[13]) { + default: + break; + case 'g': + switch(s[14]) { + default: + break; + case '-': + switch(s[15]) { + default: + break; + case 'n': + if (strcmp("ame", s + 16) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SIG_NAME; + break; + case 'o': + if (strcmp("id", s + 16) == 0) + return PIDX_CAPABILITY_TLS_SIGALG_SIG_OID; + } + } + } + } + } + } + } + } + } + } + } + break; + case 'v': + if (strcmp("ersion", s + 5) == 0) + return PIDX_CIPHER_PARAM_TLS_VERSION; + } + break; + case '1': + switch(s[4]) { + default: + break; + case '-': + switch(s[5]) { + default: + break; + case 'p': + switch(s[6]) { + default: + break; + case 'r': + switch(s[7]) { + default: + break; + case 'f': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'd': + if (strcmp("igest-check", s + 10) == 0) + return PIDX_PROV_PARAM_TLS1_PRF_DIGEST_CHECK; + break; + case 'e': + if (strcmp("ms-check", s + 10) == 0) + return PIDX_PROV_PARAM_TLS1_PRF_EMS_CHECK; + break; + case 'k': + if (strcmp("ey-check", s + 10) == 0) + return PIDX_PROV_PARAM_TLS1_PRF_KEY_CHECK; + } + } + } + } + } + break; + case '3': + switch(s[5]) { + default: + break; + case '-': + switch(s[6]) { + default: + break; + case 'k': + switch(s[7]) { + default: + break; + case 'd': + switch(s[8]) { + default: + break; + case 'f': + switch(s[9]) { + default: + break; + case '-': + switch(s[10]) { + default: + break; + case 'd': + if (strcmp("igest-check", s + 11) == 0) + return PIDX_PROV_PARAM_TLS13_KDF_DIGEST_CHECK; + break; + case 'k': + if (strcmp("ey-check", s + 11) == 0) + return PIDX_PROV_PARAM_TLS13_KDF_KEY_CHECK; + } + } + } + } + } + } + break; + case 'm': + 
switch(s[5]) { + default: + break; + case 'u': + switch(s[6]) { + default: + break; + case 'l': + switch(s[7]) { + default: + break; + case 't': + switch(s[8]) { + default: + break; + case 'i': + switch(s[9]) { + default: + break; + case '_': + switch(s[10]) { + default: + break; + case 'a': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'd': + switch(s[13]) { + default: + break; + case 'p': + if (strcmp("acklen", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD_PACKLEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_AAD; + } + } + } + break; + case 'e': + switch(s[11]) { + default: + break; + case 'n': + switch(s[12]) { + default: + break; + case 'c': + switch(s[13]) { + default: + break; + case 'i': + if (strcmp("n", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_IN; + break; + case 'l': + if (strcmp("en", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC_LEN; + break; + case '\0': + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_ENC; + } + } + } + break; + case 'i': + if (strcmp("nterleave", s + 11) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_INTERLEAVE; + break; + case 'm': + switch(s[11]) { + default: + break; + case 'a': + switch(s[12]) { + default: + break; + case 'x': + switch(s[13]) { + default: + break; + case 'b': + if (strcmp("ufsz", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_BUFSIZE; + break; + case 's': + if (strcmp("ndfrag", s + 14) == 0) + return PIDX_CIPHER_PARAM_TLS1_MULTIBLOCK_MAX_SEND_FRAGMENT; + } + } + } + } + } + } + } + } + } + } + break; + case 'a': + switch(s[4]) { + default: + break; + case 'a': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case 'p': + if (strcmp("ad", s + 7) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_AAD_PAD; + break; + case '\0': + return PIDX_CIPHER_PARAM_AEAD_TLS1_AAD; + } + } + } + break; + case 'i': + switch(s[4]) { + default: + break; + case 'v': + switch(s[5]) { + default: + break; + case 'f': + if (strcmp("ixed", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_IV_FIXED; + break; + case 'g': + if (strcmp("en", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_GET_IV_GEN; + break; + case 'i': + if (strcmp("nv", s + 6) == 0) + return PIDX_CIPHER_PARAM_AEAD_TLS1_SET_IV_INV; + } + } + break; + case 't': + if (strcmp("ree", s + 4) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_TLSTREE; + } + } + break; + case 'p': + switch(s[2]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_EC_CHAR2_TP_BASIS; + } + break; + case 'y': + if (strcmp("pe", s + 2) == 0) + return PIDX_PKEY_PARAM_FFC_TYPE; + } + break; + case 'u': + switch(s[1]) { + default: + break; + case 'k': + if (strcmp("m", s + 2) == 0) + return PIDX_KDF_PARAM_UKM; + break; + case 'p': + if (strcmp("dated-iv", s + 2) == 0) + return PIDX_CIPHER_PARAM_UPDATED_IV; + break; + case 's': + switch(s[2]) { + default: + break; + case 'e': + switch(s[3]) { + default: + break; + case '-': + switch(s[4]) { + default: + break; + case 'b': + if (strcmp("its", s + 5) == 0) + return PIDX_CIPHER_PARAM_USE_BITS; + break; + case 'c': + if (strcmp("ofactor-flag", s + 5) == 0) + return PIDX_PKEY_PARAM_USE_COFACTOR_FLAG; + break; + case 'k': + if (strcmp("eybits", s + 5) == 0) + return PIDX_KDF_PARAM_X942_USE_KEYBITS; + break; + case 'l': + switch(s[5]) { + default: + break; + case '\0': + return PIDX_KDF_PARAM_KBKDF_USE_L; + } + break; + case 's': + if (strcmp("eparator", s + 5) == 0) + return PIDX_KDF_PARAM_KBKDF_USE_SEPARATOR; + } + break; 
+ case '_': + switch(s[4]) { + default: + break; + case 'd': + if (strcmp("erivation_function", s + 5) == 0) + return PIDX_DRBG_PARAM_USE_DF; + break; + case 'e': + if (strcmp("tm", s + 5) == 0) + return PIDX_LIBSSL_RECORD_LAYER_PARAM_USE_ETM; + } + } + } + } + break; + case 'v': + switch(s[1]) { + default: + break; + case 'a': + switch(s[2]) { + default: + break; + case 'l': + switch(s[3]) { + default: + break; + case 'i': + switch(s[4]) { + default: + break; + case 'd': + switch(s[5]) { + default: + break; + case 'a': + switch(s[6]) { + default: + break; + case 't': + switch(s[7]) { + default: + break; + case 'e': + switch(s[8]) { + default: + break; + case '-': + switch(s[9]) { + default: + break; + case 'g': + switch(s[10]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_FFC_VALIDATE_G; + } + break; + case 'l': + if (strcmp("egacy", s + 10) == 0) + return PIDX_PKEY_PARAM_FFC_VALIDATE_LEGACY; + break; + case 'p': + if (strcmp("q", s + 10) == 0) + return PIDX_PKEY_PARAM_FFC_VALIDATE_PQ; + } + } + } + } + } + } + } + } + break; + case 'e': + switch(s[2]) { + default: + break; + case 'r': + switch(s[3]) { + default: + break; + case 'i': + if (strcmp("fy-message", s + 4) == 0) + return PIDX_SIGNATURE_PARAM_FIPS_VERIFY_MESSAGE; + break; + case 's': + if (strcmp("ion", s + 4) == 0) + return PIDX_PROV_PARAM_VERSION; + } + } + } + break; + case 'x': + switch(s[1]) { + default: + break; + case '9': + switch(s[2]) { + default: + break; + case '4': + if (strcmp("2kdf-key-check", s + 3) == 0) + return PIDX_PROV_PARAM_X942KDF_KEY_CHECK; + break; + case '6': + switch(s[3]) { + default: + break; + case '3': + switch(s[4]) { + default: + break; + case 'k': + switch(s[5]) { + default: + break; + case 'd': + switch(s[6]) { + default: + break; + case 'f': + switch(s[7]) { + default: + break; + case '-': + switch(s[8]) { + default: + break; + case 'd': + if (strcmp("igest-check", s + 9) == 0) + return PIDX_PROV_PARAM_X963KDF_DIGEST_CHECK; + break; + case 'k': + if (strcmp("ey-check", s + 9) == 0) + return PIDX_PROV_PARAM_X963KDF_KEY_CHECK; + } + } + } + } + } + } + } + break; + case 'c': + if (strcmp("ghash", s + 2) == 0) + return PIDX_KDF_PARAM_SSHKDF_XCGHASH; + break; + case 'o': + switch(s[2]) { + default: + break; + case 'f': + switch(s[3]) { + default: + break; + case 'l': + if (strcmp("en", s + 4) == 0) + return PIDX_DIGEST_PARAM_XOFLEN; + break; + case '\0': + return PIDX_MAC_PARAM_XOF; + } + } + break; + case 'p': + switch(s[2]) { + default: + break; + case '1': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP1; + } + break; + case '2': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP2; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XP; + } + break; + case 'q': + switch(s[2]) { + default: + break; + case '1': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ1; + } + break; + case '2': + switch(s[3]) { + default: + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ2; + } + break; + case '\0': + return PIDX_PKEY_PARAM_RSA_TEST_XQ; + } + break; + case 't': + if (strcmp("s_standard", s + 2) == 0) + return PIDX_CIPHER_PARAM_XTS_STANDARD; + } + } + return -1; +} + +/* End of TRIE */ diff --git a/contrib/openssl-cmake/common/providers/der_digests_gen.c b/contrib/openssl-cmake/common/providers/der_digests_gen.c new file mode 100644 index 000000000000..e4e14e82e564 --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_digests_gen.c @@ -0,0 +1,160 @@ +/* + * 
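The generated switch cascade that closes just above (at the "/* End of TRIE */" marker) is a build-time trie for resolving OSSL_PARAM key strings to precompiled PIDX_* table indices: each nesting level branches on one more character, and every unambiguous tail is finished with strcmp(). Below is a minimal hand-written sketch of the same technique for a made-up three-key table; the toy_* names are hypothetical stand-ins for the generated PIDX_* constants, not part of the diff.

    #include <assert.h>
    #include <string.h>

    /* Hypothetical indices standing in for the generated PIDX_* values. */
    enum { TOY_PIDX_KEY = 0, TOY_PIDX_KEYLEN = 1, TOY_PIDX_SALT = 2 };

    /* Same shape as the generated lookup: each switch level reads the next
     * character only after the previous one matched something non-NUL, and
     * a strcmp() on the remaining tail ends every branch. */
    static int toy_find_pidx(const char *s)
    {
        switch (s[0]) {
        default:
            break;
        case 'k':
            switch (s[1]) {
            default:
                break;
            case 'e':
                switch (s[2]) {
                default:
                    break;
                case 'y':
                    switch (s[3]) {
                    default:
                        break;
                    case 'l':
                        if (strcmp("en", s + 4) == 0)
                            return TOY_PIDX_KEYLEN;
                        break;
                    case '\0':
                        return TOY_PIDX_KEY;
                    }
                }
            }
            break;
        case 's':
            if (strcmp("alt", s + 1) == 0)
                return TOY_PIDX_SALT;
        }
        return -1;   /* unknown key, same convention as the generated code */
    }

    int main(void)
    {
        assert(toy_find_pidx("key") == TOY_PIDX_KEY);
        assert(toy_find_pidx("keylen") == TOY_PIDX_KEYLEN);
        assert(toy_find_pidx("salt") == TOY_PIDX_SALT);
        assert(toy_find_pidx("nonce") == -1);
        return 0;
    }

A plausible reason for generating this form rather than hashing at run time is that lookup cost is bounded by the key length, needs no allocation, and the compiler can lower the switches to jump tables.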
WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_digests_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_digests.h" + +/* Well known OIDs precompiled */ + +/* + * sigAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 3 } + */ +const unsigned char ossl_der_oid_sigAlgs[DER_OID_SZ_sigAlgs] = { + DER_OID_V_sigAlgs +}; + +/* + * id-sha1 OBJECT IDENTIFIER ::= { iso(1) + * identified-organization(3) oiw(14) + * secsig(3) algorithms(2) 26 } + */ +const unsigned char ossl_der_oid_id_sha1[DER_OID_SZ_id_sha1] = { + DER_OID_V_id_sha1 +}; + +/* + * id-md2 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 2 } + */ +const unsigned char ossl_der_oid_id_md2[DER_OID_SZ_id_md2] = { + DER_OID_V_id_md2 +}; + +/* + * id-md5 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) digestAlgorithm(2) 5 } + */ +const unsigned char ossl_der_oid_id_md5[DER_OID_SZ_id_md5] = { + DER_OID_V_id_md5 +}; + +/* + * id-sha256 OBJECT IDENTIFIER ::= { hashAlgs 1 } + */ +const unsigned char ossl_der_oid_id_sha256[DER_OID_SZ_id_sha256] = { + DER_OID_V_id_sha256 +}; + +/* + * id-sha384 OBJECT IDENTIFIER ::= { hashAlgs 2 } + */ +const unsigned char ossl_der_oid_id_sha384[DER_OID_SZ_id_sha384] = { + DER_OID_V_id_sha384 +}; + +/* + * id-sha512 OBJECT IDENTIFIER ::= { hashAlgs 3 } + */ +const unsigned char ossl_der_oid_id_sha512[DER_OID_SZ_id_sha512] = { + DER_OID_V_id_sha512 +}; + +/* + * id-sha224 OBJECT IDENTIFIER ::= { hashAlgs 4 } + */ +const unsigned char ossl_der_oid_id_sha224[DER_OID_SZ_id_sha224] = { + DER_OID_V_id_sha224 +}; + +/* + * id-sha512-224 OBJECT IDENTIFIER ::= { hashAlgs 5 } + */ +const unsigned char ossl_der_oid_id_sha512_224[DER_OID_SZ_id_sha512_224] = { + DER_OID_V_id_sha512_224 +}; + +/* + * id-sha512-256 OBJECT IDENTIFIER ::= { hashAlgs 6 } + */ +const unsigned char ossl_der_oid_id_sha512_256[DER_OID_SZ_id_sha512_256] = { + DER_OID_V_id_sha512_256 +}; + +/* + * id-sha3-224 OBJECT IDENTIFIER ::= { hashAlgs 7 } + */ +const unsigned char ossl_der_oid_id_sha3_224[DER_OID_SZ_id_sha3_224] = { + DER_OID_V_id_sha3_224 +}; + +/* + * id-sha3-256 OBJECT IDENTIFIER ::= { hashAlgs 8 } + */ +const unsigned char ossl_der_oid_id_sha3_256[DER_OID_SZ_id_sha3_256] = { + DER_OID_V_id_sha3_256 +}; + +/* + * id-sha3-384 OBJECT IDENTIFIER ::= { hashAlgs 9 } + */ +const unsigned char ossl_der_oid_id_sha3_384[DER_OID_SZ_id_sha3_384] = { + DER_OID_V_id_sha3_384 +}; + +/* + * id-sha3-512 OBJECT IDENTIFIER ::= { hashAlgs 10 } + */ +const unsigned char ossl_der_oid_id_sha3_512[DER_OID_SZ_id_sha3_512] = { + DER_OID_V_id_sha3_512 +}; + +/* + * id-shake128 OBJECT IDENTIFIER ::= { hashAlgs 11 } + */ +const unsigned char ossl_der_oid_id_shake128[DER_OID_SZ_id_shake128] = { + DER_OID_V_id_shake128 +}; + +/* + * id-shake256 OBJECT IDENTIFIER ::= { hashAlgs 12 } + */ +const unsigned char ossl_der_oid_id_shake256[DER_OID_SZ_id_shake256] = { + DER_OID_V_id_shake256 +}; + +/* + * id-shake128-len OBJECT IDENTIFIER ::= { hashAlgs 17 } + */ +const unsigned char ossl_der_oid_id_shake128_len[DER_OID_SZ_id_shake128_len] = { + DER_OID_V_id_shake128_len +}; + +/* + * id-shake256-len OBJECT IDENTIFIER ::= { hashAlgs 18 } + */ +const unsigned char 
ossl_der_oid_id_shake256_len[DER_OID_SZ_id_shake256_len] = { + DER_OID_V_id_shake256_len +}; + +/* + * id-KMACWithSHAKE128 OBJECT IDENTIFIER ::={hashAlgs 19} + */ +const unsigned char ossl_der_oid_id_KMACWithSHAKE128[DER_OID_SZ_id_KMACWithSHAKE128] = { + DER_OID_V_id_KMACWithSHAKE128 +}; + +/* + * id-KMACWithSHAKE256 OBJECT IDENTIFIER ::={ hashAlgs 20} + */ +const unsigned char ossl_der_oid_id_KMACWithSHAKE256[DER_OID_SZ_id_KMACWithSHAKE256] = { + DER_OID_V_id_KMACWithSHAKE256 +}; + diff --git a/contrib/openssl-cmake/common/providers/der_dsa_gen.c b/contrib/openssl-cmake/common/providers/der_dsa_gen.c new file mode 100644 index 000000000000..e5cfe91e0f25 --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_dsa_gen.c @@ -0,0 +1,94 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_dsa_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/* + * DSA low level APIs are deprecated for public use, but still ok for + * internal use. + */ +#include "internal/deprecated.h" + +#include "prov/der_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-dsa OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57(10040) x9algorithm(4) 1 } + */ +const unsigned char ossl_der_oid_id_dsa[DER_OID_SZ_id_dsa] = { + DER_OID_V_id_dsa +}; + +/* + * id-dsa-with-sha1 OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) x9-57 (10040) x9algorithm(4) 3 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha1[DER_OID_SZ_id_dsa_with_sha1] = { + DER_OID_V_id_dsa_with_sha1 +}; + +/* + * id-dsa-with-sha224 OBJECT IDENTIFIER ::= { sigAlgs 1 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha224[DER_OID_SZ_id_dsa_with_sha224] = { + DER_OID_V_id_dsa_with_sha224 +}; + +/* + * id-dsa-with-sha256 OBJECT IDENTIFIER ::= { sigAlgs 2 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha256[DER_OID_SZ_id_dsa_with_sha256] = { + DER_OID_V_id_dsa_with_sha256 +}; + +/* + * id-dsa-with-sha384 OBJECT IDENTIFIER ::= { sigAlgs 3 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha384[DER_OID_SZ_id_dsa_with_sha384] = { + DER_OID_V_id_dsa_with_sha384 +}; + +/* + * id-dsa-with-sha512 OBJECT IDENTIFIER ::= { sigAlgs 4 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha512[DER_OID_SZ_id_dsa_with_sha512] = { + DER_OID_V_id_dsa_with_sha512 +}; + +/* + * id-dsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 5 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha3_224[DER_OID_SZ_id_dsa_with_sha3_224] = { + DER_OID_V_id_dsa_with_sha3_224 +}; + +/* + * id-dsa-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 6 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha3_256[DER_OID_SZ_id_dsa_with_sha3_256] = { + DER_OID_V_id_dsa_with_sha3_256 +}; + +/* + * id-dsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 7 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha3_384[DER_OID_SZ_id_dsa_with_sha3_384] = { + DER_OID_V_id_dsa_with_sha3_384 +}; + +/* + * id-dsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 8 } + */ +const unsigned char ossl_der_oid_id_dsa_with_sha3_512[DER_OID_SZ_id_dsa_with_sha3_512] = { + DER_OID_V_id_dsa_with_sha3_512 +}; + diff --git a/contrib/openssl-cmake/common/providers/der_ec_gen.c b/contrib/openssl-cmake/common/providers/der_ec_gen.c new file mode 100644 
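The new der_*_gen.c files above and below contain nothing but precompiled DER encodings of well-known OIDs; each ossl_der_oid_* array is filled from a DER_OID_V_* macro in the matching generated header, with DER_OID_SZ_* giving its length. As a standalone sketch of what those bytes are (not the OpenSSL generator itself; the helper name and the toy main are made up for illustration), the code below DER-encodes an OID's arcs the usual way: the first two arcs share one byte, and every later arc is written base-128 with the 0x80 bit flagging continuation. Run against 2.16.840.1.101.3.4.2.1 (id-sha256, the "{ hashAlgs 1 }" arc named in the comments above) it should print the familiar 06 09 60 86 48 01 65 03 04 02 01 sequence.

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch only: encode an OID's arcs into buf, returning the number of
     * content bytes written, or 0 if the buffer is too small. */
    static size_t der_encode_oid(const uint32_t *arcs, size_t n,
                                 uint8_t *buf, size_t cap)
    {
        size_t len = 0;

        if (n < 2 || cap == 0)
            return 0;
        /* The first two arcs are packed into a single byte. */
        buf[len++] = (uint8_t)(arcs[0] * 40 + arcs[1]);
        for (size_t i = 2; i < n; i++) {
            uint32_t v = arcs[i];
            uint8_t tmp[5];
            int t = 0;

            do {                        /* split into 7-bit groups, low first */
                tmp[t++] = (uint8_t)(v & 0x7f);
                v >>= 7;
            } while (v != 0);
            while (t-- > 0) {           /* emit high groups first, 0x80 = "more" */
                if (len >= cap)
                    return 0;
                buf[len++] = (uint8_t)(tmp[t] | (t ? 0x80 : 0x00));
            }
        }
        return len;
    }

    int main(void)
    {
        const uint32_t sha256_oid[] = { 2, 16, 840, 1, 101, 3, 4, 2, 1 };
        uint8_t der[32];
        size_t n = der_encode_oid(sha256_oid,
                                  sizeof sha256_oid / sizeof sha256_oid[0],
                                  der, sizeof der);

        printf("06 %02zx", n);          /* OBJECT IDENTIFIER tag + length */
        for (size_t i = 0; i < n; i++)
            printf(" %02x", der[i]);
        printf("\n");
        return 0;
    }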
index 000000000000..e1ed54ba05b6 --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_ec_gen.c @@ -0,0 +1,279 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_ec_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_ec.h" + +/* Well known OIDs precompiled */ + +/* + * ecdsa-with-SHA1 OBJECT IDENTIFIER ::= { id-ecSigType 1 } + */ +const unsigned char ossl_der_oid_ecdsa_with_SHA1[DER_OID_SZ_ecdsa_with_SHA1] = { + DER_OID_V_ecdsa_with_SHA1 +}; + +/* + * id-ecPublicKey OBJECT IDENTIFIER ::= { id-publicKeyType 1 } + */ +const unsigned char ossl_der_oid_id_ecPublicKey[DER_OID_SZ_id_ecPublicKey] = { + DER_OID_V_id_ecPublicKey +}; + +/* + * c2pnb163v1 OBJECT IDENTIFIER ::= { c-TwoCurve 1 } + */ +const unsigned char ossl_der_oid_c2pnb163v1[DER_OID_SZ_c2pnb163v1] = { + DER_OID_V_c2pnb163v1 +}; + +/* + * c2pnb163v2 OBJECT IDENTIFIER ::= { c-TwoCurve 2 } + */ +const unsigned char ossl_der_oid_c2pnb163v2[DER_OID_SZ_c2pnb163v2] = { + DER_OID_V_c2pnb163v2 +}; + +/* + * c2pnb163v3 OBJECT IDENTIFIER ::= { c-TwoCurve 3 } + */ +const unsigned char ossl_der_oid_c2pnb163v3[DER_OID_SZ_c2pnb163v3] = { + DER_OID_V_c2pnb163v3 +}; + +/* + * c2pnb176w1 OBJECT IDENTIFIER ::= { c-TwoCurve 4 } + */ +const unsigned char ossl_der_oid_c2pnb176w1[DER_OID_SZ_c2pnb176w1] = { + DER_OID_V_c2pnb176w1 +}; + +/* + * c2tnb191v1 OBJECT IDENTIFIER ::= { c-TwoCurve 5 } + */ +const unsigned char ossl_der_oid_c2tnb191v1[DER_OID_SZ_c2tnb191v1] = { + DER_OID_V_c2tnb191v1 +}; + +/* + * c2tnb191v2 OBJECT IDENTIFIER ::= { c-TwoCurve 6 } + */ +const unsigned char ossl_der_oid_c2tnb191v2[DER_OID_SZ_c2tnb191v2] = { + DER_OID_V_c2tnb191v2 +}; + +/* + * c2tnb191v3 OBJECT IDENTIFIER ::= { c-TwoCurve 7 } + */ +const unsigned char ossl_der_oid_c2tnb191v3[DER_OID_SZ_c2tnb191v3] = { + DER_OID_V_c2tnb191v3 +}; + +/* + * c2onb191v4 OBJECT IDENTIFIER ::= { c-TwoCurve 8 } + */ +const unsigned char ossl_der_oid_c2onb191v4[DER_OID_SZ_c2onb191v4] = { + DER_OID_V_c2onb191v4 +}; + +/* + * c2onb191v5 OBJECT IDENTIFIER ::= { c-TwoCurve 9 } + */ +const unsigned char ossl_der_oid_c2onb191v5[DER_OID_SZ_c2onb191v5] = { + DER_OID_V_c2onb191v5 +}; + +/* + * c2pnb208w1 OBJECT IDENTIFIER ::= { c-TwoCurve 10 } + */ +const unsigned char ossl_der_oid_c2pnb208w1[DER_OID_SZ_c2pnb208w1] = { + DER_OID_V_c2pnb208w1 +}; + +/* + * c2tnb239v1 OBJECT IDENTIFIER ::= { c-TwoCurve 11 } + */ +const unsigned char ossl_der_oid_c2tnb239v1[DER_OID_SZ_c2tnb239v1] = { + DER_OID_V_c2tnb239v1 +}; + +/* + * c2tnb239v2 OBJECT IDENTIFIER ::= { c-TwoCurve 12 } + */ +const unsigned char ossl_der_oid_c2tnb239v2[DER_OID_SZ_c2tnb239v2] = { + DER_OID_V_c2tnb239v2 +}; + +/* + * c2tnb239v3 OBJECT IDENTIFIER ::= { c-TwoCurve 13 } + */ +const unsigned char ossl_der_oid_c2tnb239v3[DER_OID_SZ_c2tnb239v3] = { + DER_OID_V_c2tnb239v3 +}; + +/* + * c2onb239v4 OBJECT IDENTIFIER ::= { c-TwoCurve 14 } + */ +const unsigned char ossl_der_oid_c2onb239v4[DER_OID_SZ_c2onb239v4] = { + DER_OID_V_c2onb239v4 +}; + +/* + * c2onb239v5 OBJECT IDENTIFIER ::= { c-TwoCurve 15 } + */ +const unsigned char ossl_der_oid_c2onb239v5[DER_OID_SZ_c2onb239v5] = { + DER_OID_V_c2onb239v5 +}; + +/* + * c2pnb272w1 OBJECT IDENTIFIER ::= { c-TwoCurve 16 } + */ +const unsigned char 
ossl_der_oid_c2pnb272w1[DER_OID_SZ_c2pnb272w1] = { + DER_OID_V_c2pnb272w1 +}; + +/* + * c2pnb304w1 OBJECT IDENTIFIER ::= { c-TwoCurve 17 } + */ +const unsigned char ossl_der_oid_c2pnb304w1[DER_OID_SZ_c2pnb304w1] = { + DER_OID_V_c2pnb304w1 +}; + +/* + * c2tnb359v1 OBJECT IDENTIFIER ::= { c-TwoCurve 18 } + */ +const unsigned char ossl_der_oid_c2tnb359v1[DER_OID_SZ_c2tnb359v1] = { + DER_OID_V_c2tnb359v1 +}; + +/* + * c2pnb368w1 OBJECT IDENTIFIER ::= { c-TwoCurve 19 } + */ +const unsigned char ossl_der_oid_c2pnb368w1[DER_OID_SZ_c2pnb368w1] = { + DER_OID_V_c2pnb368w1 +}; + +/* + * c2tnb431r1 OBJECT IDENTIFIER ::= { c-TwoCurve 20 } + */ +const unsigned char ossl_der_oid_c2tnb431r1[DER_OID_SZ_c2tnb431r1] = { + DER_OID_V_c2tnb431r1 +}; + +/* + * prime192v1 OBJECT IDENTIFIER ::= { primeCurve 1 } + */ +const unsigned char ossl_der_oid_prime192v1[DER_OID_SZ_prime192v1] = { + DER_OID_V_prime192v1 +}; + +/* + * prime192v2 OBJECT IDENTIFIER ::= { primeCurve 2 } + */ +const unsigned char ossl_der_oid_prime192v2[DER_OID_SZ_prime192v2] = { + DER_OID_V_prime192v2 +}; + +/* + * prime192v3 OBJECT IDENTIFIER ::= { primeCurve 3 } + */ +const unsigned char ossl_der_oid_prime192v3[DER_OID_SZ_prime192v3] = { + DER_OID_V_prime192v3 +}; + +/* + * prime239v1 OBJECT IDENTIFIER ::= { primeCurve 4 } + */ +const unsigned char ossl_der_oid_prime239v1[DER_OID_SZ_prime239v1] = { + DER_OID_V_prime239v1 +}; + +/* + * prime239v2 OBJECT IDENTIFIER ::= { primeCurve 5 } + */ +const unsigned char ossl_der_oid_prime239v2[DER_OID_SZ_prime239v2] = { + DER_OID_V_prime239v2 +}; + +/* + * prime239v3 OBJECT IDENTIFIER ::= { primeCurve 6 } + */ +const unsigned char ossl_der_oid_prime239v3[DER_OID_SZ_prime239v3] = { + DER_OID_V_prime239v3 +}; + +/* + * prime256v1 OBJECT IDENTIFIER ::= { primeCurve 7 } + */ +const unsigned char ossl_der_oid_prime256v1[DER_OID_SZ_prime256v1] = { + DER_OID_V_prime256v1 +}; + +/* + * ecdsa-with-SHA224 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 1 } + */ +const unsigned char ossl_der_oid_ecdsa_with_SHA224[DER_OID_SZ_ecdsa_with_SHA224] = { + DER_OID_V_ecdsa_with_SHA224 +}; + +/* + * ecdsa-with-SHA256 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 2 } + */ +const unsigned char ossl_der_oid_ecdsa_with_SHA256[DER_OID_SZ_ecdsa_with_SHA256] = { + DER_OID_V_ecdsa_with_SHA256 +}; + +/* + * ecdsa-with-SHA384 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 3 } + */ +const unsigned char ossl_der_oid_ecdsa_with_SHA384[DER_OID_SZ_ecdsa_with_SHA384] = { + DER_OID_V_ecdsa_with_SHA384 +}; + +/* + * ecdsa-with-SHA512 OBJECT IDENTIFIER ::= { iso(1) member-body(2) + * us(840) ansi-X9-62(10045) signatures(4) ecdsa-with-SHA2(3) 4 } + */ +const unsigned char ossl_der_oid_ecdsa_with_SHA512[DER_OID_SZ_ecdsa_with_SHA512] = { + DER_OID_V_ecdsa_with_SHA512 +}; + +/* + * id-ecdsa-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 9 } + */ +const unsigned char ossl_der_oid_id_ecdsa_with_sha3_224[DER_OID_SZ_id_ecdsa_with_sha3_224] = { + DER_OID_V_id_ecdsa_with_sha3_224 +}; + +/* + * id-ecdsa-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 10 } + */ +const unsigned char ossl_der_oid_id_ecdsa_with_sha3_256[DER_OID_SZ_id_ecdsa_with_sha3_256] = { + DER_OID_V_id_ecdsa_with_sha3_256 +}; + +/* + * id-ecdsa-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 11 } + */ +const unsigned char ossl_der_oid_id_ecdsa_with_sha3_384[DER_OID_SZ_id_ecdsa_with_sha3_384] = { + 
DER_OID_V_id_ecdsa_with_sha3_384 +}; + +/* + * id-ecdsa-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 12 } + */ +const unsigned char ossl_der_oid_id_ecdsa_with_sha3_512[DER_OID_SZ_id_ecdsa_with_sha3_512] = { + DER_OID_V_id_ecdsa_with_sha3_512 +}; + diff --git a/contrib/openssl-cmake/common/providers/der_ecx_gen.c b/contrib/openssl-cmake/common/providers/der_ecx_gen.c new file mode 100644 index 000000000000..ba7bf14b5e15 --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_ecx_gen.c @@ -0,0 +1,44 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_ecx_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_ecx.h" + +/* Well known OIDs precompiled */ + +/* + * id-X25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 110 } + */ +const unsigned char ossl_der_oid_id_X25519[DER_OID_SZ_id_X25519] = { + DER_OID_V_id_X25519 +}; + +/* + * id-X448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 111 } + */ +const unsigned char ossl_der_oid_id_X448[DER_OID_SZ_id_X448] = { + DER_OID_V_id_X448 +}; + +/* + * id-Ed25519 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 112 } + */ +const unsigned char ossl_der_oid_id_Ed25519[DER_OID_SZ_id_Ed25519] = { + DER_OID_V_id_Ed25519 +}; + +/* + * id-Ed448 OBJECT IDENTIFIER ::= { id-edwards-curve-algs 113 } + */ +const unsigned char ossl_der_oid_id_Ed448[DER_OID_SZ_id_Ed448] = { + DER_OID_V_id_Ed448 +}; + diff --git a/contrib/openssl-cmake/common/providers/der_ml_dsa_gen.c b/contrib/openssl-cmake/common/providers/der_ml_dsa_gen.c new file mode 100644 index 000000000000..4a8a113a2685 --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_ml_dsa_gen.c @@ -0,0 +1,37 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_ml_dsa_gen.c.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_ml_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-ml-dsa-44 OBJECT IDENTIFIER ::= { sigAlgs 17 } + */ +const unsigned char ossl_der_oid_id_ml_dsa_44[DER_OID_SZ_id_ml_dsa_44] = { + DER_OID_V_id_ml_dsa_44 +}; + +/* + * id-ml-dsa-65 OBJECT IDENTIFIER ::= { sigAlgs 18 } + */ +const unsigned char ossl_der_oid_id_ml_dsa_65[DER_OID_SZ_id_ml_dsa_65] = { + DER_OID_V_id_ml_dsa_65 +}; + +/* + * id-ml-dsa-87 OBJECT IDENTIFIER ::= { sigAlgs 19 } + */ +const unsigned char ossl_der_oid_id_ml_dsa_87[DER_OID_SZ_id_ml_dsa_87] = { + DER_OID_V_id_ml_dsa_87 +}; + diff --git a/contrib/openssl-cmake/common/providers/der_rsa_gen.c b/contrib/openssl-cmake/common/providers/der_rsa_gen.c new file mode 100644 index 000000000000..a3431798402f --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_rsa_gen.c @@ -0,0 +1,174 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_rsa_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). 
You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_rsa.h" + +/* Well known OIDs precompiled */ + +/* + * hashAlgs OBJECT IDENTIFIER ::= { nistAlgorithms 2 } + */ +const unsigned char ossl_der_oid_hashAlgs[DER_OID_SZ_hashAlgs] = { + DER_OID_V_hashAlgs +}; + +/* + * rsaEncryption OBJECT IDENTIFIER ::= { pkcs-1 1 } + */ +const unsigned char ossl_der_oid_rsaEncryption[DER_OID_SZ_rsaEncryption] = { + DER_OID_V_rsaEncryption +}; + +/* + * id-RSAES-OAEP OBJECT IDENTIFIER ::= { pkcs-1 7 } + */ +const unsigned char ossl_der_oid_id_RSAES_OAEP[DER_OID_SZ_id_RSAES_OAEP] = { + DER_OID_V_id_RSAES_OAEP +}; + +/* + * id-pSpecified OBJECT IDENTIFIER ::= { pkcs-1 9 } + */ +const unsigned char ossl_der_oid_id_pSpecified[DER_OID_SZ_id_pSpecified] = { + DER_OID_V_id_pSpecified +}; + +/* + * id-RSASSA-PSS OBJECT IDENTIFIER ::= { pkcs-1 10 } + */ +const unsigned char ossl_der_oid_id_RSASSA_PSS[DER_OID_SZ_id_RSASSA_PSS] = { + DER_OID_V_id_RSASSA_PSS +}; + +/* + * md2WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 2 } + */ +const unsigned char ossl_der_oid_md2WithRSAEncryption[DER_OID_SZ_md2WithRSAEncryption] = { + DER_OID_V_md2WithRSAEncryption +}; + +/* + * md5WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 4 } + */ +const unsigned char ossl_der_oid_md5WithRSAEncryption[DER_OID_SZ_md5WithRSAEncryption] = { + DER_OID_V_md5WithRSAEncryption +}; + +/* + * sha1WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 5 } + */ +const unsigned char ossl_der_oid_sha1WithRSAEncryption[DER_OID_SZ_sha1WithRSAEncryption] = { + DER_OID_V_sha1WithRSAEncryption +}; + +/* + * sha224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 14 } + */ +const unsigned char ossl_der_oid_sha224WithRSAEncryption[DER_OID_SZ_sha224WithRSAEncryption] = { + DER_OID_V_sha224WithRSAEncryption +}; + +/* + * sha256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 11 } + */ +const unsigned char ossl_der_oid_sha256WithRSAEncryption[DER_OID_SZ_sha256WithRSAEncryption] = { + DER_OID_V_sha256WithRSAEncryption +}; + +/* + * sha384WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 12 } + */ +const unsigned char ossl_der_oid_sha384WithRSAEncryption[DER_OID_SZ_sha384WithRSAEncryption] = { + DER_OID_V_sha384WithRSAEncryption +}; + +/* + * sha512WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 13 } + */ +const unsigned char ossl_der_oid_sha512WithRSAEncryption[DER_OID_SZ_sha512WithRSAEncryption] = { + DER_OID_V_sha512WithRSAEncryption +}; + +/* + * sha512-224WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 15 } + */ +const unsigned char ossl_der_oid_sha512_224WithRSAEncryption[DER_OID_SZ_sha512_224WithRSAEncryption] = { + DER_OID_V_sha512_224WithRSAEncryption +}; + +/* + * sha512-256WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 16 } + */ +const unsigned char ossl_der_oid_sha512_256WithRSAEncryption[DER_OID_SZ_sha512_256WithRSAEncryption] = { + DER_OID_V_sha512_256WithRSAEncryption +}; + +/* + * id-mgf1 OBJECT IDENTIFIER ::= { pkcs-1 8 } + */ +const unsigned char ossl_der_oid_id_mgf1[DER_OID_SZ_id_mgf1] = { + DER_OID_V_id_mgf1 +}; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-224 OBJECT IDENTIFIER ::= { sigAlgs 13 } + */ +const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_224[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_224] = { + DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_224 +}; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-256 OBJECT IDENTIFIER ::= { sigAlgs 14 } + */ +const unsigned char 
ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_256[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_256] = { + DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_256 +}; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-384 OBJECT IDENTIFIER ::= { sigAlgs 15 } + */ +const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_384[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_384] = { + DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_384 +}; + +/* + * id-rsassa-pkcs1-v1_5-with-sha3-512 OBJECT IDENTIFIER ::= { sigAlgs 16 } + */ +const unsigned char ossl_der_oid_id_rsassa_pkcs1_v1_5_with_sha3_512[DER_OID_SZ_id_rsassa_pkcs1_v1_5_with_sha3_512] = { + DER_OID_V_id_rsassa_pkcs1_v1_5_with_sha3_512 +}; + +/* + * md4WithRSAEncryption OBJECT IDENTIFIER ::= { pkcs-1 3 } + */ +const unsigned char ossl_der_oid_md4WithRSAEncryption[DER_OID_SZ_md4WithRSAEncryption] = { + DER_OID_V_md4WithRSAEncryption +}; + +/* + * ripemd160WithRSAEncryption OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) teletrust(36) algorithm(3) signatureAlgorithm(3) rsaSignature(1) 2 + * } + */ +const unsigned char ossl_der_oid_ripemd160WithRSAEncryption[DER_OID_SZ_ripemd160WithRSAEncryption] = { + DER_OID_V_ripemd160WithRSAEncryption +}; + +/* + * mdc2WithRSASignature OBJECT IDENTIFIER ::= { + * iso(1) identified-organization(3) oiw(14) secsig(3) algorithms(2) mdc2WithRSASignature(14) + * } + */ +const unsigned char ossl_der_oid_mdc2WithRSASignature[DER_OID_SZ_mdc2WithRSASignature] = { + DER_OID_V_mdc2WithRSASignature +}; + diff --git a/contrib/openssl-cmake/common/providers/der_slh_dsa_gen.c b/contrib/openssl-cmake/common/providers/der_slh_dsa_gen.c new file mode 100644 index 000000000000..1419a9515097 --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_slh_dsa_gen.c @@ -0,0 +1,100 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_slh_dsa_gen.c.in + * + * Copyright 2025 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_slh_dsa.h" + +/* Well known OIDs precompiled */ + +/* + * id-slh-dsa-sha2-128s OBJECT IDENTIFIER ::= { sigAlgs 20 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_128s[DER_OID_SZ_id_slh_dsa_sha2_128s] = { + DER_OID_V_id_slh_dsa_sha2_128s +}; + +/* + * id-slh-dsa-sha2-128f OBJECT IDENTIFIER ::= { sigAlgs 21 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_128f[DER_OID_SZ_id_slh_dsa_sha2_128f] = { + DER_OID_V_id_slh_dsa_sha2_128f +}; + +/* + * id-slh-dsa-sha2-192s OBJECT IDENTIFIER ::= { sigAlgs 22 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_192s[DER_OID_SZ_id_slh_dsa_sha2_192s] = { + DER_OID_V_id_slh_dsa_sha2_192s +}; + +/* + * id-slh-dsa-sha2-192f OBJECT IDENTIFIER ::= { sigAlgs 23 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_192f[DER_OID_SZ_id_slh_dsa_sha2_192f] = { + DER_OID_V_id_slh_dsa_sha2_192f +}; + +/* + * id-slh-dsa-sha2-256s OBJECT IDENTIFIER ::= { sigAlgs 24 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_256s[DER_OID_SZ_id_slh_dsa_sha2_256s] = { + DER_OID_V_id_slh_dsa_sha2_256s +}; + +/* + * id-slh-dsa-sha2-256f OBJECT IDENTIFIER ::= { sigAlgs 25 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_sha2_256f[DER_OID_SZ_id_slh_dsa_sha2_256f] = { + DER_OID_V_id_slh_dsa_sha2_256f +}; + +/* + * id-slh-dsa-shake-128s OBJECT IDENTIFIER ::= { sigAlgs 26 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_128s[DER_OID_SZ_id_slh_dsa_shake_128s] = { + DER_OID_V_id_slh_dsa_shake_128s +}; + +/* + * id-slh-dsa-shake-128f OBJECT IDENTIFIER ::= { sigAlgs 27 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_128f[DER_OID_SZ_id_slh_dsa_shake_128f] = { + DER_OID_V_id_slh_dsa_shake_128f +}; + +/* + * id-slh-dsa-shake-192s OBJECT IDENTIFIER ::= { sigAlgs 28 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_192s[DER_OID_SZ_id_slh_dsa_shake_192s] = { + DER_OID_V_id_slh_dsa_shake_192s +}; + +/* + * id-slh-dsa-shake-192f OBJECT IDENTIFIER ::= { sigAlgs 29 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_192f[DER_OID_SZ_id_slh_dsa_shake_192f] = { + DER_OID_V_id_slh_dsa_shake_192f +}; + +/* + * id-slh-dsa-shake-256s OBJECT IDENTIFIER ::= { sigAlgs 30 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_256s[DER_OID_SZ_id_slh_dsa_shake_256s] = { + DER_OID_V_id_slh_dsa_shake_256s +}; + +/* + * id-slh-dsa-shake-256f OBJECT IDENTIFIER ::= { sigAlgs 31 } + */ +const unsigned char ossl_der_oid_id_slh_dsa_shake_256f[DER_OID_SZ_id_slh_dsa_shake_256f] = { + DER_OID_V_id_slh_dsa_shake_256f +}; + diff --git a/contrib/openssl-cmake/common/providers/der_sm2_gen.c b/contrib/openssl-cmake/common/providers/der_sm2_gen.c new file mode 100644 index 000000000000..6424ea166b7e --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_sm2_gen.c @@ -0,0 +1,30 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_sm2_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_sm2.h" + +/* Well known OIDs precompiled */ + +/* + * sm2-with-SM3 OBJECT IDENTIFIER ::= { sm-scheme 501 } + */ +const unsigned char ossl_der_oid_sm2_with_SM3[DER_OID_SZ_sm2_with_SM3] = { + DER_OID_V_sm2_with_SM3 +}; + +/* + * curveSM2 OBJECT IDENTIFIER ::= { sm-scheme 301 } + */ +const unsigned char ossl_der_oid_curveSM2[DER_OID_SZ_curveSM2] = { + DER_OID_V_curveSM2 +}; + diff --git a/contrib/openssl-cmake/common/providers/der_wrap_gen.c b/contrib/openssl-cmake/common/providers/der_wrap_gen.c new file mode 100644 index 000000000000..6cf93972f48b --- /dev/null +++ b/contrib/openssl-cmake/common/providers/der_wrap_gen.c @@ -0,0 +1,46 @@ +/* + * WARNING: do not edit! + * Generated by Makefile from providers/common/der/der_wrap_gen.c.in + * + * Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#include "prov/der_wrap.h" + +/* Well known OIDs precompiled */ + +/* + * id-alg-CMS3DESwrap OBJECT IDENTIFIER ::= { + * iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-9(9) smime(16) alg(3) 6 + * } + */ +const unsigned char ossl_der_oid_id_alg_CMS3DESwrap[DER_OID_SZ_id_alg_CMS3DESwrap] = { + DER_OID_V_id_alg_CMS3DESwrap +}; + +/* + * id-aes128-wrap OBJECT IDENTIFIER ::= { aes 5 } + */ +const unsigned char ossl_der_oid_id_aes128_wrap[DER_OID_SZ_id_aes128_wrap] = { + DER_OID_V_id_aes128_wrap +}; + +/* + * id-aes192-wrap OBJECT IDENTIFIER ::= { aes 25 } + */ +const unsigned char ossl_der_oid_id_aes192_wrap[DER_OID_SZ_id_aes192_wrap] = { + DER_OID_V_id_aes192_wrap +}; + +/* + * id-aes256-wrap OBJECT IDENTIFIER ::= { aes 45 } + */ +const unsigned char ossl_der_oid_id_aes256_wrap[DER_OID_SZ_id_aes256_wrap] = { + DER_OID_V_id_aes256_wrap +}; + diff --git a/contrib/openssl-cmake/darwin_aarch64/include/openssl/cmp.h b/contrib/openssl-cmake/darwin_aarch64/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/darwin_aarch64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/darwin_aarch64/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/darwin_aarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/darwin_aarch64/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/darwin_aarch64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/darwin_aarch64/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. 
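In these opensslv.h hunks the pinned version moves from 3.2.1 to 3.5.6, and the same header later synthesizes the legacy OPENSSL_VERSION_NUMBER from these macros using the 0xMNN00PPS(L) layout its comment mentions. The sketch below is one reading of that layout, not the actual opensslv.h macro: major in the top nibble, minor in the next two hex digits, patch two digits lower, and (an assumption here) 0xf in the lowest nibble for a final, non-pre-release build.

    #include <stdio.h>

    /* Sketch of the 0xMNN00PPS layout; the 0xf status nibble for final
     * releases is an assumption, not copied from opensslv.h. */
    static unsigned long synth_version_number(unsigned major, unsigned minor,
                                              unsigned patch, int pre_release)
    {
        return ((unsigned long)major << 28)
             | ((unsigned long)minor << 20)
             | ((unsigned long)patch << 4)
             | (pre_release ? 0x0UL : 0xfUL);
    }

    int main(void)
    {
        /* The old and new pins as they would appear to numeric version checks. */
        printf("3.2.1 -> 0x%08lxL\n", synth_version_number(3, 2, 1, 0));
        printf("3.5.6 -> 0x%08lxL\n", synth_version_number(3, 5, 6, 0));
        return 0;
    }

Under that reading, code gating on OPENSSL_VERSION_NUMBER sees roughly 0x3020001f for the old pin and 0x3050006f for the new one, so numeric greater-or-equal checks keep working across the bump.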
*/ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/darwin_aarch64/include_private/buildinf.h b/contrib/openssl-cmake/darwin_aarch64/include_private/buildinf.h index 0b63021d18ce..e06bdda45093 100644 --- a/contrib/openssl-cmake/darwin_aarch64/include_private/buildinf.h +++ b/contrib/openssl-cmake/darwin_aarch64/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,7 +11,7 @@ */ #define PLATFORM "platform: darwin64-arm64" -#define DATE "built on: Thu Feb 22 19:39:51 2024 UTC" +#define DATE "built on: Tue Oct 21 14:52:13 2025 UTC" /* * Generate compiler_flags as an array of individual characters. This is a @@ -19,13 +19,14 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','/','o','p','t','/','h', - 'o','m','e','b','r','e','w','/','o','p','t','/','l','l','v','m', - '/','b','i','n','/','c','l','a','n','g',' ','-','f','P','I','C', - ' ','-','a','r','c','h',' ','a','r','m','6','4',' ','-','O','3', - ' ','-','W','a','l','l',' ','-','D','L','_','E','N','D','I','A', - 'N',' ','-','D','O','P','E','N','S','S','L','_','P','I','C',' ', - '-','D','_','R','E','E','N','T','R','A','N','T',' ','-','D','O', - 'P','E','N','S','S','L','_','B','U','I','L','D','I','N','G','_', - 'O','P','E','N','S','S','L',' ','-','D','N','D','E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','a','r','c','h',' ','a','r','m','6', + '4',' ','-','W','a',',','-','-','n','o','e','x','e','c','s','t', + 'a','c','k',' ','-','Q','u','n','u','s','e','d','-','a','r','g', + 'u','m','e','n','t','s',' ','-','O','3',' ','-','W','a','l','l', + ' ','-','D','L','_','E','N','D','I','A','N',' ','-','D','O','P', + 'E','N','S','S','L','_','P','I','C',' ','-','D','_','R','E','E', + 'N','T','R','A','N','T',' ','-','D','O','P','E','N','S','S','L', + '_','B','U','I','L','D','I','N','G','_','O','P','E','N','S','S', + 'L',' ','-','D','N','D','E','B','U','G','\0' }; diff --git a/contrib/openssl-cmake/darwin_x86_64/include/openssl/cmp.h b/contrib/openssl-cmake/darwin_x86_64/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/darwin_x86_64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/darwin_x86_64/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git 
a/contrib/openssl-cmake/darwin_x86_64/include/openssl/opensslv.h b/contrib/openssl-cmake/darwin_x86_64/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/darwin_x86_64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/darwin_x86_64/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/darwin_x86_64/include_private/buildinf.h b/contrib/openssl-cmake/darwin_x86_64/include_private/buildinf.h index ca39312766de..2c049aad35c7 100644 --- a/contrib/openssl-cmake/darwin_x86_64/include_private/buildinf.h +++ b/contrib/openssl-cmake/darwin_x86_64/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,7 +11,7 @@ */ #define PLATFORM "platform: darwin64-x86_64" -#define DATE "built on: Thu Feb 22 19:59:27 2024 UTC" +#define DATE "built on: Tue Oct 21 14:50:23 2025 UTC" /* * Generate compiler_flags as an array of individual characters. 
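These buildinf.h hunks replace one per-character compiler_flags array with another (the arrays are emitted by util/mkbuildinf.pl, per the file header). Spelled out character by character, the array is still just a NUL-terminated C string; presumably the per-character form exists to stay clear of limits on single string-literal length, though that reasoning is an assumption, not stated in the hunk. A toy illustration, with a short made-up value, that the two forms are interchangeable:

    #include <stdio.h>
    #include <string.h>

    /* Same trick as buildinf.h, shrunk down: an array of individual
     * characters ending in '\0' behaves exactly like a string literal. */
    static const char flags_as_chars[] = {
        'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g','\0'
    };

    int main(void)
    {
        printf("%s (%zu characters)\n", flags_as_chars, strlen(flags_as_chars));
        return 0;
    }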
This is a @@ -19,14 +19,14 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','/','o','p','t','/','h', - 'o','m','e','b','r','e','w','/','o','p','t','/','l','l','v','m', - '/','b','i','n','/','c','l','a','n','g',' ','-','f','P','I','C', - ' ','-','a','r','c','h',' ','x','8','6','_','6','4',' ','-','O', - '3',' ','-','W','a','l','l',' ','-','D','L','_','E','N','D','I', - 'A','N',' ','-','D','O','P','E','N','S','S','L','_','P','I','C', - ' ','-','D','_','R','E','E','N','T','R','A','N','T',' ','-','D', - 'O','P','E','N','S','S','L','_','B','U','I','L','D','I','N','G', - '_','O','P','E','N','S','S','L',' ','-','D','N','D','E','B','U', - 'G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','a','r','c','h',' ','x','8','6','_', + '6','4',' ','-','W','a',',','-','-','n','o','e','x','e','c','s', + 't','a','c','k',' ','-','Q','u','n','u','s','e','d','-','a','r', + 'g','u','m','e','n','t','s',' ','-','O','3',' ','-','W','a','l', + 'l',' ','-','D','L','_','E','N','D','I','A','N',' ','-','D','O', + 'P','E','N','S','S','L','_','P','I','C',' ','-','D','_','R','E', + 'E','N','T','R','A','N','T',' ','-','D','O','P','E','N','S','S', + 'L','_','B','U','I','L','D','I','N','G','_','O','P','E','N','S', + 'S','L',' ','-','D','N','D','E','B','U','G','\0' }; diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_aarch64/include_private/buildinf.h b/contrib/openssl-cmake/linux_aarch64/include_private/buildinf.h index 194ad73c7a36..140e6f2f2515 100644 --- a/contrib/openssl-cmake/linux_aarch64/include_private/buildinf.h +++ b/contrib/openssl-cmake/linux_aarch64/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,7 +11,7 @@ */ #define PLATFORM "platform: linux-aarch64" -#define DATE "built on: Tue Feb 13 14:16:46 2024 UTC" +#define DATE "built on: Tue Oct 21 14:48:11 2025 UTC" /* * Generate compiler_flags as an array of individual characters. This is a @@ -19,14 +19,14 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g','-', - '1','8',' ','-','f','P','I','C',' ','-','p','t','h','r','e','a', - 'd',' ','-','W','a',',','-','-','n','o','e','x','e','c','s','t', - 'a','c','k',' ','-','Q','u','n','u','s','e','d','-','a','r','g', - 'u','m','e','n','t','s',' ','-','W','a','l','l',' ','-','O','3', - ' ','-','D','O','P','E','N','S','S','L','_','U','S','E','_','N', - 'O','D','E','L','E','T','E',' ','-','D','O','P','E','N','S','S', - 'L','_','P','I','C',' ','-','D','O','P','E','N','S','S','L','_', - 'B','U','I','L','D','I','N','G','_','O','P','E','N','S','S','L', - ' ','-','D','N','D','E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'W','a',',','-','-','n','o','e','x','e','c','s','t','a','c','k', + ' ','-','Q','u','n','u','s','e','d','-','a','r','g','u','m','e', + 'n','t','s',' ','-','W','a','l','l',' ','-','O','3',' ','-','D', + 'O','P','E','N','S','S','L','_','U','S','E','_','N','O','D','E', + 'L','E','T','E',' ','-','D','O','P','E','N','S','S','L','_','P', + 'I','C',' ','-','D','O','P','E','N','S','S','L','_','B','U','I', + 'L','D','I','N','G','_','O','P','E','N','S','S','L',' ','-','D', + 'N','D','E','B','U','G','\0' }; diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_loongarch64/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h +++ 
b/contrib/openssl-cmake/linux_loongarch64/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h b/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h index a3a6485bbe9b..41428e11406f 100644 --- a/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h +++ b/contrib/openssl-cmake/linux_loongarch64/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,7 +11,7 @@ */ #define PLATFORM "platform: linux64-loongarch64" -#define DATE "built on: Thu Apr 18 07:53:56 2024 UTC" +#define DATE "built on: Tue Oct 21 14:56:50 2025 UTC" /* * Generate compiler_flags as an array of individual characters. 
This is a @@ -19,12 +19,15 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','g','c','c',' ','-','f', - 'P','I','C',' ','-','p','t','h','r','e','a','d',' ','-','W','a', - 'l','l',' ','-','O','3',' ','-','D','O','P','E','N','S','S','L', - '_','U','S','E','_','N','O','D','E','L','E','T','E',' ','-','D', - 'L','_','E','N','D','I','A','N',' ','-','D','O','P','E','N','S', - 'S','L','_','P','I','C',' ','-','D','O','P','E','N','S','S','L', - '_','B','U','I','L','D','I','N','G','_','O','P','E','N','S','S', - 'L',' ','-','D','N','D','E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'W','a',',','-','-','n','o','e','x','e','c','s','t','a','c','k', + ' ','-','Q','u','n','u','s','e','d','-','a','r','g','u','m','e', + 'n','t','s',' ','-','W','a','l','l',' ','-','O','3',' ','-','D', + 'O','P','E','N','S','S','L','_','U','S','E','_','N','O','D','E', + 'L','E','T','E',' ','-','D','L','_','E','N','D','I','A','N',' ', + '-','D','O','P','E','N','S','S','L','_','P','I','C',' ','-','D', + 'O','P','E','N','S','S','L','_','B','U','I','L','D','I','N','G', + '_','O','P','E','N','S','S','L',' ','-','D','N','D','E','B','U', + 'G','\0' }; diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_ppc64le/include_private/buildinf.h b/contrib/openssl-cmake/linux_ppc64le/include_private/buildinf.h index f1dba0c989df..124e3aaf3760 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include_private/buildinf.h +++ b/contrib/openssl-cmake/linux_ppc64le/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,7 +11,7 @@ */ #define PLATFORM "platform: linux-ppc64le" -#define DATE "built on: Thu Feb 22 12:58:04 2024 UTC" +#define DATE "built on: Tue Oct 21 14:58:20 2025 UTC" /* * Generate compiler_flags as an array of individual characters. This is a @@ -19,13 +19,15 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g','-', - '1','7',' ','-','f','P','I','C',' ','-','p','t','h','r','e','a', - 'd',' ','-','m','6','4',' ','-','W','a','l','l',' ','-','O','3', - ' ','-','D','O','P','E','N','S','S','L','_','U','S','E','_','N', - 'O','D','E','L','E','T','E',' ','-','D','L','_','E','N','D','I', - 'A','N',' ','-','D','O','P','E','N','S','S','L','_','P','I','C', - ' ','-','D','O','P','E','N','S','S','L','_','B','U','I','L','D', - 'I','N','G','_','O','P','E','N','S','S','L',' ','-','D','N','D', - 'E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'm','6','4',' ','-','W','a',',','-','-','n','o','e','x','e','c', + 's','t','a','c','k',' ','-','Q','u','n','u','s','e','d','-','a', + 'r','g','u','m','e','n','t','s',' ','-','W','a','l','l',' ','-', + 'O','3',' ','-','D','O','P','E','N','S','S','L','_','U','S','E', + '_','N','O','D','E','L','E','T','E',' ','-','D','L','_','E','N', + 'D','I','A','N',' ','-','D','O','P','E','N','S','S','L','_','P', + 'I','C',' ','-','D','O','P','E','N','S','S','L','_','B','U','I', + 'L','D','I','N','G','_','O','P','E','N','S','S','L',' ','-','D', + 'N','D','E','B','U','G','\0' }; diff --git a/contrib/openssl-cmake/linux_riscv64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_riscv64/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/linux_riscv64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_riscv64/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/linux_riscv64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_riscv64/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/linux_riscv64/include/openssl/opensslv.h +++ 
b/contrib/openssl-cmake/linux_riscv64/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_riscv64/include_private/buildinf.h b/contrib/openssl-cmake/linux_riscv64/include_private/buildinf.h index cfa3b3079f2b..7114f2bbbdfd 100644 --- a/contrib/openssl-cmake/linux_riscv64/include_private/buildinf.h +++ b/contrib/openssl-cmake/linux_riscv64/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,7 +11,7 @@ */ #define PLATFORM "platform: linux64-riscv64" -#define DATE "built on: Thu Feb 22 16:18:09 2024 UTC" +#define DATE "built on: Tue Oct 21 15:00:11 2025 UTC" /* * Generate compiler_flags as an array of individual characters. 
This is a @@ -19,12 +19,14 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g','-', - '1','7',' ','-','f','P','I','C',' ','-','p','t','h','r','e','a', - 'd',' ','-','W','a','l','l',' ','-','O','3',' ','-','D','O','P', - 'E','N','S','S','L','_','U','S','E','_','N','O','D','E','L','E', - 'T','E',' ','-','D','O','P','E','N','S','S','L','_','P','I','C', - ' ','-','D','O','P','E','N','S','S','L','_','B','U','I','L','D', - 'I','N','G','_','O','P','E','N','S','S','L',' ','-','D','N','D', - 'E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'W','a',',','-','-','n','o','e','x','e','c','s','t','a','c','k', + ' ','-','Q','u','n','u','s','e','d','-','a','r','g','u','m','e', + 'n','t','s',' ','-','W','a','l','l',' ','-','O','3',' ','-','D', + 'O','P','E','N','S','S','L','_','U','S','E','_','N','O','D','E', + 'L','E','T','E',' ','-','D','O','P','E','N','S','S','L','_','P', + 'I','C',' ','-','D','O','P','E','N','S','S','L','_','B','U','I', + 'L','D','I','N','G','_','O','P','E','N','S','S','L',' ','-','D', + 'N','D','E','B','U','G','\0' }; diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_s390x/include_private/buildinf.h b/contrib/openssl-cmake/linux_s390x/include_private/buildinf.h index f1dba0c989df..18dee70bb178 100644 --- a/contrib/openssl-cmake/linux_s390x/include_private/buildinf.h +++ b/contrib/openssl-cmake/linux_s390x/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! 
* Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -10,8 +10,8 @@ * https://www.openssl.org/source/license.html */ -#define PLATFORM "platform: linux-ppc64le" -#define DATE "built on: Thu Feb 22 12:58:04 2024 UTC" +#define PLATFORM "platform: linux64-s390x" +#define DATE "built on: Tue Oct 21 14:43:11 2025 UTC" /* * Generate compiler_flags as an array of individual characters. This is a @@ -19,13 +19,15 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g','-', - '1','7',' ','-','f','P','I','C',' ','-','p','t','h','r','e','a', - 'd',' ','-','m','6','4',' ','-','W','a','l','l',' ','-','O','3', - ' ','-','D','O','P','E','N','S','S','L','_','U','S','E','_','N', - 'O','D','E','L','E','T','E',' ','-','D','L','_','E','N','D','I', - 'A','N',' ','-','D','O','P','E','N','S','S','L','_','P','I','C', - ' ','-','D','O','P','E','N','S','S','L','_','B','U','I','L','D', - 'I','N','G','_','O','P','E','N','S','S','L',' ','-','D','N','D', - 'E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'm','6','4',' ','-','W','a',',','-','-','n','o','e','x','e','c', + 's','t','a','c','k',' ','-','Q','u','n','u','s','e','d','-','a', + 'r','g','u','m','e','n','t','s',' ','-','W','a','l','l',' ','-', + 'O','3',' ','-','D','O','P','E','N','S','S','L','_','U','S','E', + '_','N','O','D','E','L','E','T','E',' ','-','D','B','_','E','N', + 'D','I','A','N',' ','-','D','O','P','E','N','S','S','L','_','P', + 'I','C',' ','-','D','O','P','E','N','S','S','L','_','B','U','I', + 'L','D','I','N','G','_','O','P','E','N','S','S','L',' ','-','D', + 'N','D','E','B','U','G','\0' }; diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h index 60beffd57ef6..1b25211ebb3a 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h @@ -194,6 +194,8 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ +# define OSSL_CMP_PKISTATUS_rejected_by_client -5 +# define OSSL_CMP_PKISTATUS_checking_response -4 # define OSSL_CMP_PKISTATUS_request -3 # define OSSL_CMP_PKISTATUS_trans -2 # define OSSL_CMP_PKISTATUS_unspecified -1 diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h index b38d64da5930..110c34de0e46 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h @@ -28,8 +28,8 @@ extern "C" { * These macros express version number MAJOR.MINOR.PATCH exactly */ # define OPENSSL_VERSION_MAJOR 3 -# define OPENSSL_VERSION_MINOR 2 -# define OPENSSL_VERSION_PATCH 1 +# define OPENSSL_VERSION_MINOR 5 +# define OPENSSL_VERSION_PATCH 6 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. 
*/ -# define OPENSSL_VERSION_STR "3.2.1" -# define OPENSSL_FULL_VERSION_STR "3.2.1" +# define OPENSSL_VERSION_STR "3.5.6" +# define OPENSSL_FULL_VERSION_STR "3.5.6" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. */ -# define OPENSSL_RELEASE_DATE "30 Jan 2024" +# define OPENSSL_RELEASE_DATE "7 Apr 2026" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.2.1 30 Jan 2024" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.5.6 7 Apr 2026" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_x86_64/include_private/buildinf.h b/contrib/openssl-cmake/linux_x86_64/include_private/buildinf.h index aa43a964f2f1..04a5c9184046 100644 --- a/contrib/openssl-cmake/linux_x86_64/include_private/buildinf.h +++ b/contrib/openssl-cmake/linux_x86_64/include_private/buildinf.h @@ -2,7 +2,7 @@ * WARNING: do not edit! * Generated by util/mkbuildinf.pl * - * Copyright 2014-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -10,8 +10,8 @@ * https://www.openssl.org/source/license.html */ -#define PLATFORM "platform: linux-x86_64" -#define DATE "built on: Mon Feb 12 14:02:48 2024 UTC" +#define PLATFORM "platform: linux-x86_64-clang" +#define DATE "built on: Tue Oct 21 14:46:34 2025 UTC" /* * Generate compiler_flags as an array of individual characters. This is a @@ -19,15 +19,15 @@ * literal */ static const char compiler_flags[] = { - 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g','-', - '1','7',' ','-','f','P','I','C',' ','-','p','t','h','r','e','a', - 'd',' ','-','m','6','4',' ','-','W','a',',','-','-','n','o','e', - 'x','e','c','s','t','a','c','k',' ','-','Q','u','n','u','s','e', - 'd','-','a','r','g','u','m','e','n','t','s',' ','-','W','a','l', - 'l',' ','-','O','3',' ','-','D','O','P','E','N','S','S','L','_', - 'U','S','E','_','N','O','D','E','L','E','T','E',' ','-','D','L', - '_','E','N','D','I','A','N',' ','-','D','O','P','E','N','S','S', - 'L','_','P','I','C',' ','-','D','O','P','E','N','S','S','L','_', - 'B','U','I','L','D','I','N','G','_','O','P','E','N','S','S','L', - ' ','-','D','N','D','E','B','U','G','\0' + 'c','o','m','p','i','l','e','r',':',' ','c','l','a','n','g',' ', + '-','f','P','I','C',' ','-','p','t','h','r','e','a','d',' ','-', + 'm','6','4',' ','-','W','a',',','-','-','n','o','e','x','e','c', + 's','t','a','c','k',' ','-','Q','u','n','u','s','e','d','-','a', + 'r','g','u','m','e','n','t','s',' ','-','W','a','l','l',' ','-', + 'O','3',' ','-','D','O','P','E','N','S','S','L','_','U','S','E', + '_','N','O','D','E','L','E','T','E',' ','-','D','L','_','E','N', + 'D','I','A','N',' ','-','D','O','P','E','N','S','S','L','_','P', + 'I','C',' ','-','D','O','P','E','N','S','S','L','_','B','U','I', + 'L','D','I','N','G','_','O','P','E','N','S','S','L',' ','-','D', + 'N','D','E','B','U','G','\0' }; diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 0b6d81e6bce8..0770a15b107c 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -19,7 +19,7 @@ #include #if USE_SSL -# include +# include #endif #include @@ -64,6 +64,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NOT_IMPLEMENTED; 
extern const int ILLEGAL_COLUMN; + extern const int OPENSSL_ERROR; } namespace impl @@ -247,12 +248,22 @@ struct HalfMD5Impl uint64_t uint64_data; } buf; - MD5_CTX ctx; - MD5_Init(&ctx); - MD5_Update(&ctx, reinterpret_cast(begin), size); - MD5_Final(buf.char_data, &ctx); + using EVP_MD_CTX_ptr = std::unique_ptr; + const auto ctx = EVP_MD_CTX_ptr(EVP_MD_CTX_new(), EVP_MD_CTX_free); - /// Compatibility with existing code. Cast need for old poco AND macos where UInt64 != uint64_t + if (!ctx) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_MD_CTX_new failed"); + + if (!EVP_DigestInit_ex(ctx.get(), EVP_md5(), nullptr)) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_DigestInit_ex failed"); + + if (!EVP_DigestUpdate(ctx.get(), begin, size)) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_DigestUpdate failed"); + + if (!EVP_DigestFinal_ex(ctx.get(), buf.char_data, nullptr)) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_DigestFinal_ex failed"); + + /// Compatibility with existing code. Cast is necessary for old poco AND macos where UInt64 != uint64_t transformEndianness(buf.uint64_data); return buf.uint64_data; } diff --git a/src/Functions/FunctionsStringHashFixedString.cpp b/src/Functions/FunctionsStringHashFixedString.cpp index ca57074d1824..2142504d7a11 100644 --- a/src/Functions/FunctionsStringHashFixedString.cpp +++ b/src/Functions/FunctionsStringHashFixedString.cpp @@ -15,10 +15,11 @@ #if USE_SSL # include -# include -# include # include # include +# include +# include +# include #endif #if USE_SHA3IUF @@ -34,173 +35,159 @@ extern "C" { namespace DB { + namespace ErrorCodes { -extern const int ILLEGAL_COLUMN; -extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int OPENSSL_ERROR; + extern const int LOGICAL_ERROR; } #if USE_SSL +using EVP_MD_CTX_ptr = std::unique_ptr; + +template +class OpenSSLProvider +{ +public: + static constexpr auto name = ProviderImpl::name; + static constexpr auto length = ProviderImpl::length; + + OpenSSLProvider() : ctx_template(EVP_MD_CTX_new(), &EVP_MD_CTX_free) + { + if (!ctx_template) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_MD_CTX_new failed: {}", getOpenSSLErrors()); + + if (EVP_DigestInit_ex(ctx_template.get(), ProviderImpl::provider(), nullptr) != 1) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_DigestInit_ex failed: {}", getOpenSSLErrors()); + } + + void apply(const char * begin, size_t size, unsigned char * out_char_data) + { + if (!ctx_template) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No context provided"); + + thread_local EVP_MD_CTX_ptr ctx(EVP_MD_CTX_new(), &EVP_MD_CTX_free); + if (EVP_MD_CTX_copy_ex(ctx.get(), ctx_template.get()) != 1) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_MD_CTX_copy_ex failed: {}", getOpenSSLErrors()); + + if (EVP_DigestUpdate(ctx.get(), begin, size) != 1) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_DigestUpdate failed: {}", getOpenSSLErrors()); + + if (EVP_DigestFinal_ex(ctx.get(), out_char_data, nullptr) != 1) + throw Exception(ErrorCodes::OPENSSL_ERROR, "EVP_DigestFinal_ex failed: {}", getOpenSSLErrors()); + } + +private: + EVP_MD_CTX_ptr ctx_template; +}; struct MD4Impl { static constexpr auto name = "MD4"; + static constexpr const EVP_MD * (*provider)() = &EVP_md4; enum { length = MD4_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MD4_CTX ctx; - MD4_Init(&ctx); - MD4_Update(&ctx, reinterpret_cast(begin), size); - 
MD4_Final(out_char_data, &ctx); - } }; struct MD5Impl { static constexpr auto name = "MD5"; + static constexpr const EVP_MD * (*provider)() = &EVP_md5; enum { length = MD5_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - MD5_CTX ctx; - MD5_Init(&ctx); - MD5_Update(&ctx, reinterpret_cast(begin), size); - MD5_Final(out_char_data, &ctx); - } }; struct SHA1Impl { static constexpr auto name = "SHA1"; + static constexpr const EVP_MD * (*provider)() = &EVP_sha1; enum { length = SHA_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA_CTX ctx; - SHA1_Init(&ctx); - SHA1_Update(&ctx, reinterpret_cast(begin), size); - SHA1_Final(out_char_data, &ctx); - } }; struct SHA224Impl { static constexpr auto name = "SHA224"; + static constexpr const EVP_MD * (*provider)() = &EVP_sha224; enum { length = SHA224_DIGEST_LENGTH }; - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA256_CTX ctx; - SHA224_Init(&ctx); - SHA224_Update(&ctx, reinterpret_cast(begin), size); - SHA224_Final(out_char_data, &ctx); - } }; struct SHA256Impl { static constexpr auto name = "SHA256"; + static constexpr const EVP_MD * (*provider)() = &EVP_sha256; enum { length = SHA256_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA256_CTX ctx; - SHA256_Init(&ctx); - SHA256_Update(&ctx, reinterpret_cast(begin), size); - SHA256_Final(out_char_data, &ctx); - } }; struct SHA384Impl { static constexpr auto name = "SHA384"; + static constexpr const EVP_MD * (*provider)() = &EVP_sha384; enum { length = SHA384_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA512_CTX ctx; - SHA384_Init(&ctx); - SHA384_Update(&ctx, reinterpret_cast(begin), size); - SHA384_Final(out_char_data, &ctx); - } }; struct SHA512Impl { static constexpr auto name = "SHA512"; + static constexpr const EVP_MD * (*provider)() = &EVP_sha512; enum { - length = 64 + length = SHA512_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - SHA512_CTX ctx; - SHA512_Init(&ctx); - SHA512_Update(&ctx, reinterpret_cast(begin), size); - SHA512_Final(out_char_data, &ctx); - } }; struct SHA512Impl256 { static constexpr auto name = "SHA512_256"; + static constexpr const EVP_MD * (*provider)() = &EVP_sha512_256; enum { - length = 32 + length = SHA256_DIGEST_LENGTH }; - - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) - { - /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default - /// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init, - /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available - /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface. 
- auto * md_ctx = EVP_MD_CTX_create(); - EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/); - EVP_DigestUpdate(md_ctx, begin, size); - EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/); - EVP_MD_CTX_destroy(md_ctx); - } }; struct RIPEMD160Impl { static constexpr auto name = "RIPEMD160"; + static constexpr const EVP_MD * (*provider)() = &EVP_ripemd160; enum { length = RIPEMD160_DIGEST_LENGTH }; +}; +#endif + +template +class GenericProvider +{ +public: + static constexpr auto name = Impl::name; + static constexpr auto length = Impl::length; - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + void apply(const char* begin, size_t size, unsigned char* out_char_data) { - RIPEMD160_CTX ctx; - RIPEMD160_Init(&ctx); - RIPEMD160_Update(&ctx, reinterpret_cast(begin), size); - RIPEMD160_Final(out_char_data, &ctx); + Impl::apply(begin, size, out_char_data); } }; -#endif #if USE_BLAKE3 struct ImplBLAKE3 @@ -211,7 +198,7 @@ struct ImplBLAKE3 length = 32 }; - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + static void apply(const char * begin, size_t size, unsigned char * out_char_data) { static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length); auto & result = *reinterpret_cast *>(out_char_data); @@ -234,7 +221,7 @@ struct Keccak256Impl length = 32 }; - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + static void apply(const char * begin, size_t size, unsigned char * out_char_data) { sha3_HashBuffer(256, SHA3_FLAGS_KECCAK, begin, size, out_char_data, Keccak256Impl::length); } @@ -267,6 +254,8 @@ class FunctionStringHashFixedString : public IFunction ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + auto hasher = Impl(); + if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) { auto col_to = ColumnFixedString::create(Impl::length); @@ -277,9 +266,10 @@ class FunctionStringHashFixedString : public IFunction chars_to.resize(input_rows_count * Impl::length); ColumnString::Offset current_offset = 0; + for (size_t i = 0; i < input_rows_count; ++i) { - Impl::apply( + hasher.apply( reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1, reinterpret_cast(&chars_to[i * Impl::length])); @@ -289,6 +279,7 @@ class FunctionStringHashFixedString : public IFunction return col_to; } + if (const ColumnFixedString * col_from_fix = checkAndGetColumn(arguments[0].column.get())) { auto col_to = ColumnFixedString::create(Impl::length); @@ -298,11 +289,15 @@ class FunctionStringHashFixedString : public IFunction chars_to.resize(input_rows_count * Impl::length); for (size_t i = 0; i < input_rows_count; ++i) { - Impl::apply( - reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); + hasher.apply( + reinterpret_cast(&data[i * length]), + length, + reinterpret_cast(&chars_to[i * Impl::length]) + ); } return col_to; } + if (const ColumnIPv6 * col_from_ip = checkAndGetColumn(arguments[0].column.get())) { auto col_to = ColumnFixedString::create(Impl::length); @@ -312,7 +307,11 @@ class FunctionStringHashFixedString : public IFunction chars_to.resize(input_rows_count * Impl::length); for (size_t i = 0; i < input_rows_count; ++i) { - Impl::apply(reinterpret_cast(&data[i]), length, reinterpret_cast(&chars_to[i * Impl::length])); + hasher.apply( + reinterpret_cast(&data[i]), + length, + reinterpret_cast(&chars_to[i * Impl::length]) + 
); } return col_to; } @@ -325,15 +324,15 @@ class FunctionStringHashFixedString : public IFunction REGISTER_FUNCTION(HashFixedStrings) { # if USE_SSL - using FunctionMD4 = FunctionStringHashFixedString; - using FunctionMD5 = FunctionStringHashFixedString; - using FunctionSHA1 = FunctionStringHashFixedString; - using FunctionSHA224 = FunctionStringHashFixedString; - using FunctionSHA256 = FunctionStringHashFixedString; - using FunctionSHA384 = FunctionStringHashFixedString; - using FunctionSHA512 = FunctionStringHashFixedString; - using FunctionSHA512_256 = FunctionStringHashFixedString; - using FunctionRIPEMD160 = FunctionStringHashFixedString; + using FunctionMD4 = FunctionStringHashFixedString>; + using FunctionMD5 = FunctionStringHashFixedString>; + using FunctionSHA1 = FunctionStringHashFixedString>; + using FunctionSHA224 = FunctionStringHashFixedString>; + using FunctionSHA256 = FunctionStringHashFixedString>; + using FunctionSHA384 = FunctionStringHashFixedString>; + using FunctionSHA512 = FunctionStringHashFixedString>; + using FunctionSHA512_256 = FunctionStringHashFixedString>; + using FunctionRIPEMD160 = FunctionStringHashFixedString>; factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the RIPEMD-160 hash of the given string.)", @@ -466,7 +465,7 @@ REGISTER_FUNCTION(HashFixedStrings) # endif # if USE_BLAKE3 - using FunctionBLAKE3 = FunctionStringHashFixedString; + using FunctionBLAKE3 = FunctionStringHashFixedString>; factory.registerFunction(FunctionDocumentation{ .description = R"( Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. @@ -479,7 +478,7 @@ REGISTER_FUNCTION(HashFixedStrings) # endif # if USE_SHA3IUF - using FunctionKeccak256 = FunctionStringHashFixedString; + using FunctionKeccak256 = FunctionStringHashFixedString>; factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the Keccak-256 cryptographic hash of the given string. This hash function is widely used in blockchain applications, particularly Ethereum.)", diff --git a/tests/integration/test_dictionaries_ddl/test.py b/tests/integration/test_dictionaries_ddl/test.py index da69ce619e7e..9d418810bfe4 100644 --- a/tests/integration/test_dictionaries_ddl/test.py +++ b/tests/integration/test_dictionaries_ddl/test.py @@ -586,7 +586,11 @@ def test_secure(started_cluster): ) with pytest.raises(QueryRuntimeException) as excinfo: node1.query("SELECT dictGet('test.clickhouse_secure', 'value', toUInt64(1))") - assert "Unexpected packet from server localhost:9440" in str(excinfo.value) + error = str(excinfo.value) + assert ( + "Unexpected packet from server localhost:9440" in error + or "Connection reset by peer" in error + ) # Secure is set to 0 in named collection node1.query("DROP DICTIONARY IF EXISTS test.clickhouse_secure") @@ -607,7 +611,11 @@ def test_secure(started_cluster): ) with pytest.raises(QueryRuntimeException) as excinfo: node1.query("SELECT dictGet('test.clickhouse_secure', 'value', toUInt64(1))") - assert "Unexpected packet from server localhost:9440" in str(excinfo.value) + error = str(excinfo.value) + assert ( + "Unexpected packet from server localhost:9440" in error + or "Connection reset by peer" in error + ) # Secure is set to 0 in named collection and in 1 in DDL node1.query("DROP DICTIONARY IF EXISTS test.clickhouse_secure")
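
For reference, a minimal standalone sketch of the EVP digest pattern that the FunctionsStringHashFixedString.cpp hunk above adopts: an EVP_MD_CTX template is initialised once per algorithm, then duplicated with EVP_MD_CTX_copy_ex for each input so that EVP_DigestInit_ex is not repeated per row. The helper names make_template_ctx and compute_digest are illustrative assumptions, not ClickHouse or OpenSSL APIs; only the standard EVP calls (OpenSSL 1.1.1 / 3.x) are assumed, and SHA-256 is chosen arbitrarily.

/*
 * Sketch only: template-context EVP hashing, assuming OpenSSL 1.1.1+ / 3.x.
 * Helper names are illustrative, not taken from the patch above.
 */
#include <openssl/evp.h>

#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

using EvpCtxPtr = std::unique_ptr<EVP_MD_CTX, decltype(&EVP_MD_CTX_free)>;

/* Build the reusable template context for one digest algorithm. */
static EvpCtxPtr make_template_ctx(const EVP_MD * md)
{
    EvpCtxPtr ctx(EVP_MD_CTX_new(), &EVP_MD_CTX_free);
    if (!ctx)
        throw std::runtime_error("EVP_MD_CTX_new failed");
    if (EVP_DigestInit_ex(ctx.get(), md, nullptr) != 1)
        throw std::runtime_error("EVP_DigestInit_ex failed");
    return ctx;
}

/* Hash one message by copying the pre-initialised template context. */
static std::vector<unsigned char> compute_digest(const EVP_MD_CTX * tmpl, const std::string & data)
{
    EvpCtxPtr ctx(EVP_MD_CTX_new(), &EVP_MD_CTX_free);
    if (!ctx || EVP_MD_CTX_copy_ex(ctx.get(), tmpl) != 1)
        throw std::runtime_error("EVP_MD_CTX_copy_ex failed");

    if (EVP_DigestUpdate(ctx.get(), data.data(), data.size()) != 1)
        throw std::runtime_error("EVP_DigestUpdate failed");

    unsigned char out[EVP_MAX_MD_SIZE];
    unsigned int out_len = 0;
    if (EVP_DigestFinal_ex(ctx.get(), out, &out_len) != 1)
        throw std::runtime_error("EVP_DigestFinal_ex failed");

    return {out, out + out_len};
}

int main()
{
    const auto tmpl = make_template_ctx(EVP_sha256());
    for (unsigned char b : compute_digest(tmpl.get(), "hello"))
        std::printf("%02x", b);
    std::printf("\n");
    return 0;
}

The patch goes one step further and reuses a thread_local context as the copy target, which avoids one allocation per row at the cost of a small amount of per-thread state; the sketch allocates a fresh context per call purely for simplicity.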